diff options
author | Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | 2006-09-26 17:44:37 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2006-09-26 17:44:37 -0400 |
commit | 43ed3baf623410b3fa6ca14a9d3f6deca3493c56 (patch) | |
tree | b086b18adff2af6b2633e239e9d1b26d764ae333 /arch/ia64/kernel/mca_drv.c | |
parent | 816add4e986499145135c4014a7c8a8857f9f3c3 (diff) |
[IA64] printing support for MCA/INIT
Printing messages to the console from the MCA/INIT handlers is useful,
however setting oops_in_progress = 1 in them is bound to make
something in the kernel go wrong. It is especially ugly if the
system goes wrong after returning from a recoverable MCA.
This patch adds an ia64_mca_printk() function that collects
messages into a temporary, not-so-large message buffer while
in the MCA/INIT environment and prints them out later, after
returning to normal context or when the handlers decide to
bring down the system.
This print function is also exported for use in MCA extension
handlers. It would be useful for describing the details of a
recovery.
NOTE:
I don't think it would be sane to enlarge the temporary message
buffer enough to hold whole stack dumps from INIT, so
buffering is disabled during the stack dump from the INIT monarch
(= default_monarch_init_process). Please fix this in the future.
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca_drv.c')
-rw-r--r-- | arch/ia64/kernel/mca_drv.c | 54 |
1 files changed, 36 insertions, 18 deletions
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 8db6e0cedadc..a45009d2bc90 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c | |||
@@ -79,14 +79,30 @@ static int | |||
79 | fatal_mca(const char *fmt, ...) | 79 | fatal_mca(const char *fmt, ...) |
80 | { | 80 | { |
81 | va_list args; | 81 | va_list args; |
82 | char buf[256]; | ||
82 | 83 | ||
83 | va_start(args, fmt); | 84 | va_start(args, fmt); |
84 | vprintk(fmt, args); | 85 | vsnprintf(buf, sizeof(buf), fmt, args); |
85 | va_end(args); | 86 | va_end(args); |
87 | ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); | ||
86 | 88 | ||
87 | return MCA_NOT_RECOVERED; | 89 | return MCA_NOT_RECOVERED; |
88 | } | 90 | } |
89 | 91 | ||
92 | static int | ||
93 | mca_recovered(const char *fmt, ...) | ||
94 | { | ||
95 | va_list args; | ||
96 | char buf[256]; | ||
97 | |||
98 | va_start(args, fmt); | ||
99 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
100 | va_end(args); | ||
101 | ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); | ||
102 | |||
103 | return MCA_RECOVERED; | ||
104 | } | ||
105 | |||
90 | /** | 106 | /** |
91 | * mca_page_isolate - isolate a poisoned page in order not to use it later | 107 | * mca_page_isolate - isolate a poisoned page in order not to use it later |
92 | * @paddr: poisoned memory location | 108 | * @paddr: poisoned memory location |
@@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr) | |||
140 | void | 156 | void |
141 | mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) | 157 | mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) |
142 | { | 158 | { |
159 | ia64_mlogbuf_dump(); | ||
143 | printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " | 160 | printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " |
144 | "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", | 161 | "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", |
145 | raw_smp_processor_id(), current->pid, current->uid, | 162 | raw_smp_processor_id(), current->pid, current->uid, |
@@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx, | |||
440 | 457 | ||
441 | /* Is target address valid? */ | 458 | /* Is target address valid? */ |
442 | if (!pbci->tv) | 459 | if (!pbci->tv) |
443 | return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); | 460 | return fatal_mca("target address not valid"); |
444 | 461 | ||
445 | /* | 462 | /* |
446 | * cpu read or memory-mapped io read | 463 | * cpu read or memory-mapped io read |
@@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx, | |||
458 | 475 | ||
459 | /* Is minstate valid? */ | 476 | /* Is minstate valid? */ |
460 | if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) | 477 | if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) |
461 | return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); | 478 | return fatal_mca("minstate not valid"); |
462 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); | 479 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); |
463 | psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); | 480 | psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); |
464 | 481 | ||
@@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx, | |||
492 | psr2->bn = 1; | 509 | psr2->bn = 1; |
493 | psr2->i = 0; | 510 | psr2->i = 0; |
494 | 511 | ||
495 | return MCA_RECOVERED; | 512 | return mca_recovered("user memory corruption. " |
513 | "kill affected process - recovered."); | ||
496 | } | 514 | } |
497 | 515 | ||
498 | } | 516 | } |
499 | 517 | ||
500 | return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," | 518 | return fatal_mca("kernel context not recovered, iip 0x%lx\n", |
501 | " iip 0x%lx\n", pmsa->pmsa_iip); | 519 | pmsa->pmsa_iip); |
502 | } | 520 | } |
503 | 521 | ||
504 | /** | 522 | /** |
@@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
584 | * The machine check is corrected. | 602 | * The machine check is corrected. |
585 | */ | 603 | */ |
586 | if (psp->cm == 1) | 604 | if (psp->cm == 1) |
587 | return MCA_RECOVERED; | 605 | return mca_recovered("machine check is already corrected."); |
588 | 606 | ||
589 | /* | 607 | /* |
590 | * The error was not contained. Software must be reset. | 608 | * The error was not contained. Software must be reset. |
591 | */ | 609 | */ |
592 | if (psp->us || psp->ci == 0) | 610 | if (psp->us || psp->ci == 0) |
593 | return fatal_mca(KERN_ALERT "MCA: error not contained\n"); | 611 | return fatal_mca("error not contained"); |
594 | 612 | ||
595 | /* | 613 | /* |
596 | * The cache check and bus check bits have four possible states | 614 | * The cache check and bus check bits have four possible states |
@@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
601 | * 1 1 Memory error, attempt recovery | 619 | * 1 1 Memory error, attempt recovery |
602 | */ | 620 | */ |
603 | if (psp->bc == 0 || pbci == NULL) | 621 | if (psp->bc == 0 || pbci == NULL) |
604 | return fatal_mca(KERN_ALERT "MCA: No bus check\n"); | 622 | return fatal_mca("No bus check"); |
605 | 623 | ||
606 | /* | 624 | /* |
607 | * Sorry, we cannot handle so many. | 625 | * Sorry, we cannot handle so many. |
608 | */ | 626 | */ |
609 | if (peidx_bus_check_num(peidx) > 1) | 627 | if (peidx_bus_check_num(peidx) > 1) |
610 | return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); | 628 | return fatal_mca("Too many bus checks"); |
611 | /* | 629 | /* |
612 | * Well, here is only one bus error. | 630 | * Well, here is only one bus error. |
613 | */ | 631 | */ |
614 | if (pbci->ib) | 632 | if (pbci->ib) |
615 | return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); | 633 | return fatal_mca("Internal Bus error"); |
616 | if (pbci->cc) | 634 | if (pbci->cc) |
617 | return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); | 635 | return fatal_mca("Cache-cache error"); |
618 | if (pbci->eb && pbci->bsi > 0) | 636 | if (pbci->eb && pbci->bsi > 0) |
619 | return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); | 637 | return fatal_mca("External bus check fatal status"); |
620 | 638 | ||
621 | /* | 639 | /* |
622 | * This is a local MCA and estimated as recoverble external bus error. | 640 | * This is a local MCA and estimated as recoverble external bus error. |
@@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
628 | /* | 646 | /* |
629 | * On account of strange SAL error record, we cannot recover. | 647 | * On account of strange SAL error record, we cannot recover. |
630 | */ | 648 | */ |
631 | return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); | 649 | return fatal_mca("Strange SAL record"); |
632 | } | 650 | } |
633 | 651 | ||
634 | /** | 652 | /** |
@@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) | |||
657 | 675 | ||
658 | /* Now, OS can recover when there is one processor error section */ | 676 | /* Now, OS can recover when there is one processor error section */ |
659 | if (n_proc_err > 1) | 677 | if (n_proc_err > 1) |
660 | return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); | 678 | return fatal_mca("Too Many Errors"); |
661 | else if (n_proc_err == 0) | 679 | else if (n_proc_err == 0) |
662 | /* Weird SAL record ... We need not to recover */ | 680 | /* Weird SAL record ... We can't do anything */ |
663 | return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); | 681 | return fatal_mca("Weird SAL record"); |
664 | 682 | ||
665 | /* Make index of processor error section */ | 683 | /* Make index of processor error section */ |
666 | mca_make_peidx((sal_log_processor_info_t*) | 684 | mca_make_peidx((sal_log_processor_info_t*) |
@@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) | |||
671 | 689 | ||
672 | /* Check whether MCA is global or not */ | 690 | /* Check whether MCA is global or not */ |
673 | if (is_mca_global(&peidx, &pbci, sos)) | 691 | if (is_mca_global(&peidx, &pbci, sos)) |
674 | return fatal_mca(KERN_ALERT "MCA: global MCA\n"); | 692 | return fatal_mca("global MCA"); |
675 | 693 | ||
676 | /* Try to recover a processor error */ | 694 | /* Try to recover a processor error */ |
677 | return recover_from_processor_error(platform_err, &slidx, &peidx, | 695 | return recover_from_processor_error(platform_err, &slidx, &peidx, |