aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/mca_drv.c
diff options
context:
space:
mode:
authorHidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>2006-09-26 17:44:37 -0400
committerTony Luck <tony.luck@intel.com>2006-09-26 17:44:37 -0400
commit43ed3baf623410b3fa6ca14a9d3f6deca3493c56 (patch)
treeb086b18adff2af6b2633e239e9d1b26d764ae333 /arch/ia64/kernel/mca_drv.c
parent816add4e986499145135c4014a7c8a8857f9f3c3 (diff)
[IA64] printing support for MCA/INIT
Printing message to console from MCA/INIT handler is useful, however doing oops_in_progress = 1 in them exactly makes something in kernel wrong. Especially it sounds ugly if system goes wrong after returning from recoverable MCA. This patch adds ia64_mca_printk() function that collects messages into temporary-not-so-large message buffer during in MCA/INIT environment and print them out later, after returning to normal context or when handlers determine to down the system. Also this print function is exported for use in extensional MCA handler. It would be useful to describe detail about recovery. NOTE: I don't think it is sane thing if temporary message buffer is enlarged enough to hold whole stack dumps from INIT, so buffering is disabled during stack dump from INIT-monarch (= default_monarch_init_process). please fix it in future. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Acked-by: Russ Anderson <rja@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca_drv.c')
-rw-r--r--arch/ia64/kernel/mca_drv.c54
1 files changed, 36 insertions, 18 deletions
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 8db6e0cedadc..a45009d2bc90 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -79,14 +79,30 @@ static int
79fatal_mca(const char *fmt, ...) 79fatal_mca(const char *fmt, ...)
80{ 80{
81 va_list args; 81 va_list args;
82 char buf[256];
82 83
83 va_start(args, fmt); 84 va_start(args, fmt);
84 vprintk(fmt, args); 85 vsnprintf(buf, sizeof(buf), fmt, args);
85 va_end(args); 86 va_end(args);
87 ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
86 88
87 return MCA_NOT_RECOVERED; 89 return MCA_NOT_RECOVERED;
88} 90}
89 91
92static int
93mca_recovered(const char *fmt, ...)
94{
95 va_list args;
96 char buf[256];
97
98 va_start(args, fmt);
99 vsnprintf(buf, sizeof(buf), fmt, args);
100 va_end(args);
101 ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
102
103 return MCA_RECOVERED;
104}
105
90/** 106/**
91 * mca_page_isolate - isolate a poisoned page in order not to use it later 107 * mca_page_isolate - isolate a poisoned page in order not to use it later
92 * @paddr: poisoned memory location 108 * @paddr: poisoned memory location
@@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr)
140void 156void
141mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) 157mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
142{ 158{
159 ia64_mlogbuf_dump();
143 printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " 160 printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
144 "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", 161 "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
145 raw_smp_processor_id(), current->pid, current->uid, 162 raw_smp_processor_id(), current->pid, current->uid,
@@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx,
440 457
441 /* Is target address valid? */ 458 /* Is target address valid? */
442 if (!pbci->tv) 459 if (!pbci->tv)
443 return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); 460 return fatal_mca("target address not valid");
444 461
445 /* 462 /*
446 * cpu read or memory-mapped io read 463 * cpu read or memory-mapped io read
@@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx,
458 475
459 /* Is minstate valid? */ 476 /* Is minstate valid? */
460 if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) 477 if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
461 return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); 478 return fatal_mca("minstate not valid");
462 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); 479 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
463 psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); 480 psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
464 481
@@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx,
492 psr2->bn = 1; 509 psr2->bn = 1;
493 psr2->i = 0; 510 psr2->i = 0;
494 511
495 return MCA_RECOVERED; 512 return mca_recovered("user memory corruption. "
513 "kill affected process - recovered.");
496 } 514 }
497 515
498 } 516 }
499 517
500 return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," 518 return fatal_mca("kernel context not recovered, iip 0x%lx\n",
501 " iip 0x%lx\n", pmsa->pmsa_iip); 519 pmsa->pmsa_iip);
502} 520}
503 521
504/** 522/**
@@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
584 * The machine check is corrected. 602 * The machine check is corrected.
585 */ 603 */
586 if (psp->cm == 1) 604 if (psp->cm == 1)
587 return MCA_RECOVERED; 605 return mca_recovered("machine check is already corrected.");
588 606
589 /* 607 /*
590 * The error was not contained. Software must be reset. 608 * The error was not contained. Software must be reset.
591 */ 609 */
592 if (psp->us || psp->ci == 0) 610 if (psp->us || psp->ci == 0)
593 return fatal_mca(KERN_ALERT "MCA: error not contained\n"); 611 return fatal_mca("error not contained");
594 612
595 /* 613 /*
596 * The cache check and bus check bits have four possible states 614 * The cache check and bus check bits have four possible states
@@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
601 * 1 1 Memory error, attempt recovery 619 * 1 1 Memory error, attempt recovery
602 */ 620 */
603 if (psp->bc == 0 || pbci == NULL) 621 if (psp->bc == 0 || pbci == NULL)
604 return fatal_mca(KERN_ALERT "MCA: No bus check\n"); 622 return fatal_mca("No bus check");
605 623
606 /* 624 /*
607 * Sorry, we cannot handle so many. 625 * Sorry, we cannot handle so many.
608 */ 626 */
609 if (peidx_bus_check_num(peidx) > 1) 627 if (peidx_bus_check_num(peidx) > 1)
610 return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); 628 return fatal_mca("Too many bus checks");
611 /* 629 /*
612 * Well, here is only one bus error. 630 * Well, here is only one bus error.
613 */ 631 */
614 if (pbci->ib) 632 if (pbci->ib)
615 return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); 633 return fatal_mca("Internal Bus error");
616 if (pbci->cc) 634 if (pbci->cc)
617 return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); 635 return fatal_mca("Cache-cache error");
618 if (pbci->eb && pbci->bsi > 0) 636 if (pbci->eb && pbci->bsi > 0)
619 return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); 637 return fatal_mca("External bus check fatal status");
620 638
621 /* 639 /*
622 * This is a local MCA and estimated as recoverble external bus error. 640 * This is a local MCA and estimated as recoverble external bus error.
@@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
628 /* 646 /*
629 * On account of strange SAL error record, we cannot recover. 647 * On account of strange SAL error record, we cannot recover.
630 */ 648 */
631 return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); 649 return fatal_mca("Strange SAL record");
632} 650}
633 651
634/** 652/**
@@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
657 675
658 /* Now, OS can recover when there is one processor error section */ 676 /* Now, OS can recover when there is one processor error section */
659 if (n_proc_err > 1) 677 if (n_proc_err > 1)
660 return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); 678 return fatal_mca("Too Many Errors");
661 else if (n_proc_err == 0) 679 else if (n_proc_err == 0)
662 /* Weird SAL record ... We need not to recover */ 680 /* Weird SAL record ... We can't do anything */
663 return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); 681 return fatal_mca("Weird SAL record");
664 682
665 /* Make index of processor error section */ 683 /* Make index of processor error section */
666 mca_make_peidx((sal_log_processor_info_t*) 684 mca_make_peidx((sal_log_processor_info_t*)
@@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
671 689
672 /* Check whether MCA is global or not */ 690 /* Check whether MCA is global or not */
673 if (is_mca_global(&peidx, &pbci, sos)) 691 if (is_mca_global(&peidx, &pbci, sos))
674 return fatal_mca(KERN_ALERT "MCA: global MCA\n"); 692 return fatal_mca("global MCA");
675 693
676 /* Try to recover a processor error */ 694 /* Try to recover a processor error */
677 return recover_from_processor_error(platform_err, &slidx, &peidx, 695 return recover_from_processor_error(platform_err, &slidx, &peidx,