author		Jack Steiner <steiner@sgi.com>	2011-05-09 12:35:19 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-05-10 03:26:55 -0400
commit		1d44e8288a0557c28c447d7e511f50d06ff93a34 (patch)
tree		fbda66cbc2a229e236a6fa439b57b233b6397d97 /arch/x86/kernel/apic/x2apic_uv_x.c
parent		693d92a1bbc9e42681c42ed190bd42b636ca876f (diff)
x86, UV: Fix NMI handler for UV platforms
This fixes problems seen on UV systems handling NMIs from the
node controller.
I traced the "dazed..." messages seen earlier to a bug in the BMC on
our platform: it was sending NMIs without setting the register that
identifies the source of the NMI.
So rather than _assuming_ any unhandled NMI came from the UV system
maintenance console (SMC), add a check to verify that the SMC actually
sent the NMI.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: gorcunov@gmail.com
Cc: dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
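
A minimal userspace model of the check described above, assuming a
pthread mutex in place of the per-blade spinlock and a plain uint64_t
in place of the UVH_SCRATCH5 MMR (all names here are hypothetical; only
the check-then-clear pattern mirrors the patch):

    #include <stdint.h>
    #include <stdio.h>
    #include <pthread.h>

    #define NMI_PENDING_MASK (1ULL << 63)

    static uint64_t scratch_mmr;                 /* stands in for UVH_NMI_MMR */
    static unsigned long long blade_nmi_count;   /* the per-blade count */
    static pthread_mutex_t blade_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Simulated BMC: flags a pending NMI before raising it. */
    static void bmc_send_nmi(void)
    {
            scratch_mmr |= NMI_PENDING_MASK;
    }

    /* Returns 1 only if the "BMC" really flagged this NMI. */
    static int handle_nmi(void)
    {
            if (!(scratch_mmr & NMI_PENDING_MASK))
                    return 0;                    /* not ours; pass it on */

            pthread_mutex_lock(&blade_lock);
            /* Re-check under the lock: another cpu may have claimed it. */
            if (scratch_mmr & NMI_PENDING_MASK) {
                    blade_nmi_count++;
                    scratch_mmr &= ~NMI_PENDING_MASK;  /* the MMR_CLEAR write */
            }
            pthread_mutex_unlock(&blade_lock);
            return 1;
    }

    int main(void)
    {
            printf("spurious NMI claimed: %d\n", handle_nmi());   /* 0 */
            bmc_send_nmi();
            printf("flagged NMI claimed:  %d\n", handle_nmi());   /* 1 */
            printf("blade_nmi_count = %llu\n", blade_nmi_count);
            return 0;
    }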
Diffstat (limited to 'arch/x86/kernel/apic/x2apic_uv_x.c')
 arch/x86/kernel/apic/x2apic_uv_x.c | 48 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc095..7acd2d2ac965 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* The BMC sets a bit in this MMR before sending an NMI */
+#define UVH_NMI_MMR			UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR		(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK		(1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
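The new defines appear to follow a common MMR convention: the status
word (UVH_SCRATCH5) is paired with a clear register 8 bytes above it,
so the pending bit is dropped with a single write rather than a
read-modify-write. A rough model of that convention, with hypothetical
names, the hardware side effect emulated in software:

    #include <stdint.h>

    /* Model: a status word plus a write-to-clear alias at +8 bytes. */
    struct mmr_pair {
            volatile uint64_t status;   /* UVH_NMI_MMR            */
            volatile uint64_t clear;    /* UVH_NMI_MMR_CLEAR (+8) */
    };

    #define PENDING (1ULL << 63)

    static inline int nmi_pending(struct mmr_pair *m)
    {
            return (m->status & PENDING) != 0;
    }

    static inline void nmi_clear(struct mmr_pair *m)
    {
            /* On hardware, writing the mask to the alias clears the
             * corresponding bit in status; emulate that here. */
            m->clear = PENDING;
            m->status &= ~PENDING;
    }

    int main(void)
    {
            struct mmr_pair m = { .status = PENDING, .clear = 0 };
            nmi_clear(&m);
            return nmi_pending(&m);     /* exits 0: the bit is gone */
    }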
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+	unsigned long real_uv_nmi;
+	int bid;
+
 	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
 		return NOTIFY_OK;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	bid = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_blade_info[bid].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_blade_info[bid].nmi_count++;
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_blade_info[bid].nmi_lock);
+	}
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+		return NOTIFY_DONE;
+
+	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
 
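The per-cpu cpu_last_nmi_count compared against the per-blade nmi_count
acts as an edge detector: one cpu wins the race to clear the MMR and
bump the count, and every cpu on the blade then dumps its stack exactly
once for that NMI. A standalone sketch of the idea, with hypothetical
names and an array standing in for per-cpu data:

    #include <stdio.h>

    #define NCPUS 4

    static unsigned long blade_nmi_count;          /* bumped once per real NMI */
    static unsigned long cpu_last_nmi_count[NCPUS];

    /* Returns 1 the first time a given cpu observes a new blade NMI. */
    static int cpu_sees_new_nmi(int cpu)
    {
            if (cpu_last_nmi_count[cpu] == blade_nmi_count)
                    return 0;                      /* already handled this one */
            cpu_last_nmi_count[cpu] = blade_nmi_count;
            return 1;
    }

    int main(void)
    {
            blade_nmi_count++;                     /* one cpu clears the MMR */
            for (int cpu = 0; cpu < NCPUS; cpu++)
                    printf("cpu %d dumps: %d\n", cpu, cpu_sees_new_nmi(cpu));
            /* A second pass produces no further dumps. */
            for (int cpu = 0; cpu < NCPUS; cpu++)
                    printf("cpu %d dumps: %d\n", cpu, cpu_sees_new_nmi(cpu));
            return 0;
    }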
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 }
 
 static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call = uv_handle_nmi
+	.notifier_call = uv_handle_nmi,
+	.priority = NMI_LOCAL_LOW_PRIOR - 1,
 };
 
 void uv_register_nmi_notifier(void)
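Die-notifier handlers run in descending priority order, so registering
just below NMI_LOCAL_LOW_PRIOR lets the local NMI handlers (perf,
watchdog) claim the NMI first, leaving this handler to see only the
genuinely unknown ones. A kernel-style fragment (not standalone; the
example_* names are hypothetical) of the registration pattern:

    static struct notifier_block example_nmi_nb = {
            .notifier_call  = uv_handle_nmi,
            /* run after the local NMI handlers have had their chance */
            .priority       = NMI_LOCAL_LOW_PRIOR - 1,
    };

    static void example_register(void)
    {
            register_die_notifier(&example_nmi_nb);
    }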
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
+
 	for (blade = 0; blade < uv_num_possible_blades(); blade++)
 		uv_blade_info[blade].memory_nid = -1;
 
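kzalloc() behaves like kmalloc() followed by a memset() to zero, which
guarantees the newly added nmi_count (and the rest of each
uv_blade_info) starts at zero rather than at whatever the allocator
returns. The userspace analogue, with a hypothetical struct:

    #include <stdlib.h>

    struct blade_info {
            unsigned long nmi_count;
            /* ... other per-blade fields ... */
    };

    /* calloc() zeroes the array, as kzalloc() does in the patch. */
    static struct blade_info *alloc_blades(size_t nblades)
    {
            return calloc(nblades, sizeof(struct blade_info));
    }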
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
 		uv_blade_info[blade].pnode = pnode;
 		uv_blade_info[blade].nr_possible_cpus = 0;
 		uv_blade_info[blade].nr_online_cpus = 0;
+		spin_lock_init(&uv_blade_info[blade].nmi_lock);
 		max_pnode = max(pnode, max_pnode);
 		blade++;
 	}