diff options
author | Jack Steiner <steiner@sgi.com> | 2011-05-09 12:35:19 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-05-10 03:26:55 -0400 |
commit | 1d44e8288a0557c28c447d7e511f50d06ff93a34 (patch) | |
tree | fbda66cbc2a229e236a6fa439b57b233b6397d97 | |
parent | 693d92a1bbc9e42681c42ed190bd42b636ca876f (diff) |
x86, UV: Fix NMI handler for UV platforms
This fixes problems seen on UV systems handling NMIs from the
node controller.
I isolated the "dazed..." messages that I saw earlier to a bug in
the BMC on our platform. It was sending NMIs without properly setting
a register that indicates the source of the NMI.
So rather than _assuming_ any unhandled NMI came from the UV system
maintenance console (SMC), add a check to verify that the SMC actually
sent the NMI.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: gorcunov@gmail.com
Cc: dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/include/asm/uv/uv_hub.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/uv/uv_mmrs.h | 16 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 48 |
3 files changed, 60 insertions, 6 deletions
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a501741c233..4298002d0c8 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -398,6 +398,8 @@ struct uv_blade_info { | |||
398 | unsigned short nr_online_cpus; | 398 | unsigned short nr_online_cpus; |
399 | unsigned short pnode; | 399 | unsigned short pnode; |
400 | short memory_nid; | 400 | short memory_nid; |
401 | spinlock_t nmi_lock; | ||
402 | unsigned long nmi_count; | ||
401 | }; | 403 | }; |
402 | extern struct uv_blade_info *uv_blade_info; | 404 | extern struct uv_blade_info *uv_blade_info; |
403 | extern short *uv_node_to_blade; | 405 | extern short *uv_node_to_blade; |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 20cafeac745..f5bb64a823d 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV MMR definitions | 6 | * SGI UV MMR definitions |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #ifndef _ASM_X86_UV_UV_MMRS_H | 11 | #ifndef _ASM_X86_UV_UV_MMRS_H |
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { | |||
1099 | } s; | 1099 | } s; |
1100 | }; | 1100 | }; |
1101 | 1101 | ||
1102 | /* ========================================================================= */ | ||
1103 | /* UVH_SCRATCH5 */ | ||
1104 | /* ========================================================================= */ | ||
1105 | #define UVH_SCRATCH5 0x2d0200UL | ||
1106 | #define UVH_SCRATCH5_32 0x00778 | ||
1107 | |||
1108 | #define UVH_SCRATCH5_SCRATCH5_SHFT 0 | ||
1109 | #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL | ||
1110 | union uvh_scratch5_u { | ||
1111 | unsigned long v; | ||
1112 | struct uvh_scratch5_s { | ||
1113 | unsigned long scratch5 : 64; /* RW, W1CS */ | ||
1114 | } s; | ||
1115 | }; | ||
1102 | 1116 | ||
1103 | #endif /* __ASM_UV_MMRS_X86_H__ */ | 1117 | #endif /* __ASM_UV_MMRS_X86_H__ */ |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 33b10a0fc09..7acd2d2ac96 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -37,6 +37,13 @@ | |||
37 | #include <asm/smp.h> | 37 | #include <asm/smp.h> |
38 | #include <asm/x86_init.h> | 38 | #include <asm/x86_init.h> |
39 | #include <asm/emergency-restart.h> | 39 | #include <asm/emergency-restart.h> |
40 | #include <asm/nmi.h> | ||
41 | |||
42 | /* BMC sets a bit this MMR non-zero before sending an NMI */ | ||
43 | #define UVH_NMI_MMR UVH_SCRATCH5 | ||
44 | #define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) | ||
45 | #define UV_NMI_PENDING_MASK (1UL << 63) | ||
46 | DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); | ||
40 | 47 | ||
41 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 48 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
42 | 49 | ||
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void) | |||
642 | */ | 649 | */ |
643 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | 650 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) |
644 | { | 651 | { |
652 | unsigned long real_uv_nmi; | ||
653 | int bid; | ||
654 | |||
645 | if (reason != DIE_NMIUNKNOWN) | 655 | if (reason != DIE_NMIUNKNOWN) |
646 | return NOTIFY_OK; | 656 | return NOTIFY_OK; |
647 | 657 | ||
648 | if (in_crash_kexec) | 658 | if (in_crash_kexec) |
649 | /* do nothing if entering the crash kernel */ | 659 | /* do nothing if entering the crash kernel */ |
650 | return NOTIFY_OK; | 660 | return NOTIFY_OK; |
661 | |||
651 | /* | 662 | /* |
652 | * Use a lock so only one cpu prints at a time | 663 | * Each blade has an MMR that indicates when an NMI has been sent |
653 | * to prevent intermixed output. | 664 | * to cpus on the blade. If an NMI is detected, atomically |
665 | * clear the MMR and update a per-blade NMI count used to | ||
666 | * cause each cpu on the blade to notice a new NMI. | ||
667 | */ | ||
668 | bid = uv_numa_blade_id(); | ||
669 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
670 | |||
671 | if (unlikely(real_uv_nmi)) { | ||
672 | spin_lock(&uv_blade_info[bid].nmi_lock); | ||
673 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
674 | if (real_uv_nmi) { | ||
675 | uv_blade_info[bid].nmi_count++; | ||
676 | uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); | ||
677 | } | ||
678 | spin_unlock(&uv_blade_info[bid].nmi_lock); | ||
679 | } | ||
680 | |||
681 | if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) | ||
682 | return NOTIFY_DONE; | ||
683 | |||
684 | __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; | ||
685 | |||
686 | /* | ||
687 | * Use a lock so only one cpu prints at a time. | ||
688 | * This prevents intermixed output. | ||
654 | */ | 689 | */ |
655 | spin_lock(&uv_nmi_lock); | 690 | spin_lock(&uv_nmi_lock); |
656 | pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); | 691 | pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); |
657 | dump_stack(); | 692 | dump_stack(); |
658 | spin_unlock(&uv_nmi_lock); | 693 | spin_unlock(&uv_nmi_lock); |
659 | 694 | ||
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | |||
661 | } | 696 | } |
662 | 697 | ||
663 | static struct notifier_block uv_dump_stack_nmi_nb = { | 698 | static struct notifier_block uv_dump_stack_nmi_nb = { |
664 | .notifier_call = uv_handle_nmi | 699 | .notifier_call = uv_handle_nmi, |
700 | .priority = NMI_LOCAL_LOW_PRIOR - 1, | ||
665 | }; | 701 | }; |
666 | 702 | ||
667 | void uv_register_nmi_notifier(void) | 703 | void uv_register_nmi_notifier(void) |
@@ -720,8 +756,9 @@ void __init uv_system_init(void) | |||
720 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); | 756 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); |
721 | 757 | ||
722 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 758 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
723 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); | 759 | uv_blade_info = kzalloc(bytes, GFP_KERNEL); |
724 | BUG_ON(!uv_blade_info); | 760 | BUG_ON(!uv_blade_info); |
761 | |||
725 | for (blade = 0; blade < uv_num_possible_blades(); blade++) | 762 | for (blade = 0; blade < uv_num_possible_blades(); blade++) |
726 | uv_blade_info[blade].memory_nid = -1; | 763 | uv_blade_info[blade].memory_nid = -1; |
727 | 764 | ||
@@ -747,6 +784,7 @@ void __init uv_system_init(void) | |||
747 | uv_blade_info[blade].pnode = pnode; | 784 | uv_blade_info[blade].pnode = pnode; |
748 | uv_blade_info[blade].nr_possible_cpus = 0; | 785 | uv_blade_info[blade].nr_possible_cpus = 0; |
749 | uv_blade_info[blade].nr_online_cpus = 0; | 786 | uv_blade_info[blade].nr_online_cpus = 0; |
787 | spin_lock_init(&uv_blade_info[blade].nmi_lock); | ||
750 | max_pnode = max(pnode, max_pnode); | 788 | max_pnode = max(pnode, max_pnode); |
751 | blade++; | 789 | blade++; |
752 | } | 790 | } |