author     Robert Richter <robert.richter@amd.com>  2011-11-08 09:52:15 -0500
committer  Robert Richter <robert.richter@amd.com>  2011-11-08 09:52:15 -0500
commit     de346b6949063aa040ef607943b072835294f4b3 (patch)
tree       11f5a10b9ab41a10ea26bf8ab1f133b802e7559e /arch/x86
parent     dcfce4a095932e6e95d83ad982be3609947963bc (diff)
parent     9c48f1c629ecfa114850c03f875c6691003214de (diff)
Merge branch 'perf/core' into oprofile/master
Merge reason: Resolve conflicts with Don's NMI rework:

  commit 9c48f1c629ecfa114850c03f875c6691003214de
  Author: Don Zickus <dzickus@redhat.com>
  Date:   Fri Sep 30 15:06:21 2011 -0400

      x86, nmi: Wire up NMI handlers to new routines

Conflicts:
	arch/x86/oprofile/nmi_timer_int.c

Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/nmi.h               |  36
-rw-r--r--  arch/x86/include/asm/perf_event.h        |  15
-rw-r--r--  arch/x86/include/asm/reboot.h            |   2
-rw-r--r--  arch/x86/kernel/Makefile                 |   2
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c            |  27
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c       |  20
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c  |  20
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c         |  23
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c         |  69
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h         |  12
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c     |  13
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c   |  91
-rw-r--r--  arch/x86/kernel/crash.c                  |   5
-rw-r--r--  arch/x86/kernel/kgdb.c                   |  60
-rw-r--r--  arch/x86/kernel/nmi.c                    | 336
-rw-r--r--  arch/x86/kernel/reboot.c                 |  23
-rw-r--r--  arch/x86/kernel/rtc.c                    |  23
-rw-r--r--  arch/x86/kernel/traps.c                  | 155
-rw-r--r--  arch/x86/kvm/emulate.c                   |   2
-rw-r--r--  arch/x86/kvm/mmu.c                       |   3
-rw-r--r--  arch/x86/oprofile/nmi_int.c              |  38
-rw-r--r--  arch/x86/platform/mrst/vrtc.c            |   9
22 files changed, 605 insertions, 379 deletions
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 4886a68f267..53610957fea 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -22,25 +22,23 @@ void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
 
-/*
- * Define some priorities for the nmi notifier call chain.
- *
- * Create a local nmi bit that has a higher priority than
- * external nmis, because the local ones are more frequent.
- *
- * Also setup some default high/normal/low settings for
- * subsystems to registers with.  Using 4 bits to separate
- * the priorities.  This can go a lot higher if needed be.
- */
+#define NMI_FLAG_FIRST	1
+
+enum {
+	NMI_LOCAL=0,
+	NMI_UNKNOWN,
+	NMI_MAX
+};
+
+#define NMI_DONE	0
+#define NMI_HANDLED	1
 
-#define NMI_LOCAL_SHIFT		16	/* randomly picked */
-#define NMI_LOCAL_BIT		(1ULL << NMI_LOCAL_SHIFT)
-#define NMI_HIGH_PRIOR		(1ULL << 8)
-#define NMI_NORMAL_PRIOR	(1ULL << 4)
-#define NMI_LOW_PRIOR		(1ULL << 0)
-#define NMI_LOCAL_HIGH_PRIOR	(NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
-#define NMI_LOCAL_NORMAL_PRIOR	(NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
-#define NMI_LOCAL_LOW_PRIOR	(NMI_LOCAL_BIT | NMI_LOW_PRIOR)
+typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
+
+int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long,
+			 const char *);
+
+void unregister_nmi_handler(unsigned int, const char *);
 
 void stop_nmi(void);
 void restart_nmi(void);
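
The hunk above replaces the die-notifier priority scheme with per-type handler lists (NMI_LOCAL, NMI_UNKNOWN) and explicit NMI_DONE/NMI_HANDLED return codes. A minimal sketch of how a client registers against the new interface; the "demo" handler, its lookup key and the module wrapper are hypothetical and not part of this merge:

/* Sketch of a client of the new NMI handler API; "demo" names are invented. */
#include <linux/module.h>
#include <linux/ptrace.h>
#include <asm/nmi.h>

static int demo_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	/*
	 * Inspect whatever state raised the NMI; claim the event by
	 * returning NMI_HANDLED, otherwise return NMI_DONE so the rest
	 * of the NMI_LOCAL list (and then NMI_UNKNOWN) gets a chance.
	 */
	return NMI_DONE;
}

static int __init demo_init(void)
{
	/* flags = 0 (no NMI_FLAG_FIRST); "demo" is the unregister lookup key */
	return register_nmi_handler(NMI_LOCAL, demo_nmi_handler, 0, "demo");
}

static void __exit demo_exit(void)
{
	unregister_nmi_handler(NMI_LOCAL, "demo");
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");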
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 094fb30817a..e47cb6167e8 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
 
+#define AMD_PERFMON_EVENTSEL_GUESTONLY			(1ULL << 40)
+#define AMD_PERFMON_EVENTSEL_HOSTONLY			(1ULL << 41)
+
 #define AMD64_EVENTSEL_EVENT	\
 	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
 #define INTEL_ARCH_EVENT_MASK	\
@@ -159,7 +162,19 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 	);						\
 }
 
+struct perf_guest_switch_msr {
+	unsigned msr;
+	u64 host, guest;
+};
+
+extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 #else
+static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+	*nr = 0;
+	return NULL;
+}
+
 static inline void perf_events_lapic_init(void)	{ }
 #endif
 
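
perf_guest_get_msrs() hands a hypervisor the per-CPU {msr, host, guest} triples it must swap around guest entry and exit. The sketch below is illustrative only: it assumes the caller stays pinned to the CPU whose counters it is switching, and it writes the MSRs directly with wrmsrl(); a real hypervisor would typically feed these values into its VMX MSR auto-load lists instead. The demo_* function names are invented.

/* Illustrative consumer of perf_guest_get_msrs(); not taken from this merge. */
#include <asm/msr.h>
#include <asm/perf_event.h>

static void demo_load_guest_pmu_state(void)
{
	struct perf_guest_switch_msr *msrs;
	int i, nr;

	msrs = perf_guest_get_msrs(&nr);	/* per-CPU data: caller must stay pinned */
	for (i = 0; i < nr; i++)
		wrmsrl(msrs[i].msr, msrs[i].guest);
}

static void demo_load_host_pmu_state(void)
{
	struct perf_guest_switch_msr *msrs;
	int i, nr;

	msrs = perf_guest_get_msrs(&nr);
	for (i = 0; i < nr; i++)
		wrmsrl(msrs[i].msr, msrs[i].host);
}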
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 3250e3d605d..92f297069e8 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -23,7 +23,7 @@ void machine_real_restart(unsigned int type);
 #define MRR_BIOS	0
 #define MRR_APM		1
 
-typedef void (*nmi_shootdown_cb)(int, struct die_args*);
+typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
 void nmi_shootdown_cpus(nmi_shootdown_cb callback);
 
 #endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 82f2912155a..8baca3c4871 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -19,7 +19,7 @@ endif
 
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y			+= time.o ioport.o ldt.o dumpstack.o
+obj-y			+= time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)	+= irq_work.o
 obj-y			+= probe_roms.o
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index d5e57db0f7b..31cb9ae992b 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -60,22 +60,10 @@ void arch_trigger_all_cpu_backtrace(void)
 }
 
 static int __kprobes
-arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
-					 unsigned long cmd, void *__args)
+arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
-	struct die_args *args = __args;
-	struct pt_regs *regs;
 	int cpu;
 
-	switch (cmd) {
-	case DIE_NMI:
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	regs = args->regs;
 	cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
@@ -86,21 +74,16 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 		show_regs(regs);
 		arch_spin_unlock(&lock);
 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-		return NOTIFY_STOP;
+		return NMI_HANDLED;
 	}
 
-	return NOTIFY_DONE;
+	return NMI_DONE;
 }
 
-static __read_mostly struct notifier_block backtrace_notifier = {
-	.notifier_call = arch_trigger_all_cpu_backtrace_handler,
-	.next = NULL,
-	.priority = NMI_LOCAL_LOW_PRIOR,
-};
-
 static int __init register_trigger_all_cpu_backtrace(void)
 {
-	register_die_notifier(&backtrace_notifier);
+	register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler,
+			     0, "arch_bt");
 	return 0;
 }
 early_initcall(register_trigger_all_cpu_backtrace);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 34b18594e72..75be00ecfff 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -672,18 +672,11 @@ void __cpuinit uv_cpu_init(void)
 /*
  * When NMI is received, print a stack trace.
  */
-int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
+int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 {
 	unsigned long real_uv_nmi;
 	int bid;
 
-	if (reason != DIE_NMIUNKNOWN)
-		return NOTIFY_OK;
-
-	if (in_crash_kexec)
-		/* do nothing if entering the crash kernel */
-		return NOTIFY_OK;
-
 	/*
 	 * Each blade has an MMR that indicates when an NMI has been sent
 	 * to cpus on the blade. If an NMI is detected, atomically
@@ -704,7 +697,7 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 	}
 
 	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
-		return NOTIFY_DONE;
+		return NMI_DONE;
 
 	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
 
@@ -717,17 +710,12 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
 
-	return NOTIFY_STOP;
+	return NMI_HANDLED;
 }
 
-static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call = uv_handle_nmi,
-	.priority = NMI_LOCAL_LOW_PRIOR - 1,
-};
-
 void uv_register_nmi_notifier(void)
 {
-	if (register_die_notifier(&uv_dump_stack_nmi_nb))
+	if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
 		printk(KERN_WARNING "UV NMI handler failed to register\n");
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 0ed633c5048..6199232161c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 
 static cpumask_var_t mce_inject_cpumask;
 
-static int mce_raise_notify(struct notifier_block *self,
-			    unsigned long val, void *data)
+static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
-	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
-		return NOTIFY_DONE;
+	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
+		return NMI_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
-		raise_exception(m, args->regs);
+		raise_exception(m, regs);
 	else if (m->status)
 		raise_poll(m);
-	return NOTIFY_STOP;
+	return NMI_HANDLED;
 }
 
-static struct notifier_block mce_raise_nb = {
-	.notifier_call = mce_raise_notify,
-	.priority = NMI_LOCAL_NORMAL_PRIOR,
-};
-
 /* Inject mce on current CPU */
 static int raise_local(void)
 {
@@ -216,7 +209,8 @@ static int inject_init(void)
 		return -ENOMEM;
 	printk(KERN_INFO "Machine check injector initialized\n");
 	mce_chrdev_ops.write = mce_write;
-	register_die_notifier(&mce_raise_nb);
+	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
+			     "mce_notify");
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 08363b04212..fce51ad1f36 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -908,9 +908,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
908 908
909 percpu_inc(mce_exception_count); 909 percpu_inc(mce_exception_count);
910 910
911 if (notify_die(DIE_NMI, "machine check", regs, error_code,
912 18, SIGKILL) == NOTIFY_STOP)
913 goto out;
914 if (!banks) 911 if (!banks)
915 goto out; 912 goto out;
916 913
@@ -1140,6 +1137,15 @@ static void mce_start_timer(unsigned long data)
1140 add_timer_on(t, smp_processor_id()); 1137 add_timer_on(t, smp_processor_id());
1141} 1138}
1142 1139
1140/* Must not be called in IRQ context where del_timer_sync() can deadlock */
1141static void mce_timer_delete_all(void)
1142{
1143 int cpu;
1144
1145 for_each_online_cpu(cpu)
1146 del_timer_sync(&per_cpu(mce_timer, cpu));
1147}
1148
1143static void mce_do_trigger(struct work_struct *work) 1149static void mce_do_trigger(struct work_struct *work)
1144{ 1150{
1145 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); 1151 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
@@ -1750,7 +1756,6 @@ static struct syscore_ops mce_syscore_ops = {
1750 1756
1751static void mce_cpu_restart(void *data) 1757static void mce_cpu_restart(void *data)
1752{ 1758{
1753 del_timer_sync(&__get_cpu_var(mce_timer));
1754 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1759 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1755 return; 1760 return;
1756 __mcheck_cpu_init_generic(); 1761 __mcheck_cpu_init_generic();
@@ -1760,16 +1765,15 @@ static void mce_cpu_restart(void *data)
1760/* Reinit MCEs after user configuration changes */ 1765/* Reinit MCEs after user configuration changes */
1761static void mce_restart(void) 1766static void mce_restart(void)
1762{ 1767{
1768 mce_timer_delete_all();
1763 on_each_cpu(mce_cpu_restart, NULL, 1); 1769 on_each_cpu(mce_cpu_restart, NULL, 1);
1764} 1770}
1765 1771
1766/* Toggle features for corrected errors */ 1772/* Toggle features for corrected errors */
1767static void mce_disable_ce(void *all) 1773static void mce_disable_cmci(void *data)
1768{ 1774{
1769 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1775 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1770 return; 1776 return;
1771 if (all)
1772 del_timer_sync(&__get_cpu_var(mce_timer));
1773 cmci_clear(); 1777 cmci_clear();
1774} 1778}
1775 1779
@@ -1852,7 +1856,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
1852 if (mce_ignore_ce ^ !!new) { 1856 if (mce_ignore_ce ^ !!new) {
1853 if (new) { 1857 if (new) {
1854 /* disable ce features */ 1858 /* disable ce features */
1855 on_each_cpu(mce_disable_ce, (void *)1, 1); 1859 mce_timer_delete_all();
1860 on_each_cpu(mce_disable_cmci, NULL, 1);
1856 mce_ignore_ce = 1; 1861 mce_ignore_ce = 1;
1857 } else { 1862 } else {
1858 /* enable ce features */ 1863 /* enable ce features */
@@ -1875,7 +1880,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
1875 if (mce_cmci_disabled ^ !!new) { 1880 if (mce_cmci_disabled ^ !!new) {
1876 if (new) { 1881 if (new) {
1877 /* disable cmci */ 1882 /* disable cmci */
1878 on_each_cpu(mce_disable_ce, NULL, 1); 1883 on_each_cpu(mce_disable_cmci, NULL, 1);
1879 mce_cmci_disabled = 1; 1884 mce_cmci_disabled = 1;
1880 } else { 1885 } else {
1881 /* enable cmci */ 1886 /* enable cmci */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8ab89112f93..640891014b2 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1058,76 +1058,15 @@ void perf_events_lapic_init(void)
1058 apic_write(APIC_LVTPC, APIC_DM_NMI); 1058 apic_write(APIC_LVTPC, APIC_DM_NMI);
1059} 1059}
1060 1060
1061struct pmu_nmi_state {
1062 unsigned int marked;
1063 int handled;
1064};
1065
1066static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
1067
1068static int __kprobes 1061static int __kprobes
1069perf_event_nmi_handler(struct notifier_block *self, 1062perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1070 unsigned long cmd, void *__args)
1071{ 1063{
1072 struct die_args *args = __args;
1073 unsigned int this_nmi;
1074 int handled;
1075
1076 if (!atomic_read(&active_events)) 1064 if (!atomic_read(&active_events))
1077 return NOTIFY_DONE; 1065 return NMI_DONE;
1078
1079 switch (cmd) {
1080 case DIE_NMI:
1081 break;
1082 case DIE_NMIUNKNOWN:
1083 this_nmi = percpu_read(irq_stat.__nmi_count);
1084 if (this_nmi != __this_cpu_read(pmu_nmi.marked))
1085 /* let the kernel handle the unknown nmi */
1086 return NOTIFY_DONE;
1087 /*
1088 * This one is a PMU back-to-back nmi. Two events
1089 * trigger 'simultaneously' raising two back-to-back
1090 * NMIs. If the first NMI handles both, the latter
1091 * will be empty and daze the CPU. So, we drop it to
1092 * avoid false-positive 'unknown nmi' messages.
1093 */
1094 return NOTIFY_STOP;
1095 default:
1096 return NOTIFY_DONE;
1097 }
1098
1099 handled = x86_pmu.handle_irq(args->regs);
1100 if (!handled)
1101 return NOTIFY_DONE;
1102 1066
1103 this_nmi = percpu_read(irq_stat.__nmi_count); 1067 return x86_pmu.handle_irq(regs);
1104 if ((handled > 1) ||
1105 /* the next nmi could be a back-to-back nmi */
1106 ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
1107 (__this_cpu_read(pmu_nmi.handled) > 1))) {
1108 /*
1109 * We could have two subsequent back-to-back nmis: The
1110 * first handles more than one counter, the 2nd
1111 * handles only one counter and the 3rd handles no
1112 * counter.
1113 *
1114 * This is the 2nd nmi because the previous was
1115 * handling more than one counter. We will mark the
1116 * next (3rd) and then drop it if unhandled.
1117 */
1118 __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
1119 __this_cpu_write(pmu_nmi.handled, handled);
1120 }
1121
1122 return NOTIFY_STOP;
1123} 1068}
1124 1069
1125static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1126 .notifier_call = perf_event_nmi_handler,
1127 .next = NULL,
1128 .priority = NMI_LOCAL_LOW_PRIOR,
1129};
1130
1131struct event_constraint emptyconstraint; 1070struct event_constraint emptyconstraint;
1132struct event_constraint unconstrained; 1071struct event_constraint unconstrained;
1133 1072
@@ -1232,7 +1171,7 @@ static int __init init_hw_perf_events(void)
1232 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; 1171 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1233 1172
1234 perf_events_lapic_init(); 1173 perf_events_lapic_init();
1235 register_die_notifier(&perf_event_nmi_notifier); 1174 register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
1236 1175
1237 unconstrained = (struct event_constraint) 1176 unconstrained = (struct event_constraint)
1238 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1177 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index fb330b0a816..b9698d40ac4 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -131,6 +131,13 @@ struct cpu_hw_events {
131 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 131 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
132 132
133 /* 133 /*
134 * Intel host/guest exclude bits
135 */
136 u64 intel_ctrl_guest_mask;
137 u64 intel_ctrl_host_mask;
138 struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
139
140 /*
134 * manage shared (per-core, per-cpu) registers 141 * manage shared (per-core, per-cpu) registers
135 * used on Intel NHM/WSM/SNB 142 * used on Intel NHM/WSM/SNB
136 */ 143 */
@@ -295,6 +302,11 @@ struct x86_pmu {
295 */ 302 */
296 struct extra_reg *extra_regs; 303 struct extra_reg *extra_regs;
297 unsigned int er_flags; 304 unsigned int er_flags;
305
306 /*
307 * Intel host/guest support (KVM)
308 */
309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
298}; 310};
299 311
300#define ERF_NO_HT_SHARING 1 312#define ERF_NO_HT_SHARING 1
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 384450d6712..db8e603ff0c 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -138,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (event->attr.exclude_host && event->attr.exclude_guest)
+		/*
+		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
+		 * and will count in both modes. We don't want to count in that
+		 * case so we emulate no-counting by setting US = OS = 0.
+		 */
+		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+				      ARCH_PERFMON_EVENTSEL_OS);
+	else if (event->attr.exclude_host)
+		event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+	else if (event->attr.exclude_guest)
+		event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+
 	if (event->attr.type != PERF_TYPE_RAW)
 		return 0;
 
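
The GUESTONLY/HOSTONLY event-select bits above are driven by the exclude_host/exclude_guest fields of struct perf_event_attr, which are introduced elsewhere in the same perf/core series (not in this arch/x86 diff). A hedged userspace sketch that asks for a guest-only cycle count via perf_event_open(); event choice and error handling are kept minimal, and it assumes a kernel with the merged series:

/* Userspace sketch: count CPU cycles only while a guest is running. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.exclude_host = 1;	/* maps to AMD_PERFMON_EVENTSEL_GUESTONLY on AMD */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	sleep(1);
	read(fd, &count, sizeof(count));
	printf("guest-only cycles: %lld\n", count);
	return 0;
}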
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 61fa35750b9..e09ca20e86e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -749,7 +749,8 @@ static void intel_pmu_enable_all(int added)
749 749
750 intel_pmu_pebs_enable_all(); 750 intel_pmu_pebs_enable_all();
751 intel_pmu_lbr_enable_all(); 751 intel_pmu_lbr_enable_all();
752 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 752 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
753 x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
753 754
754 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 755 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
755 struct perf_event *event = 756 struct perf_event *event =
@@ -872,6 +873,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
872static void intel_pmu_disable_event(struct perf_event *event) 873static void intel_pmu_disable_event(struct perf_event *event)
873{ 874{
874 struct hw_perf_event *hwc = &event->hw; 875 struct hw_perf_event *hwc = &event->hw;
876 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
875 877
876 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 878 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
877 intel_pmu_disable_bts(); 879 intel_pmu_disable_bts();
@@ -879,6 +881,9 @@ static void intel_pmu_disable_event(struct perf_event *event)
879 return; 881 return;
880 } 882 }
881 883
884 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
885 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
886
882 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 887 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
883 intel_pmu_disable_fixed(hwc); 888 intel_pmu_disable_fixed(hwc);
884 return; 889 return;
@@ -924,6 +929,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
924static void intel_pmu_enable_event(struct perf_event *event) 929static void intel_pmu_enable_event(struct perf_event *event)
925{ 930{
926 struct hw_perf_event *hwc = &event->hw; 931 struct hw_perf_event *hwc = &event->hw;
932 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
927 933
928 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 934 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
929 if (!__this_cpu_read(cpu_hw_events.enabled)) 935 if (!__this_cpu_read(cpu_hw_events.enabled))
@@ -933,6 +939,11 @@ static void intel_pmu_enable_event(struct perf_event *event)
933 return; 939 return;
934 } 940 }
935 941
942 if (event->attr.exclude_host)
943 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
944 if (event->attr.exclude_guest)
945 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
946
936 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 947 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
937 intel_pmu_enable_fixed(hwc); 948 intel_pmu_enable_fixed(hwc);
938 return; 949 return;
@@ -1302,12 +1313,84 @@ static int intel_pmu_hw_config(struct perf_event *event)
1302 return 0; 1313 return 0;
1303} 1314}
1304 1315
1316struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
1317{
1318 if (x86_pmu.guest_get_msrs)
1319 return x86_pmu.guest_get_msrs(nr);
1320 *nr = 0;
1321 return NULL;
1322}
1323EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
1324
1325static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
1326{
1327 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1328 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1329
1330 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
1331 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
1332 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
1333
1334 *nr = 1;
1335 return arr;
1336}
1337
1338static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
1339{
1340 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1341 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1342 int idx;
1343
1344 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1345 struct perf_event *event = cpuc->events[idx];
1346
1347 arr[idx].msr = x86_pmu_config_addr(idx);
1348 arr[idx].host = arr[idx].guest = 0;
1349
1350 if (!test_bit(idx, cpuc->active_mask))
1351 continue;
1352
1353 arr[idx].host = arr[idx].guest =
1354 event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
1355
1356 if (event->attr.exclude_host)
1357 arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1358 else if (event->attr.exclude_guest)
1359 arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1360 }
1361
1362 *nr = x86_pmu.num_counters;
1363 return arr;
1364}
1365
1366static void core_pmu_enable_event(struct perf_event *event)
1367{
1368 if (!event->attr.exclude_host)
1369 x86_pmu_enable_event(event);
1370}
1371
1372static void core_pmu_enable_all(int added)
1373{
1374 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1375 int idx;
1376
1377 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1378 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
1379
1380 if (!test_bit(idx, cpuc->active_mask) ||
1381 cpuc->events[idx]->attr.exclude_host)
1382 continue;
1383
1384 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
1385 }
1386}
1387
1305static __initconst const struct x86_pmu core_pmu = { 1388static __initconst const struct x86_pmu core_pmu = {
1306 .name = "core", 1389 .name = "core",
1307 .handle_irq = x86_pmu_handle_irq, 1390 .handle_irq = x86_pmu_handle_irq,
1308 .disable_all = x86_pmu_disable_all, 1391 .disable_all = x86_pmu_disable_all,
1309 .enable_all = x86_pmu_enable_all, 1392 .enable_all = core_pmu_enable_all,
1310 .enable = x86_pmu_enable_event, 1393 .enable = core_pmu_enable_event,
1311 .disable = x86_pmu_disable_event, 1394 .disable = x86_pmu_disable_event,
1312 .hw_config = x86_pmu_hw_config, 1395 .hw_config = x86_pmu_hw_config,
1313 .schedule_events = x86_schedule_events, 1396 .schedule_events = x86_schedule_events,
@@ -1325,6 +1408,7 @@ static __initconst const struct x86_pmu core_pmu = {
1325 .get_event_constraints = intel_get_event_constraints, 1408 .get_event_constraints = intel_get_event_constraints,
1326 .put_event_constraints = intel_put_event_constraints, 1409 .put_event_constraints = intel_put_event_constraints,
1327 .event_constraints = intel_core_event_constraints, 1410 .event_constraints = intel_core_event_constraints,
1411 .guest_get_msrs = core_guest_get_msrs,
1328}; 1412};
1329 1413
1330struct intel_shared_regs *allocate_shared_regs(int cpu) 1414struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1431,6 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1431 .cpu_prepare = intel_pmu_cpu_prepare, 1515 .cpu_prepare = intel_pmu_cpu_prepare,
1432 .cpu_starting = intel_pmu_cpu_starting, 1516 .cpu_starting = intel_pmu_cpu_starting,
1433 .cpu_dying = intel_pmu_cpu_dying, 1517 .cpu_dying = intel_pmu_cpu_dying,
1518 .guest_get_msrs = intel_guest_get_msrs,
1434}; 1519};
1435 1520
1436static void intel_clovertown_quirks(void) 1521static void intel_clovertown_quirks(void)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 764c7c2b181..13ad89971d4 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -32,15 +32,12 @@ int in_crash_kexec;
 
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
-static void kdump_nmi_callback(int cpu, struct die_args *args)
+static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 {
-	struct pt_regs *regs;
 #ifdef CONFIG_X86_32
 	struct pt_regs fixed_regs;
 #endif
 
-	regs = args->regs;
-
 #ifdef CONFIG_X86_32
 	if (!user_mode_vm(regs)) {
 		crash_fixup_ss_esp(&fixed_regs, regs);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 00354d4919a..faba5771aca 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -511,28 +511,37 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
511 511
512static int was_in_debug_nmi[NR_CPUS]; 512static int was_in_debug_nmi[NR_CPUS];
513 513
514static int __kgdb_notify(struct die_args *args, unsigned long cmd) 514static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
515{ 515{
516 struct pt_regs *regs = args->regs;
517
518 switch (cmd) { 516 switch (cmd) {
519 case DIE_NMI: 517 case NMI_LOCAL:
520 if (atomic_read(&kgdb_active) != -1) { 518 if (atomic_read(&kgdb_active) != -1) {
521 /* KGDB CPU roundup */ 519 /* KGDB CPU roundup */
522 kgdb_nmicallback(raw_smp_processor_id(), regs); 520 kgdb_nmicallback(raw_smp_processor_id(), regs);
523 was_in_debug_nmi[raw_smp_processor_id()] = 1; 521 was_in_debug_nmi[raw_smp_processor_id()] = 1;
524 touch_nmi_watchdog(); 522 touch_nmi_watchdog();
525 return NOTIFY_STOP; 523 return NMI_HANDLED;
526 } 524 }
527 return NOTIFY_DONE; 525 break;
528 526
529 case DIE_NMIUNKNOWN: 527 case NMI_UNKNOWN:
530 if (was_in_debug_nmi[raw_smp_processor_id()]) { 528 if (was_in_debug_nmi[raw_smp_processor_id()]) {
531 was_in_debug_nmi[raw_smp_processor_id()] = 0; 529 was_in_debug_nmi[raw_smp_processor_id()] = 0;
532 return NOTIFY_STOP; 530 return NMI_HANDLED;
533 } 531 }
534 return NOTIFY_DONE; 532 break;
533 default:
534 /* do nothing */
535 break;
536 }
537 return NMI_DONE;
538}
539
540static int __kgdb_notify(struct die_args *args, unsigned long cmd)
541{
542 struct pt_regs *regs = args->regs;
535 543
544 switch (cmd) {
536 case DIE_DEBUG: 545 case DIE_DEBUG:
537 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { 546 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
538 if (user_mode(regs)) 547 if (user_mode(regs))
@@ -590,11 +599,6 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
590 599
591static struct notifier_block kgdb_notifier = { 600static struct notifier_block kgdb_notifier = {
592 .notifier_call = kgdb_notify, 601 .notifier_call = kgdb_notify,
593
594 /*
595 * Lowest-prio notifier priority, we want to be notified last:
596 */
597 .priority = NMI_LOCAL_LOW_PRIOR,
598}; 602};
599 603
600/** 604/**
@@ -605,7 +609,31 @@ static struct notifier_block kgdb_notifier = {
605 */ 609 */
606int kgdb_arch_init(void) 610int kgdb_arch_init(void)
607{ 611{
608 return register_die_notifier(&kgdb_notifier); 612 int retval;
613
614 retval = register_die_notifier(&kgdb_notifier);
615 if (retval)
616 goto out;
617
618 retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler,
619 0, "kgdb");
620 if (retval)
621 goto out1;
622
623 retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler,
624 0, "kgdb");
625
626 if (retval)
627 goto out2;
628
629 return retval;
630
631out2:
632 unregister_nmi_handler(NMI_LOCAL, "kgdb");
633out1:
634 unregister_die_notifier(&kgdb_notifier);
635out:
636 return retval;
609} 637}
610 638
611static void kgdb_hw_overflow_handler(struct perf_event *event, 639static void kgdb_hw_overflow_handler(struct perf_event *event,
@@ -673,6 +701,8 @@ void kgdb_arch_exit(void)
673 breakinfo[i].pev = NULL; 701 breakinfo[i].pev = NULL;
674 } 702 }
675 } 703 }
704 unregister_nmi_handler(NMI_UNKNOWN, "kgdb");
705 unregister_nmi_handler(NMI_LOCAL, "kgdb");
676 unregister_die_notifier(&kgdb_notifier); 706 unregister_die_notifier(&kgdb_notifier);
677} 707}
678 708
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
new file mode 100644
index 00000000000..e20f5e79059
--- /dev/null
+++ b/arch/x86/kernel/nmi.c
@@ -0,0 +1,336 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 * Copyright (C) 2011 Don Zickus Red Hat, Inc.
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 */
9
10/*
11 * Handle hardware traps and faults.
12 */
13#include <linux/spinlock.h>
14#include <linux/kprobes.h>
15#include <linux/kdebug.h>
16#include <linux/nmi.h>
17#include <linux/delay.h>
18#include <linux/hardirq.h>
19#include <linux/slab.h>
20
21#if defined(CONFIG_EDAC)
22#include <linux/edac.h>
23#endif
24
25#include <linux/atomic.h>
26#include <asm/traps.h>
27#include <asm/mach_traps.h>
28#include <asm/nmi.h>
29
30#define NMI_MAX_NAMELEN 16
31struct nmiaction {
32 struct list_head list;
33 nmi_handler_t handler;
34 unsigned int flags;
35 char *name;
36};
37
38struct nmi_desc {
39 spinlock_t lock;
40 struct list_head head;
41};
42
43static struct nmi_desc nmi_desc[NMI_MAX] =
44{
45 {
46 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
47 .head = LIST_HEAD_INIT(nmi_desc[0].head),
48 },
49 {
50 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
51 .head = LIST_HEAD_INIT(nmi_desc[1].head),
52 },
53
54};
55
56static int ignore_nmis;
57
58int unknown_nmi_panic;
59/*
60 * Prevent NMI reason port (0x61) being accessed simultaneously, can
61 * only be used in NMI handler.
62 */
63static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
64
65static int __init setup_unknown_nmi_panic(char *str)
66{
67 unknown_nmi_panic = 1;
68 return 1;
69}
70__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
71
72#define nmi_to_desc(type) (&nmi_desc[type])
73
74static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs)
75{
76 struct nmi_desc *desc = nmi_to_desc(type);
77 struct nmiaction *a;
78 int handled=0;
79
80 rcu_read_lock();
81
82 /*
83 * NMIs are edge-triggered, which means if you have enough
84 * of them concurrently, you can lose some because only one
85 * can be latched at any given time. Walk the whole list
86 * to handle those situations.
87 */
88 list_for_each_entry_rcu(a, &desc->head, list) {
89
90 handled += a->handler(type, regs);
91
92 }
93
94 rcu_read_unlock();
95
96 /* return total number of NMI events handled */
97 return handled;
98}
99
100static int __setup_nmi(unsigned int type, struct nmiaction *action)
101{
102 struct nmi_desc *desc = nmi_to_desc(type);
103 unsigned long flags;
104
105 spin_lock_irqsave(&desc->lock, flags);
106
107 /*
108 * some handlers need to be executed first otherwise a fake
109 * event confuses some handlers (kdump uses this flag)
110 */
111 if (action->flags & NMI_FLAG_FIRST)
112 list_add_rcu(&action->list, &desc->head);
113 else
114 list_add_tail_rcu(&action->list, &desc->head);
115
116 spin_unlock_irqrestore(&desc->lock, flags);
117 return 0;
118}
119
120static struct nmiaction *__free_nmi(unsigned int type, const char *name)
121{
122 struct nmi_desc *desc = nmi_to_desc(type);
123 struct nmiaction *n;
124 unsigned long flags;
125
126 spin_lock_irqsave(&desc->lock, flags);
127
128 list_for_each_entry_rcu(n, &desc->head, list) {
129 /*
130 * the name passed in to describe the nmi handler
131 * is used as the lookup key
132 */
133 if (!strcmp(n->name, name)) {
134 WARN(in_nmi(),
135 "Trying to free NMI (%s) from NMI context!\n", n->name);
136 list_del_rcu(&n->list);
137 break;
138 }
139 }
140
141 spin_unlock_irqrestore(&desc->lock, flags);
142 synchronize_rcu();
143 return (n);
144}
145
146int register_nmi_handler(unsigned int type, nmi_handler_t handler,
147 unsigned long nmiflags, const char *devname)
148{
149 struct nmiaction *action;
150 int retval = -ENOMEM;
151
152 if (!handler)
153 return -EINVAL;
154
155 action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
156 if (!action)
157 goto fail_action;
158
159 action->handler = handler;
160 action->flags = nmiflags;
161 action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
162 if (!action->name)
163 goto fail_action_name;
164
165 retval = __setup_nmi(type, action);
166
167 if (retval)
168 goto fail_setup_nmi;
169
170 return retval;
171
172fail_setup_nmi:
173 kfree(action->name);
174fail_action_name:
175 kfree(action);
176fail_action:
177
178 return retval;
179}
180EXPORT_SYMBOL_GPL(register_nmi_handler);
181
182void unregister_nmi_handler(unsigned int type, const char *name)
183{
184 struct nmiaction *a;
185
186 a = __free_nmi(type, name);
187 if (a) {
188 kfree(a->name);
189 kfree(a);
190 }
191}
192
193EXPORT_SYMBOL_GPL(unregister_nmi_handler);
194
195static notrace __kprobes void
196pci_serr_error(unsigned char reason, struct pt_regs *regs)
197{
198 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
199 reason, smp_processor_id());
200
201 /*
202 * On some machines, PCI SERR line is used to report memory
203 * errors. EDAC makes use of it.
204 */
205#if defined(CONFIG_EDAC)
206 if (edac_handler_set()) {
207 edac_atomic_assert_error();
208 return;
209 }
210#endif
211
212 if (panic_on_unrecovered_nmi)
213 panic("NMI: Not continuing");
214
215 pr_emerg("Dazed and confused, but trying to continue\n");
216
217 /* Clear and disable the PCI SERR error line. */
218 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
219 outb(reason, NMI_REASON_PORT);
220}
221
222static notrace __kprobes void
223io_check_error(unsigned char reason, struct pt_regs *regs)
224{
225 unsigned long i;
226
227 pr_emerg(
228 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
229 reason, smp_processor_id());
230 show_registers(regs);
231
232 if (panic_on_io_nmi)
233 panic("NMI IOCK error: Not continuing");
234
235 /* Re-enable the IOCK line, wait for a few seconds */
236 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
237 outb(reason, NMI_REASON_PORT);
238
239 i = 20000;
240 while (--i) {
241 touch_nmi_watchdog();
242 udelay(100);
243 }
244
245 reason &= ~NMI_REASON_CLEAR_IOCHK;
246 outb(reason, NMI_REASON_PORT);
247}
248
249static notrace __kprobes void
250unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
251{
252 int handled;
253
254 handled = nmi_handle(NMI_UNKNOWN, regs);
255 if (handled)
256 return;
257#ifdef CONFIG_MCA
258 /*
259 * Might actually be able to figure out what the guilty party
260 * is:
261 */
262 if (MCA_bus) {
263 mca_handle_nmi();
264 return;
265 }
266#endif
267 pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
268 reason, smp_processor_id());
269
270 pr_emerg("Do you have a strange power saving mode enabled?\n");
271 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
272 panic("NMI: Not continuing");
273
274 pr_emerg("Dazed and confused, but trying to continue\n");
275}
276
277static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
278{
279 unsigned char reason = 0;
280 int handled;
281
282 /*
283 * CPU-specific NMI must be processed before non-CPU-specific
284 * NMI, otherwise we may lose it, because the CPU-specific
285 * NMI can not be detected/processed on other CPUs.
286 */
287 handled = nmi_handle(NMI_LOCAL, regs);
288 if (handled)
289 return;
290
291 /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
292 raw_spin_lock(&nmi_reason_lock);
293 reason = get_nmi_reason();
294
295 if (reason & NMI_REASON_MASK) {
296 if (reason & NMI_REASON_SERR)
297 pci_serr_error(reason, regs);
298 else if (reason & NMI_REASON_IOCHK)
299 io_check_error(reason, regs);
300#ifdef CONFIG_X86_32
301 /*
302 * Reassert NMI in case it became active
303 * meanwhile as it's edge-triggered:
304 */
305 reassert_nmi();
306#endif
307 raw_spin_unlock(&nmi_reason_lock);
308 return;
309 }
310 raw_spin_unlock(&nmi_reason_lock);
311
312 unknown_nmi_error(reason, regs);
313}
314
315dotraplinkage notrace __kprobes void
316do_nmi(struct pt_regs *regs, long error_code)
317{
318 nmi_enter();
319
320 inc_irq_stat(__nmi_count);
321
322 if (!ignore_nmis)
323 default_do_nmi(regs);
324
325 nmi_exit();
326}
327
328void stop_nmi(void)
329{
330 ignore_nmis++;
331}
332
333void restart_nmi(void)
334{
335 ignore_nmis--;
336}
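
nmi_handle() above sums the return value of every handler on a list, so a handler is expected to return the number of NMI events it actually consumed; any positive total short-circuits the unknown-NMI path. A sketch of that convention, with a hypothetical, stubbed-out device accessor (demo_read_and_clear_status) and a registration that uses NMI_FLAG_FIRST to sit at the head of the NMI_LOCAL list, the same flag the crash shootdown path uses; none of this is from the merge itself:

/* Sketch only: demo_read_and_clear_status() stands in for a real device register. */
#include <linux/module.h>
#include <linux/ptrace.h>
#include <asm/nmi.h>

static int demo_read_and_clear_status(void)
{
	return 0;	/* pretend no event is pending */
}

static int demo_count_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	int handled = 0;

	/* Drain every pending event: NMIs are edge-triggered and can merge. */
	while (demo_read_and_clear_status())
		handled++;

	return handled;	/* 0 behaves as NMI_DONE; any positive count is "handled" */
}

static int __init demo_count_init(void)
{
	/* NMI_FLAG_FIRST places the handler at the head of the NMI_LOCAL list. */
	return register_nmi_handler(NMI_LOCAL, demo_count_nmi_handler,
				    NMI_FLAG_FIRST, "demo_count");
}

static void __exit demo_count_exit(void)
{
	unregister_nmi_handler(NMI_LOCAL, "demo_count");
}

module_init(demo_count_init);
module_exit(demo_count_exit);
MODULE_LICENSE("GPL");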
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9242436e993..e334be1182b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -464,7 +464,7 @@ static inline void kb_wait(void)
464 } 464 }
465} 465}
466 466
467static void vmxoff_nmi(int cpu, struct die_args *args) 467static void vmxoff_nmi(int cpu, struct pt_regs *regs)
468{ 468{
469 cpu_emergency_vmxoff(); 469 cpu_emergency_vmxoff();
470} 470}
@@ -736,14 +736,10 @@ static nmi_shootdown_cb shootdown_callback;
736 736
737static atomic_t waiting_for_crash_ipi; 737static atomic_t waiting_for_crash_ipi;
738 738
739static int crash_nmi_callback(struct notifier_block *self, 739static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
740 unsigned long val, void *data)
741{ 740{
742 int cpu; 741 int cpu;
743 742
744 if (val != DIE_NMI)
745 return NOTIFY_OK;
746
747 cpu = raw_smp_processor_id(); 743 cpu = raw_smp_processor_id();
748 744
749 /* Don't do anything if this handler is invoked on crashing cpu. 745 /* Don't do anything if this handler is invoked on crashing cpu.
@@ -751,10 +747,10 @@ static int crash_nmi_callback(struct notifier_block *self,
751 * an NMI if system was initially booted with nmi_watchdog parameter. 747 * an NMI if system was initially booted with nmi_watchdog parameter.
752 */ 748 */
753 if (cpu == crashing_cpu) 749 if (cpu == crashing_cpu)
754 return NOTIFY_STOP; 750 return NMI_HANDLED;
755 local_irq_disable(); 751 local_irq_disable();
756 752
757 shootdown_callback(cpu, (struct die_args *)data); 753 shootdown_callback(cpu, regs);
758 754
759 atomic_dec(&waiting_for_crash_ipi); 755 atomic_dec(&waiting_for_crash_ipi);
760 /* Assume hlt works */ 756 /* Assume hlt works */
@@ -762,7 +758,7 @@ static int crash_nmi_callback(struct notifier_block *self,
762 for (;;) 758 for (;;)
763 cpu_relax(); 759 cpu_relax();
764 760
765 return 1; 761 return NMI_HANDLED;
766} 762}
767 763
768static void smp_send_nmi_allbutself(void) 764static void smp_send_nmi_allbutself(void)
@@ -770,12 +766,6 @@ static void smp_send_nmi_allbutself(void)
770 apic->send_IPI_allbutself(NMI_VECTOR); 766 apic->send_IPI_allbutself(NMI_VECTOR);
771} 767}
772 768
773static struct notifier_block crash_nmi_nb = {
774 .notifier_call = crash_nmi_callback,
775 /* we want to be the first one called */
776 .priority = NMI_LOCAL_HIGH_PRIOR+1,
777};
778
779/* Halt all other CPUs, calling the specified function on each of them 769/* Halt all other CPUs, calling the specified function on each of them
780 * 770 *
781 * This function can be used to halt all other CPUs on crash 771 * This function can be used to halt all other CPUs on crash
@@ -794,7 +784,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
794 784
795 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); 785 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
796 /* Would it be better to replace the trap vector here? */ 786 /* Would it be better to replace the trap vector here? */
797 if (register_die_notifier(&crash_nmi_nb)) 787 if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
788 NMI_FLAG_FIRST, "crash"))
798 return; /* return what? */ 789 return; /* return what? */
799 /* Ensure the new callback function is set before sending 790 /* Ensure the new callback function is set before sending
800 * out the NMI 791 * out the NMI
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 3f2ad2640d8..ccdbc16b894 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -42,8 +42,11 @@ int mach_set_rtc_mmss(unsigned long nowtime)
42{ 42{
43 int real_seconds, real_minutes, cmos_minutes; 43 int real_seconds, real_minutes, cmos_minutes;
44 unsigned char save_control, save_freq_select; 44 unsigned char save_control, save_freq_select;
45 unsigned long flags;
45 int retval = 0; 46 int retval = 0;
46 47
48 spin_lock_irqsave(&rtc_lock, flags);
49
47 /* tell the clock it's being set */ 50 /* tell the clock it's being set */
48 save_control = CMOS_READ(RTC_CONTROL); 51 save_control = CMOS_READ(RTC_CONTROL);
49 CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); 52 CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
@@ -93,12 +96,17 @@ int mach_set_rtc_mmss(unsigned long nowtime)
93 CMOS_WRITE(save_control, RTC_CONTROL); 96 CMOS_WRITE(save_control, RTC_CONTROL);
94 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); 97 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
95 98
99 spin_unlock_irqrestore(&rtc_lock, flags);
100
96 return retval; 101 return retval;
97} 102}
98 103
99unsigned long mach_get_cmos_time(void) 104unsigned long mach_get_cmos_time(void)
100{ 105{
101 unsigned int status, year, mon, day, hour, min, sec, century = 0; 106 unsigned int status, year, mon, day, hour, min, sec, century = 0;
107 unsigned long flags;
108
109 spin_lock_irqsave(&rtc_lock, flags);
102 110
103 /* 111 /*
104 * If UIP is clear, then we have >= 244 microseconds before 112 * If UIP is clear, then we have >= 244 microseconds before
@@ -125,6 +133,8 @@ unsigned long mach_get_cmos_time(void)
125 status = CMOS_READ(RTC_CONTROL); 133 status = CMOS_READ(RTC_CONTROL);
126 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); 134 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
127 135
136 spin_unlock_irqrestore(&rtc_lock, flags);
137
128 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { 138 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
129 sec = bcd2bin(sec); 139 sec = bcd2bin(sec);
130 min = bcd2bin(min); 140 min = bcd2bin(min);
@@ -169,24 +179,15 @@ EXPORT_SYMBOL(rtc_cmos_write);
169 179
170int update_persistent_clock(struct timespec now) 180int update_persistent_clock(struct timespec now)
171{ 181{
172 unsigned long flags; 182 return x86_platform.set_wallclock(now.tv_sec);
173 int retval;
174
175 spin_lock_irqsave(&rtc_lock, flags);
176 retval = x86_platform.set_wallclock(now.tv_sec);
177 spin_unlock_irqrestore(&rtc_lock, flags);
178
179 return retval;
180} 183}
181 184
182/* not static: needed by APM */ 185/* not static: needed by APM */
183void read_persistent_clock(struct timespec *ts) 186void read_persistent_clock(struct timespec *ts)
184{ 187{
185 unsigned long retval, flags; 188 unsigned long retval;
186 189
187 spin_lock_irqsave(&rtc_lock, flags);
188 retval = x86_platform.get_wallclock(); 190 retval = x86_platform.get_wallclock();
189 spin_unlock_irqrestore(&rtc_lock, flags);
190 191
191 ts->tv_sec = retval; 192 ts->tv_sec = retval;
192 ts->tv_nsec = 0; 193 ts->tv_nsec = 0;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6913369c234..a8e3eb83466 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -81,15 +81,6 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
81DECLARE_BITMAP(used_vectors, NR_VECTORS); 81DECLARE_BITMAP(used_vectors, NR_VECTORS);
82EXPORT_SYMBOL_GPL(used_vectors); 82EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis;
85
86int unknown_nmi_panic;
87/*
88 * Prevent NMI reason port (0x61) being accessed simultaneously, can
89 * only be used in NMI handler.
90 */
91static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
92
93static inline void conditional_sti(struct pt_regs *regs) 84static inline void conditional_sti(struct pt_regs *regs)
94{ 85{
95 if (regs->flags & X86_EFLAGS_IF) 86 if (regs->flags & X86_EFLAGS_IF)
@@ -307,152 +298,6 @@ gp_in_kernel:
307 die("general protection fault", regs, error_code); 298 die("general protection fault", regs, error_code);
308} 299}
309 300
310static int __init setup_unknown_nmi_panic(char *str)
311{
312 unknown_nmi_panic = 1;
313 return 1;
314}
315__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
316
317static notrace __kprobes void
318pci_serr_error(unsigned char reason, struct pt_regs *regs)
319{
320 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
321 reason, smp_processor_id());
322
323 /*
324 * On some machines, PCI SERR line is used to report memory
325 * errors. EDAC makes use of it.
326 */
327#if defined(CONFIG_EDAC)
328 if (edac_handler_set()) {
329 edac_atomic_assert_error();
330 return;
331 }
332#endif
333
334 if (panic_on_unrecovered_nmi)
335 panic("NMI: Not continuing");
336
337 pr_emerg("Dazed and confused, but trying to continue\n");
338
339 /* Clear and disable the PCI SERR error line. */
340 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
341 outb(reason, NMI_REASON_PORT);
342}
343
344static notrace __kprobes void
345io_check_error(unsigned char reason, struct pt_regs *regs)
346{
347 unsigned long i;
348
349 pr_emerg(
350 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
351 reason, smp_processor_id());
352 show_registers(regs);
353
354 if (panic_on_io_nmi)
355 panic("NMI IOCK error: Not continuing");
356
357 /* Re-enable the IOCK line, wait for a few seconds */
358 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
359 outb(reason, NMI_REASON_PORT);
360
361 i = 20000;
362 while (--i) {
363 touch_nmi_watchdog();
364 udelay(100);
365 }
366
367 reason &= ~NMI_REASON_CLEAR_IOCHK;
368 outb(reason, NMI_REASON_PORT);
369}
370
371static notrace __kprobes void
372unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
373{
374 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
375 NOTIFY_STOP)
376 return;
377#ifdef CONFIG_MCA
378 /*
379 * Might actually be able to figure out what the guilty party
380 * is:
381 */
382 if (MCA_bus) {
383 mca_handle_nmi();
384 return;
385 }
386#endif
387 pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
388 reason, smp_processor_id());
389
390 pr_emerg("Do you have a strange power saving mode enabled?\n");
391 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
392 panic("NMI: Not continuing");
393
394 pr_emerg("Dazed and confused, but trying to continue\n");
395}
396
397static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
398{
399 unsigned char reason = 0;
400
401 /*
402 * CPU-specific NMI must be processed before non-CPU-specific
403 * NMI, otherwise we may lose it, because the CPU-specific
404 * NMI can not be detected/processed on other CPUs.
405 */
406 if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
407 return;
408
409 /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
410 raw_spin_lock(&nmi_reason_lock);
411 reason = get_nmi_reason();
412
413 if (reason & NMI_REASON_MASK) {
414 if (reason & NMI_REASON_SERR)
415 pci_serr_error(reason, regs);
416 else if (reason & NMI_REASON_IOCHK)
417 io_check_error(reason, regs);
418#ifdef CONFIG_X86_32
419 /*
420 * Reassert NMI in case it became active
421 * meanwhile as it's edge-triggered:
422 */
423 reassert_nmi();
424#endif
425 raw_spin_unlock(&nmi_reason_lock);
426 return;
427 }
428 raw_spin_unlock(&nmi_reason_lock);
429
430 unknown_nmi_error(reason, regs);
431}
432
433dotraplinkage notrace __kprobes void
434do_nmi(struct pt_regs *regs, long error_code)
435{
436 nmi_enter();
437
438 inc_irq_stat(__nmi_count);
439
440 if (!ignore_nmis)
441 default_do_nmi(regs);
442
443 nmi_exit();
444}
445
446void stop_nmi(void)
447{
448 ignore_nmis++;
449}
450
451void restart_nmi(void)
452{
453 ignore_nmis--;
454}
455
456/* May run on IST stack. */ 301/* May run on IST stack. */
457dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 302dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
458{ 303{
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 6f08bc940fa..8b4cc5f067d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3603,7 +3603,7 @@ done_prefixes:
 		break;
 	case Src2CL:
 		ctxt->src2.bytes = 1;
-		ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0x8;
+		ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
 		break;
 	case Src2ImmByte:
 		rc = decode_imm(ctxt, &ctxt->src2, 1, true);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1c5b69373a0..8e8da7960db 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -400,7 +400,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 
 	/* xchg acts as a barrier before the setting of the high bits */
 	orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low);
-	orig.spte_high = ssptep->spte_high = sspte.spte_high;
+	orig.spte_high = ssptep->spte_high;
+	ssptep->spte_high = sspte.spte_high;
 	count_spte_clear(sptep, spte);
 
 	return orig.spte;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 7ca4d43e898..990c35bfa88 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -61,26 +61,15 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
61} 61}
62 62
63 63
64static int profile_exceptions_notify(struct notifier_block *self, 64static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs)
65 unsigned long val, void *data)
66{ 65{
67 struct die_args *args = (struct die_args *)data; 66 if (ctr_running)
68 int ret = NOTIFY_DONE; 67 model->check_ctrs(regs, &__get_cpu_var(cpu_msrs));
69 68 else if (!nmi_enabled)
70 switch (val) { 69 return NMI_DONE;
71 case DIE_NMI: 70 else
72 if (ctr_running) 71 model->stop(&__get_cpu_var(cpu_msrs));
73 model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); 72 return NMI_HANDLED;
74 else if (!nmi_enabled)
75 break;
76 else
77 model->stop(&__get_cpu_var(cpu_msrs));
78 ret = NOTIFY_STOP;
79 break;
80 default:
81 break;
82 }
83 return ret;
84} 73}
85 74
86static void nmi_cpu_save_registers(struct op_msrs *msrs) 75static void nmi_cpu_save_registers(struct op_msrs *msrs)
@@ -363,12 +352,6 @@ static void nmi_cpu_setup(void *dummy)
363 apic_write(APIC_LVTPC, APIC_DM_NMI); 352 apic_write(APIC_LVTPC, APIC_DM_NMI);
364} 353}
365 354
366static struct notifier_block profile_exceptions_nb = {
367 .notifier_call = profile_exceptions_notify,
368 .next = NULL,
369 .priority = NMI_LOCAL_LOW_PRIOR,
370};
371
372static void nmi_cpu_restore_registers(struct op_msrs *msrs) 355static void nmi_cpu_restore_registers(struct op_msrs *msrs)
373{ 356{
374 struct op_msr *counters = msrs->counters; 357 struct op_msr *counters = msrs->counters;
@@ -508,7 +491,8 @@ static int nmi_setup(void)
508 ctr_running = 0; 491 ctr_running = 0;
509 /* make variables visible to the nmi handler: */ 492 /* make variables visible to the nmi handler: */
510 smp_mb(); 493 smp_mb();
511 err = register_die_notifier(&profile_exceptions_nb); 494 err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify,
495 0, "oprofile");
512 if (err) 496 if (err)
513 goto fail; 497 goto fail;
514 498
@@ -538,7 +522,7 @@ static void nmi_shutdown(void)
538 put_online_cpus(); 522 put_online_cpus();
539 /* make variables visible to the nmi handler: */ 523 /* make variables visible to the nmi handler: */
540 smp_mb(); 524 smp_mb();
541 unregister_die_notifier(&profile_exceptions_nb); 525 unregister_nmi_handler(NMI_LOCAL, "oprofile");
542 msrs = &get_cpu_var(cpu_msrs); 526 msrs = &get_cpu_var(cpu_msrs);
543 model->shutdown(msrs); 527 model->shutdown(msrs);
544 free_msrs(); 528 free_msrs();
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 73d70d65e76..6d5dbcdd444 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -58,8 +58,11 @@ EXPORT_SYMBOL_GPL(vrtc_cmos_write);
58unsigned long vrtc_get_time(void) 58unsigned long vrtc_get_time(void)
59{ 59{
60 u8 sec, min, hour, mday, mon; 60 u8 sec, min, hour, mday, mon;
61 unsigned long flags;
61 u32 year; 62 u32 year;
62 63
64 spin_lock_irqsave(&rtc_lock, flags);
65
63 while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP)) 66 while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
64 cpu_relax(); 67 cpu_relax();
65 68
@@ -70,6 +73,8 @@ unsigned long vrtc_get_time(void)
70 mon = vrtc_cmos_read(RTC_MONTH); 73 mon = vrtc_cmos_read(RTC_MONTH);
71 year = vrtc_cmos_read(RTC_YEAR); 74 year = vrtc_cmos_read(RTC_YEAR);
72 75
76 spin_unlock_irqrestore(&rtc_lock, flags);
77
73 /* vRTC YEAR reg contains the offset to 1960 */ 78 /* vRTC YEAR reg contains the offset to 1960 */
74 year += 1960; 79 year += 1960;
75 80
@@ -83,8 +88,10 @@ unsigned long vrtc_get_time(void)
83int vrtc_set_mmss(unsigned long nowtime) 88int vrtc_set_mmss(unsigned long nowtime)
84{ 89{
85 int real_sec, real_min; 90 int real_sec, real_min;
91 unsigned long flags;
86 int vrtc_min; 92 int vrtc_min;
87 93
94 spin_lock_irqsave(&rtc_lock, flags);
88 vrtc_min = vrtc_cmos_read(RTC_MINUTES); 95 vrtc_min = vrtc_cmos_read(RTC_MINUTES);
89 96
90 real_sec = nowtime % 60; 97 real_sec = nowtime % 60;
@@ -95,6 +102,8 @@ int vrtc_set_mmss(unsigned long nowtime)
95 102
96 vrtc_cmos_write(real_sec, RTC_SECONDS); 103 vrtc_cmos_write(real_sec, RTC_SECONDS);
97 vrtc_cmos_write(real_min, RTC_MINUTES); 104 vrtc_cmos_write(real_min, RTC_MINUTES);
105 spin_unlock_irqrestore(&rtc_lock, flags);
106
98 return 0; 107 return 0;
99} 108}
100 109