Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--   arch/x86/kernel/alternative.c             49
-rw-r--r--   arch/x86/kernel/apic/Makefile              5
-rw-r--r--   arch/x86/kernel/apic/apic.c               15
-rw-r--r--   arch/x86/kernel/apic/hw_nmi.c             16
-rw-r--r--   arch/x86/kernel/apic/io_apic.c            46
-rw-r--r--   arch/x86/kernel/apic/nmi.c               567
-rw-r--r--   arch/x86/kernel/cpu/common.c               1
-rw-r--r--   arch/x86/kernel/cpu/perf_event.c          24
-rw-r--r--   arch/x86/kernel/cpu/perfctr-watchdog.c   642
-rw-r--r--   arch/x86/kernel/dumpstack.c               12
-rw-r--r--   arch/x86/kernel/dumpstack_32.c            25
-rw-r--r--   arch/x86/kernel/dumpstack_64.c            24
-rw-r--r--   arch/x86/kernel/kprobes.c                113
-rw-r--r--   arch/x86/kernel/process.c                  3
-rw-r--r--   arch/x86/kernel/smpboot.c                 22
-rw-r--r--   arch/x86/kernel/stacktrace.c               8
-rw-r--r--   arch/x86/kernel/time.c                    18
-rw-r--r--   arch/x86/kernel/traps.c                   11
18 files changed, 191 insertions, 1410 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24c955a..553d0b0d639b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
 static int wrote_text;
 
 struct text_poke_params {
-	void *addr;
-	const void *opcode;
-	size_t len;
+	struct text_poke_param *params;
+	int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
 	struct text_poke_params *tpp = data;
+	struct text_poke_param *p;
+	int i;
 
 	if (atomic_dec_and_test(&stop_machine_first)) {
-		text_poke(tpp->addr, tpp->opcode, tpp->len);
+		for (i = 0; i < tpp->nparams; i++) {
+			p = &tpp->params[i];
+			text_poke(p->addr, p->opcode, p->len);
+		}
 		smp_wmb();	/* Make sure other cpus see that this has run */
 		wrote_text = 1;
 	} else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
 		smp_mb();	/* Load wrote_text before following execution */
 	}
 
-	flush_icache_range((unsigned long)tpp->addr,
-			   (unsigned long)tpp->addr + tpp->len);
+	for (i = 0; i < tpp->nparams; i++) {
+		p = &tpp->params[i];
+		flush_icache_range((unsigned long)p->addr,
+				   (unsigned long)p->addr + p->len);
+	}
+
 	return 0;
 }
 
@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
 	struct text_poke_params tpp;
+	struct text_poke_param p;
 
-	tpp.addr = addr;
-	tpp.opcode = opcode;
-	tpp.len = len;
+	p.addr = addr;
+	p.opcode = opcode;
+	p.len = len;
+	tpp.params = &p;
+	tpp.nparams = 1;
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
 	/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 	return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+	struct text_poke_params tpp = {.params = params, .nparams = n};
+
+	atomic_set(&stop_machine_first, 1);
+	wrote_text = 0;
+	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
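The batch interface added above just hands an array of text_poke_param entries to a single stop_machine() pass, so that many instruction pokes pay the stop_machine() cost only once. A minimal sketch of how a caller might use it follows; it is not taken from this patch, and fill_jump_insn(), addrs[], JUMPS and poke_jumps() are invented placeholders (RELATIVEJUMP_SIZE and the locking requirement come from the kprobes code and the kernel-doc above):

	#include <linux/cpu.h>		/* get_online_cpus()/put_online_cpus() */
	#include <linux/memory.h>	/* text_mutex */
	#include <asm/alternative.h>	/* text_poke_smp_batch(), struct text_poke_param (assumed location) */

	#define JUMPS 16	/* arbitrary batch size for this sketch */

	static struct text_poke_param params[JUMPS];
	static u8 insn_bufs[JUMPS][RELATIVEJUMP_SIZE];

	/* Rewrite several instruction sites with one stop_machine() pass. */
	static void poke_jumps(void **addrs, int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			fill_jump_insn(insn_bufs[i], addrs[i]);	/* hypothetical helper that builds the new bytes */
			params[i].addr   = addrs[i];
			params[i].opcode = insn_bufs[i];
			params[i].len    = RELATIVEJUMP_SIZE;
		}

		get_online_cpus();			/* required by text_poke_smp_batch() */
		mutex_lock(&text_mutex);
		text_poke_smp_batch(params, n);		/* one stop_machine() for all n pokes */
		mutex_unlock(&text_mutex);
		put_online_cpus();
	}
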
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d537392..e9e2a93783f9 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void)
 	 * PIT/HPET going. Otherwise register lapic as a dummy
 	 * device.
 	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		pr_warning("APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void)
 	}
 #endif
 
-	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 }
 
@@ -1750,17 +1744,10 @@ int __init APIC_init_uniprocessor(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
-#else
-	localise_nmi_watchdog();
 #endif
 
 	x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-	check_nmi_watchdog();
-#endif
-
 	return 0;
 }
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..a0e71cb4fa9c 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,16 +17,18 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
 
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
+#ifdef arch_trigger_all_cpu_backtrace
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;
@@ -93,16 +95,4 @@ early_initcall(register_trigger_all_cpu_backtrace);
 #endif
 
 /* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
 int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
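A quick note on the arithmetic in hw_nmi_get_sample_period() kept above: cpu_khz * 1000 is the CPU clock in cycles per second, so the returned value is roughly sixty seconds' worth of cycles, presumably used by the generic hardlockup detector as its perf sample period. As an illustration with made-up numbers: a 2 GHz part has cpu_khz of about 2,000,000, giving 2,000,000 * 1000 * 60 = 1.2e11 cycles between watchdog NMIs.
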
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628..e4a040c28de1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -2643,24 +2642,6 @@ static void lapic_register_intr(int irq)
 				      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2766,15 +2747,6 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-	{
-		unsigned int ver;
-
-		ver = apic_read(APIC_LVR);
-		ver = GET_APIC_VERSION(ver);
-		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	}
-#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2822,10 +2794,6 @@ static inline void __init check_timer(void)
 			unmask_ioapic(cfg);
 		}
 		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
@@ -2851,11 +2819,6 @@ static inline void __init check_timer(void)
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				legacy_pic->mask(0);
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			goto out;
 		}
 		/*
@@ -2867,15 +2830,6 @@ static inline void __init check_timer(void)
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */
13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
18#include <linux/delay.h>
19#include <linux/interrupt.h>
20#include <linux/module.h>
21#include <linux/slab.h>
22#include <linux/sysdev.h>
23#include <linux/sysctl.h>
24#include <linux/percpu.h>
25#include <linux/kprobes.h>
26#include <linux/cpumask.h>
27#include <linux/kernel_stat.h>
28#include <linux/kdebug.h>
29#include <linux/smp.h>
30
31#include <asm/i8259.h>
32#include <asm/io_apic.h>
33#include <asm/proto.h>
34#include <asm/timer.h>
35
36#include <asm/mce.h>
37
38#include <asm/mach_traps.h>
39
40int unknown_nmi_panic;
41int nmi_watchdog_enabled;
42
43/* For reliability, we're prepared to waste bits here. */
44static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45
46/* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */
52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
54
55unsigned int nmi_watchdog = NMI_NONE;
56EXPORT_SYMBOL(nmi_watchdog);
57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
63
64static inline unsigned int get_nmi_count(int cpu)
65{
66 return per_cpu(irq_stat, cpu).__nmi_count;
67}
68
69static inline int mce_in_progress(void)
70{
71#if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0;
73#endif
74 return 0;
75}
76
77/*
78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active, when we have highres/dyntick on
80 */
81static inline unsigned int get_timer_irqs(int cpu)
82{
83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs;
85}
86
87#ifdef CONFIG_SMP
88/*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy.
92 */
93static __init void nmi_cpu_busy(void *data)
94{
95 local_irq_enable_in_hardirq();
96 /*
97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat less cycles.
103 */
104 while (endflag == 0)
105 mb();
106}
107#endif
108
109static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110{
111 printk(KERN_CONT "\n");
112
113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116
117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n");
121
122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active);
124}
125
126static void __acpi_nmi_disable(void *__unused)
127{
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129}
130
131int __init check_nmi_watchdog(void)
132{
133 unsigned int *prev_nmi_count;
134 int cpu;
135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0;
138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count)
141 goto error;
142
143 printk(KERN_INFO "Testing NMI watchdog ... ");
144
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active neither should the other cpus */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * the best way to detect whether a CPU has a 'hard lockup' problem
352 * is to check it's local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem.
354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info()-> is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
429 /* if the none of the timers isn't firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * don't know how to accurately check for this.
457 * just assume it was a watchdog timer interrupt
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
493static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494{
495 unsigned char reason = get_nmi_reason();
496 char buf[64];
497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0;
501}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
540
541#endif /* CONFIG_SYSCTL */
542
543int do_nmi_callback(struct pt_regs *regs, int cpu)
544{
545#ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu);
548#endif
549 return 0;
550}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b9145b13..817d2b195e8e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
 {
 	int i;
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		disable_lapic_nmi_watchdog();
-
 	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
 	for (i--; i >= 0; i--)
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
-
 	return false;
 }
 
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
-
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		enable_lapic_nmi_watchdog();
 }
 
 #else
@@ -451,7 +442,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	u64 config;
 
-	if (!hwc->sample_period) {
+	if (!is_sampling_event(event)) {
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1353,7 @@ static void __init pmu_check_apic(void)
 	pr_info("no hardware sampling interrupt available.\n");
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
 	struct event_constraint *c;
 	int err;
@@ -1377,11 +1368,11 @@ void __init init_hw_perf_events(void)
 		err = amd_pmu_init();
 		break;
 	default:
-		return;
+		return 0;
 	}
 	if (err != 0) {
 		pr_cont("no PMU driver, software events only.\n");
-		return;
+		return 0;
 	}
 
 	pmu_check_apic();
@@ -1389,7 +1380,7 @@ void __init init_hw_perf_events(void)
 	/* sanity check that the hardware exists or is emulated */
 	if (!check_hw_exists()) {
 		pr_cont("Broken PMU hardware detected, software events only.\n");
-		return;
+		return 0;
 	}
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -1440,7 +1431,10 @@ void __init init_hw_perf_events(void)
 
 	perf_pmu_register(&pmu);
 	perf_cpu_notifier(x86_pmu_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_events);
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
@@ -1686,7 +1680,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	perf_callchain_store(entry, regs->ip);
 
-	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
+	dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
 }
 
 #ifdef CONFIG_COMPAT
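x86_setup_perfctr() above now uses the generic is_sampling_event() helper rather than testing hwc->sample_period directly. For reference, the helper amounts to the following; this is quoted from memory of the <linux/perf_event.h> of that era, so treat the exact form as approximate:

	/* include/linux/perf_event.h (approximate) */
	static inline bool is_sampling_event(struct perf_event *event)
	{
		return event->attr.sample_period != 0;
	}
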
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..14d45928c282 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -22,26 +22,6 @@
 #include <asm/apic.h>
 #include <asm/perf_event.h>
 
-struct nmi_watchdog_ctlblk {
-	unsigned int cccr_msr;
-	unsigned int perfctr_msr;	/* the MSR to reset in NMI handler */
-	unsigned int evntsel_msr;	/* the MSR to select the events to handle */
-};
-
-/* Interface defining a CPU specific perfctr watchdog */
-struct wd_ops {
-	int (*reserve)(void);
-	void (*unreserve)(void);
-	int (*setup)(unsigned nmi_hz);
-	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
-	void (*stop)(void);
-	unsigned perfctr;
-	unsigned evntsel;
-	u64 checkbit;
-};
-
-static const struct wd_ops *wd_ops;
-
 /*
  * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
 static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
 static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
 
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-
 /* converts an msr to an appropriate reservation bit */
 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 {
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
 	clear_bit(counter, evntsel_nmi_owner);
 }
 EXPORT_SYMBOL(release_evntsel_nmi);
175
176void disable_lapic_nmi_watchdog(void)
177{
178 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
179
180 if (atomic_read(&nmi_active) <= 0)
181 return;
182
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184
185 if (wd_ops)
186 wd_ops->unreserve();
187
188 BUG_ON(atomic_read(&nmi_active) != 0);
189}
190
191void enable_lapic_nmi_watchdog(void)
192{
193 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
194
195 /* are we already enabled */
196 if (atomic_read(&nmi_active) != 0)
197 return;
198
199 /* are we lapic aware */
200 if (!wd_ops)
201 return;
202 if (!wd_ops->reserve()) {
203 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
204 return;
205 }
206
207 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
208 touch_nmi_watchdog();
209}
210
211/*
212 * Activate the NMI watchdog via the local APIC.
213 */
214
215static unsigned int adjust_for_32bit_ctr(unsigned int hz)
216{
217 u64 counter_val;
218 unsigned int retval = hz;
219
220 /*
221 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
222 * are writable, with higher bits sign extending from bit 31.
223 * So, we can only program the counter with 31 bit values and
224 * 32nd bit should be 1, for 33.. to be 1.
225 * Find the appropriate nmi_hz
226 */
227 counter_val = (u64)cpu_khz * 1000;
228 do_div(counter_val, retval);
229 if (counter_val > 0x7fffffffULL) {
230 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, 0x7fffffffUL);
232 retval = count + 1;
233 }
234 return retval;
235}
236
237static void write_watchdog_counter(unsigned int perfctr_msr,
238 const char *descr, unsigned nmi_hz)
239{
240 u64 count = (u64)cpu_khz * 1000;
241
242 do_div(count, nmi_hz);
243 if (descr)
244 pr_debug("setting %s to -0x%08Lx\n", descr, count);
245 wrmsrl(perfctr_msr, 0 - count);
246}
247
248static void write_watchdog_counter32(unsigned int perfctr_msr,
249 const char *descr, unsigned nmi_hz)
250{
251 u64 count = (u64)cpu_khz * 1000;
252
253 do_div(count, nmi_hz);
254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsr(perfctr_msr, (u32)(-count), 0);
257}
258
259/*
260 * AMD K7/K8/Family10h/Family11h support.
261 * AMD keeps this interface nicely stable so there is not much variety
262 */
263#define K7_EVNTSEL_ENABLE (1 << 22)
264#define K7_EVNTSEL_INT (1 << 20)
265#define K7_EVNTSEL_OS (1 << 17)
266#define K7_EVNTSEL_USR (1 << 16)
267#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
268#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
269
270static int setup_k7_watchdog(unsigned nmi_hz)
271{
272 unsigned int perfctr_msr, evntsel_msr;
273 unsigned int evntsel;
274 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
275
276 perfctr_msr = wd_ops->perfctr;
277 evntsel_msr = wd_ops->evntsel;
278
279 wrmsrl(perfctr_msr, 0UL);
280
281 evntsel = K7_EVNTSEL_INT
282 | K7_EVNTSEL_OS
283 | K7_EVNTSEL_USR
284 | K7_NMI_EVENT;
285
286 /* setup the timer */
287 wrmsr(evntsel_msr, evntsel, 0);
288 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
289
290 /* initialize the wd struct before enabling */
291 wd->perfctr_msr = perfctr_msr;
292 wd->evntsel_msr = evntsel_msr;
293 wd->cccr_msr = 0; /* unused */
294
295 /* ok, everything is initialized, announce that we're set */
296 cpu_nmi_set_wd_enabled();
297
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 return 1;
303}
304
305static void single_msr_stop_watchdog(void)
306{
307 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
308
309 wrmsr(wd->evntsel_msr, 0, 0);
310}
311
312static int single_msr_reserve(void)
313{
314 if (!reserve_perfctr_nmi(wd_ops->perfctr))
315 return 0;
316
317 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
318 release_perfctr_nmi(wd_ops->perfctr);
319 return 0;
320 }
321 return 1;
322}
323
324static void single_msr_unreserve(void)
325{
326 release_evntsel_nmi(wd_ops->evntsel);
327 release_perfctr_nmi(wd_ops->perfctr);
328}
329
330static void __kprobes
331single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
332{
333 /* start the cycle over again */
334 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
335}
336
337static const struct wd_ops k7_wd_ops = {
338 .reserve = single_msr_reserve,
339 .unreserve = single_msr_unreserve,
340 .setup = setup_k7_watchdog,
341 .rearm = single_msr_rearm,
342 .stop = single_msr_stop_watchdog,
343 .perfctr = MSR_K7_PERFCTR0,
344 .evntsel = MSR_K7_EVNTSEL0,
345 .checkbit = 1ULL << 47,
346};
347
348/*
349 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
350 */
351#define P6_EVNTSEL0_ENABLE (1 << 22)
352#define P6_EVNTSEL_INT (1 << 20)
353#define P6_EVNTSEL_OS (1 << 17)
354#define P6_EVNTSEL_USR (1 << 16)
355#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
356#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
357
358static int setup_p6_watchdog(unsigned nmi_hz)
359{
360 unsigned int perfctr_msr, evntsel_msr;
361 unsigned int evntsel;
362 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
363
364 perfctr_msr = wd_ops->perfctr;
365 evntsel_msr = wd_ops->evntsel;
366
367 /* KVM doesn't implement this MSR */
368 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
369 return 0;
370
371 evntsel = P6_EVNTSEL_INT
372 | P6_EVNTSEL_OS
373 | P6_EVNTSEL_USR
374 | P6_NMI_EVENT;
375
376 /* setup the timer */
377 wrmsr(evntsel_msr, evntsel, 0);
378 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
379 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
380
381 /* initialize the wd struct before enabling */
382 wd->perfctr_msr = perfctr_msr;
383 wd->evntsel_msr = evntsel_msr;
384 wd->cccr_msr = 0; /* unused */
385
386 /* ok, everything is initialized, announce that we're set */
387 cpu_nmi_set_wd_enabled();
388
389 apic_write(APIC_LVTPC, APIC_DM_NMI);
390 evntsel |= P6_EVNTSEL0_ENABLE;
391 wrmsr(evntsel_msr, evntsel, 0);
392
393 return 1;
394}
395
396static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
397{
398 /*
399 * P6 based Pentium M need to re-unmask
400 * the apic vector but it doesn't hurt
401 * other P6 variant.
402 * ArchPerfom/Core Duo also needs this
403 */
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405
406 /* P6/ARCH_PERFMON has 32 bit counter write */
407 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
408}
409
410static const struct wd_ops p6_wd_ops = {
411 .reserve = single_msr_reserve,
412 .unreserve = single_msr_unreserve,
413 .setup = setup_p6_watchdog,
414 .rearm = p6_rearm,
415 .stop = single_msr_stop_watchdog,
416 .perfctr = MSR_P6_PERFCTR0,
417 .evntsel = MSR_P6_EVNTSEL0,
418 .checkbit = 1ULL << 39,
419};
420
421/*
422 * Intel P4 performance counters.
423 * By far the most complicated of all.
424 */
425#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
426#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
427#define P4_ESCR_OS (1 << 3)
428#define P4_ESCR_USR (1 << 2)
429#define P4_CCCR_OVF_PMI0 (1 << 26)
430#define P4_CCCR_OVF_PMI1 (1 << 27)
431#define P4_CCCR_THRESHOLD(N) ((N) << 20)
432#define P4_CCCR_COMPLEMENT (1 << 19)
433#define P4_CCCR_COMPARE (1 << 18)
434#define P4_CCCR_REQUIRED (3 << 16)
435#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
436#define P4_CCCR_ENABLE (1 << 12)
437#define P4_CCCR_OVF (1 << 31)
438
439#define P4_CONTROLS 18
440static unsigned int p4_controls[18] = {
441 MSR_P4_BPU_CCCR0,
442 MSR_P4_BPU_CCCR1,
443 MSR_P4_BPU_CCCR2,
444 MSR_P4_BPU_CCCR3,
445 MSR_P4_MS_CCCR0,
446 MSR_P4_MS_CCCR1,
447 MSR_P4_MS_CCCR2,
448 MSR_P4_MS_CCCR3,
449 MSR_P4_FLAME_CCCR0,
450 MSR_P4_FLAME_CCCR1,
451 MSR_P4_FLAME_CCCR2,
452 MSR_P4_FLAME_CCCR3,
453 MSR_P4_IQ_CCCR0,
454 MSR_P4_IQ_CCCR1,
455 MSR_P4_IQ_CCCR2,
456 MSR_P4_IQ_CCCR3,
457 MSR_P4_IQ_CCCR4,
458 MSR_P4_IQ_CCCR5,
459};
460/*
461 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
462 * CRU_ESCR0 (with any non-null event selector) through a complemented
463 * max threshold. [IA32-Vol3, Section 14.9.9]
464 */
465static int setup_p4_watchdog(unsigned nmi_hz)
466{
467 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
468 unsigned int evntsel, cccr_val;
469 unsigned int misc_enable, dummy;
470 unsigned int ht_num;
471 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472
473 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
474 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
475 return 0;
476
477#ifdef CONFIG_SMP
478 /* detect which hyperthread we are on */
479 if (smp_num_siblings == 2) {
480 unsigned int ebx, apicid;
481
482 ebx = cpuid_ebx(1);
483 apicid = (ebx >> 24) & 0xff;
484 ht_num = apicid & 1;
485 } else
486#endif
487 ht_num = 0;
488
489 /*
490 * performance counters are shared resources
491 * assign each hyperthread its own set
492 * (re-use the ESCR0 register, seems safe
493 * and keeps the cccr_val the same)
494 */
495 if (!ht_num) {
496 /* logical cpu 0 */
497 perfctr_msr = MSR_P4_IQ_PERFCTR0;
498 evntsel_msr = MSR_P4_CRU_ESCR0;
499 cccr_msr = MSR_P4_IQ_CCCR0;
500 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
501
502 /*
503 * If we're on the kdump kernel or other situation, we may
504 * still have other performance counter registers set to
505 * interrupt and they'll keep interrupting forever because
506 * of the P4_CCCR_OVF quirk. So we need to ACK all the
507 * pending interrupts and disable all the registers here,
508 * before reenabling the NMI delivery. Refer to p4_rearm()
509 * about the P4_CCCR_OVF quirk.
510 */
511 if (reset_devices) {
512 unsigned int low, high;
513 int i;
514
515 for (i = 0; i < P4_CONTROLS; i++) {
516 rdmsr(p4_controls[i], low, high);
517 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
518 wrmsr(p4_controls[i], low, high);
519 }
520 }
521 } else {
522 /* logical cpu 1 */
523 perfctr_msr = MSR_P4_IQ_PERFCTR1;
524 evntsel_msr = MSR_P4_CRU_ESCR0;
525 cccr_msr = MSR_P4_IQ_CCCR1;
526
527 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
528 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
529 cccr_val = P4_CCCR_OVF_PMI0;
530 else
531 cccr_val = P4_CCCR_OVF_PMI1;
532 cccr_val |= P4_CCCR_ESCR_SELECT(4);
533 }
534
535 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
536 | P4_ESCR_OS
537 | P4_ESCR_USR;
538
539 cccr_val |= P4_CCCR_THRESHOLD(15)
540 | P4_CCCR_COMPLEMENT
541 | P4_CCCR_COMPARE
542 | P4_CCCR_REQUIRED;
543
544 wrmsr(evntsel_msr, evntsel, 0);
545 wrmsr(cccr_msr, cccr_val, 0);
546 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
547
548 wd->perfctr_msr = perfctr_msr;
549 wd->evntsel_msr = evntsel_msr;
550 wd->cccr_msr = cccr_msr;
551
552 /* ok, everything is initialized, announce that we're set */
553 cpu_nmi_set_wd_enabled();
554
555 apic_write(APIC_LVTPC, APIC_DM_NMI);
556 cccr_val |= P4_CCCR_ENABLE;
557 wrmsr(cccr_msr, cccr_val, 0);
558 return 1;
559}
560
561static void stop_p4_watchdog(void)
562{
563 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
564 wrmsr(wd->cccr_msr, 0, 0);
565 wrmsr(wd->evntsel_msr, 0, 0);
566}
567
568static int p4_reserve(void)
569{
570 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
571 return 0;
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
574 goto fail1;
575#endif
576 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
577 goto fail2;
578 /* RED-PEN why is ESCR1 not reserved here? */
579 return 1;
580 fail2:
581#ifdef CONFIG_SMP
582 if (smp_num_siblings > 1)
583 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
584 fail1:
585#endif
586 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
587 return 0;
588}
589
590static void p4_unreserve(void)
591{
592#ifdef CONFIG_SMP
593 if (smp_num_siblings > 1)
594 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595#endif
596 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
597 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598}
599
600static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
601{
602 unsigned dummy;
603 /*
604 * P4 quirks:
605 * - An overflown perfctr will assert its interrupt
606 * until the OVF flag in its CCCR is cleared.
607 * - LVTPC is masked on interrupt and must be
608 * unmasked by the LVTPC handler.
609 */
610 rdmsrl(wd->cccr_msr, dummy);
611 dummy &= ~P4_CCCR_OVF;
612 wrmsrl(wd->cccr_msr, dummy);
613 apic_write(APIC_LVTPC, APIC_DM_NMI);
614 /* start the cycle over again */
615 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
616}
617
618static const struct wd_ops p4_wd_ops = {
619 .reserve = p4_reserve,
620 .unreserve = p4_unreserve,
621 .setup = setup_p4_watchdog,
622 .rearm = p4_rearm,
623 .stop = stop_p4_watchdog,
624 /* RED-PEN this is wrong for the other sibling */
625 .perfctr = MSR_P4_BPU_PERFCTR0,
626 .evntsel = MSR_P4_BSU_ESCR0,
627 .checkbit = 1ULL << 39,
628};
629
630/*
631 * Watchdog using the Intel architected PerfMon.
632 * Used for Core2 and hopefully all future Intel CPUs.
633 */
634#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
635#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
636
637static struct wd_ops intel_arch_wd_ops;
638
639static int setup_intel_arch_watchdog(unsigned nmi_hz)
640{
641 unsigned int ebx;
642 union cpuid10_eax eax;
643 unsigned int unused;
644 unsigned int perfctr_msr, evntsel_msr;
645 unsigned int evntsel;
646 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
647
648 /*
649 * Check whether the Architectural PerfMon supports
650 * Unhalted Core Cycles Event or not.
651 * NOTE: Corresponding bit = 0 in ebx indicates event present.
652 */
653 cpuid(10, &(eax.full), &ebx, &unused, &unused);
654 if ((eax.split.mask_length <
655 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
656 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
657 return 0;
658
659 perfctr_msr = wd_ops->perfctr;
660 evntsel_msr = wd_ops->evntsel;
661
662 wrmsrl(perfctr_msr, 0UL);
663
664 evntsel = ARCH_PERFMON_EVENTSEL_INT
665 | ARCH_PERFMON_EVENTSEL_OS
666 | ARCH_PERFMON_EVENTSEL_USR
667 | ARCH_PERFMON_NMI_EVENT_SEL
668 | ARCH_PERFMON_NMI_EVENT_UMASK;
669
670 /* setup the timer */
671 wrmsr(evntsel_msr, evntsel, 0);
672 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
673 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
674
675 wd->perfctr_msr = perfctr_msr;
676 wd->evntsel_msr = evntsel_msr;
677 wd->cccr_msr = 0; /* unused */
678
679 /* ok, everything is initialized, announce that we're set */
680 cpu_nmi_set_wd_enabled();
681
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1;
687}
688
689static struct wd_ops intel_arch_wd_ops __read_mostly = {
690 .reserve = single_msr_reserve,
691 .unreserve = single_msr_unreserve,
692 .setup = setup_intel_arch_watchdog,
693 .rearm = p6_rearm,
694 .stop = single_msr_stop_watchdog,
695 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
696 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
697};
698
699static void probe_nmi_watchdog(void)
700{
701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 == 6 ||
704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 wd_ops = &k7_wd_ops;
706 return;
707 case X86_VENDOR_INTEL:
708 /* Work around where perfctr1 doesn't have a working enable
709 * bit as described in the following errata:
710 * AE49 Core Duo and Intel Core Solo 65 nm
711 * AN49 Intel Pentium Dual-Core
712 * AF49 Dual-Core Intel Xeon Processor LV
713 */
714 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
715 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
716 boot_cpu_data.x86_mask == 4))) {
717 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
718 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
719 }
720 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
721 wd_ops = &intel_arch_wd_ops;
722 break;
723 }
724 switch (boot_cpu_data.x86) {
725 case 6:
726 if (boot_cpu_data.x86_model > 13)
727 return;
728
729 wd_ops = &p6_wd_ops;
730 break;
731 case 15:
732 wd_ops = &p4_wd_ops;
733 break;
734 default:
735 return;
736 }
737 break;
738 }
739}
740
741/* Interface to nmi.c */
742
743int lapic_watchdog_init(unsigned nmi_hz)
744{
745 if (!wd_ops) {
746 probe_nmi_watchdog();
747 if (!wd_ops) {
748 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
749 return -1;
750 }
751
752 if (!wd_ops->reserve()) {
753 printk(KERN_ERR
754 "NMI watchdog: cannot reserve perfctrs\n");
755 return -1;
756 }
757 }
758
759 if (!(wd_ops->setup(nmi_hz))) {
760 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
761 raw_smp_processor_id());
762 return -1;
763 }
764
765 return 0;
766}
767
768void lapic_watchdog_stop(void)
769{
770 if (wd_ops)
771 wd_ops->stop();
772}
773
774unsigned lapic_adjust_nmi_hz(unsigned hz)
775{
776 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
777 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
778 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
779 hz = adjust_for_32bit_ctr(hz);
780 return hz;
781}
782
783int __kprobes lapic_wd_event(unsigned nmi_hz)
784{
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 ctr;
787
788 rdmsrl(wd->perfctr_msr, ctr);
789 if (ctr & wd_ops->checkbit) /* perfctr still running? */
790 return 0;
791
792 wd_ops->rearm(wd, nmi_hz);
793 return 1;
794}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..8474c998cbd4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
 
 void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
+		unsigned long *stack, char *log_lvl)
 {
 	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+	dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
+		unsigned long *stack)
 {
-	show_trace_log_lvl(task, regs, stack, bp, "");
+	show_trace_log_lvl(task, regs, stack, "");
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	show_stack_log_lvl(task, NULL, sp, 0, "");
+	show_stack_log_lvl(task, NULL, sp, "");
 }
 
 /*
@@ -210,7 +210,7 @@ void dump_stack(void)
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	show_trace(NULL, NULL, &stack, bp);
+	show_trace(NULL, NULL, &stack);
 }
 EXPORT_SYMBOL(dump_stack);
 
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
 #include <asm/stacktrace.h>
 
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+		struct pt_regs *regs, unsigned long *stack,
 		const struct stacktrace_ops *ops, void *data)
 {
 	int graph = 0;
+	unsigned long bp;
 
 	if (!task)
 		task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		stack = (unsigned long *)task->thread.sp;
 	}
 
-#ifdef CONFIG_FRAME_POINTER
-	if (!bp) {
-		if (task == current) {
-			/* Grab bp right from our regs */
-			get_bp(bp);
-		} else {
-			/* bp is the last reg pushed by switch_to */
-			bp = *(unsigned long *) task->thread.sp;
-		}
-	}
-#endif
-
+	bp = stack_frame(task, regs);
 	for (;;) {
 		struct thread_info *context;
 
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *sp, unsigned long bp, char *log_lvl)
+		unsigned long *sp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		touch_nmi_watchdog();
 	}
 	printk(KERN_CONT "\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
 	u8 *ip;
 
 	printk(KERN_EMERG "Stack:\n");
-	show_stack_log_lvl(NULL, regs, &regs->sp,
-			0, KERN_EMERG);
+	show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
 
 	printk(KERN_EMERG "Code: ");
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
140 */ 140 */
141 141
142void dump_trace(struct task_struct *task, struct pt_regs *regs, 142void dump_trace(struct task_struct *task,
143 unsigned long *stack, unsigned long bp, 143 struct pt_regs *regs, unsigned long *stack,
144 const struct stacktrace_ops *ops, void *data) 144 const struct stacktrace_ops *ops, void *data)
145{ 145{
146 const unsigned cpu = get_cpu(); 146 const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 unsigned used = 0; 149 unsigned used = 0;
150 struct thread_info *tinfo; 150 struct thread_info *tinfo;
151 int graph = 0; 151 int graph = 0;
152 unsigned long bp;
152 153
153 if (!task) 154 if (!task)
154 task = current; 155 task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *)task->thread.sp; 161 stack = (unsigned long *)task->thread.sp;
161 } 162 }
162 163
163#ifdef CONFIG_FRAME_POINTER 164 bp = stack_frame(task, regs);
164 if (!bp) {
165 if (task == current) {
166 /* Grab bp right from our regs */
167 get_bp(bp);
168 } else {
169 /* bp is the last reg pushed by switch_to */
170 bp = *(unsigned long *) task->thread.sp;
171 }
172 }
173#endif
174
175 /* 165 /*
176 * Print function call entries in all stacks, starting at the 166 * Print function call entries in all stacks, starting at the
177 * current stack address. If the stacks consist of nested 167 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
235 225
236void 226void
237show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 227show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
238 unsigned long *sp, unsigned long bp, char *log_lvl) 228 unsigned long *sp, char *log_lvl)
239{ 229{
240 unsigned long *irq_stack_end; 230 unsigned long *irq_stack_end;
241 unsigned long *irq_stack; 231 unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
279 preempt_enable(); 269 preempt_enable();
280 270
281 printk(KERN_CONT "\n"); 271 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 272 show_trace_log_lvl(task, regs, sp, log_lvl);
283} 273}
284 274
285void show_registers(struct pt_regs *regs) 275void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
308 298
309 printk(KERN_EMERG "Stack:\n"); 299 printk(KERN_EMERG "Stack:\n");
310 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 300 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
311 regs->bp, KERN_EMERG); 301 KERN_EMERG);
312 302
313 printk(KERN_EMERG "Code: "); 303 printk(KERN_EMERG "Code: ");
314 304
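Both dump_trace() implementations now obtain bp from stack_frame(task, regs). The helper itself is not part of these hunks; the sketch below reconstructs what it presumably does from the open-coded CONFIG_FRAME_POINTER block removed above (its exact location and form are assumptions):

/* Sketch only: presumed helper behind the dump_trace() simplification. */
#ifdef CONFIG_FRAME_POINTER
static inline unsigned long
stack_frame(struct task_struct *task, struct pt_regs *regs)
{
	unsigned long bp;

	if (regs)
		return regs->bp;	/* take bp straight from the trapped registers */

	if (task == current) {
		get_bp(bp);		/* grab bp right from our regs */
		return bp;
	}

	/* bp is the last reg pushed by switch_to */
	return *(unsigned long *)task->thread.sp;
}
#else
static inline unsigned long
stack_frame(struct task_struct *task, struct pt_regs *regs)
{
	return 0;	/* no frame pointers: the unwinder falls back to stack scanning */
}
#endif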
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..5940282bd2f9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 1186
1187	/* This is possible if op is being unoptimized (delayed unoptimization) */
1188 if (kprobe_disabled(&op->kp))
1189 return;
1190
1187 preempt_disable(); 1191 preempt_disable();
1188 if (kprobe_running()) { 1192 if (kprobe_running()) {
1189 kprobes_inc_nmissed_count(&op->kp); 1193 kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1401 return 0; 1405 return 0;
1402} 1406}
1403 1407
1404/* Replace a breakpoint (int3) with a relative jump. */ 1408#define MAX_OPTIMIZE_PROBES 256
1405int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) 1409static struct text_poke_param *jump_poke_params;
1410static struct jump_poke_buffer {
1411 u8 buf[RELATIVEJUMP_SIZE];
1412} *jump_poke_bufs;
1413
1414static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1415 u8 *insn_buf,
1416 struct optimized_kprobe *op)
1406{ 1417{
1407 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1408 s32 rel = (s32)((long)op->optinsn.insn - 1418 s32 rel = (s32)((long)op->optinsn.insn -
1409 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 1419 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1410 1420
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1412 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 1422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1413 RELATIVE_ADDR_SIZE); 1423 RELATIVE_ADDR_SIZE);
1414 1424
1415 jmp_code[0] = RELATIVEJUMP_OPCODE; 1425 insn_buf[0] = RELATIVEJUMP_OPCODE;
1416 *(s32 *)(&jmp_code[1]) = rel; 1426 *(s32 *)(&insn_buf[1]) = rel;
1427
1428 tprm->addr = op->kp.addr;
1429 tprm->opcode = insn_buf;
1430 tprm->len = RELATIVEJUMP_SIZE;
1431}
1432
1433/*
1434 * Replace breakpoints (int3) with relative jumps.
1435 * Caller must hold kprobe_mutex and text_mutex.
1436 */
1437void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1438{
1439 struct optimized_kprobe *op, *tmp;
1440 int c = 0;
1441
1442 list_for_each_entry_safe(op, tmp, oplist, list) {
1443 WARN_ON(kprobe_disabled(&op->kp));
1444 /* Setup param */
1445 setup_optimize_kprobe(&jump_poke_params[c],
1446 jump_poke_bufs[c].buf, op);
1447 list_del_init(&op->list);
1448 if (++c >= MAX_OPTIMIZE_PROBES)
1449 break;
1450 }
1417 1451
1418 /* 1452 /*
1419 * text_poke_smp doesn't support NMI/MCE code modifying. 1453 * text_poke_smp doesn't support NMI/MCE code modifying.
1420 * However, since kprobes itself also doesn't support NMI/MCE 1454 * However, since kprobes itself also doesn't support NMI/MCE
1421 * code probing, it's not a problem. 1455 * code probing, it's not a problem.
1422 */ 1456 */
1423 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); 1457 text_poke_smp_batch(jump_poke_params, c);
1424 return 0; 1458}
1459
1460static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1461 u8 *insn_buf,
1462 struct optimized_kprobe *op)
1463{
1464 /* Set int3 to first byte for kprobes */
1465 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1466 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1467
1468 tprm->addr = op->kp.addr;
1469 tprm->opcode = insn_buf;
1470 tprm->len = RELATIVEJUMP_SIZE;
1471}
1472
1473/*
1474 * Recover original instructions and breakpoints from relative jumps.
1475 * Caller must hold kprobe_mutex.
1476 */
1477extern void arch_unoptimize_kprobes(struct list_head *oplist,
1478 struct list_head *done_list)
1479{
1480 struct optimized_kprobe *op, *tmp;
1481 int c = 0;
1482
1483 list_for_each_entry_safe(op, tmp, oplist, list) {
1484 /* Setup param */
1485 setup_unoptimize_kprobe(&jump_poke_params[c],
1486 jump_poke_bufs[c].buf, op);
1487 list_move(&op->list, done_list);
1488 if (++c >= MAX_OPTIMIZE_PROBES)
1489 break;
1490 }
1491
1492 /*
1493 * text_poke_smp doesn't support NMI/MCE code modifying.
1494 * However, since kprobes itself also doesn't support NMI/MCE
1495 * code probing, it's not a problem.
1496 */
1497 text_poke_smp_batch(jump_poke_params, c);
1425} 1498}
1426 1499
1427/* Replace a relative jump with a breakpoint (int3). */ 1500/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
1453 } 1526 }
1454 return 0; 1527 return 0;
1455} 1528}
1529
1530static int __kprobes init_poke_params(void)
1531{
1532 /* Allocate code buffer and parameter array */
1533 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1534 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1535 if (!jump_poke_bufs)
1536 return -ENOMEM;
1537
1538 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1539 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1540 if (!jump_poke_params) {
1541 kfree(jump_poke_bufs);
1542 jump_poke_bufs = NULL;
1543 return -ENOMEM;
1544 }
1545
1546 return 0;
1547}
1548#else /* !CONFIG_OPTPROBES */
1549static int __kprobes init_poke_params(void)
1550{
1551 return 0;
1552}
1456#endif 1553#endif
1457 1554
1458int __init arch_init_kprobes(void) 1555int __init arch_init_kprobes(void)
1459{ 1556{
1460 return 0; 1557 return init_poke_params();
1461} 1558}
1462 1559
1463int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1560int __kprobes arch_trampoline_kprobe(struct kprobe *p)
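arch_optimize_kprobes() and arch_unoptimize_kprobes() above hand their parameter arrays to text_poke_smp_batch(), which patches every entry in a single stop_machine() round. Its prototype is not shown in this hunk; the signature below is assumed from the (params, count) pairing used above, so treat this as a sketch rather than the exported interface:

/*
 * Sketch only: batching two independent patch sites in one pass.
 * Assumed prototype: void text_poke_smp_batch(struct text_poke_param *params, int n);
 * As with text_poke_smp(), the caller is expected to hold text_mutex.
 */
static u8 insn0[RELATIVEJUMP_SIZE], insn1[RELATIVEJUMP_SIZE];

static void patch_two_sites(void *addr0, void *addr1)
{
	struct text_poke_param params[2] = {
		{ .addr = addr0, .opcode = insn0, .len = RELATIVEJUMP_SIZE },
		{ .addr = addr1, .opcode = insn1, .len = RELATIVEJUMP_SIZE },
	};

	text_poke_smp_batch(params, 2);	/* one stop_machine() covers both pokes */
}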
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868a86aa..96ed1aac543a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -91,8 +91,7 @@ void exit_thread(void)
91void show_regs(struct pt_regs *regs) 91void show_regs(struct pt_regs *regs)
92{ 92{
93 show_registers(regs); 93 show_registers(regs);
94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
95 regs->bp);
96} 95}
97 96
98void show_regs_common(void) 97void show_regs_common(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d1b7df..68f61ac632e1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 /*
285 * This must be done before setting cpu_online_mask
286 * or calling notify_cpu_starting.
287 */
288 set_cpu_sibling_map(raw_smp_processor_id());
289 wmb();
290
284 notify_cpu_starting(cpuid); 291 notify_cpu_starting(cpuid);
285 292
286 /* 293 /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
316 */ 323 */
317 check_tsc_sync_target(); 324 check_tsc_sync_target();
318 325
319 if (nmi_watchdog == NMI_IO_APIC) {
320 legacy_pic->mask(0);
321 enable_NMI_through_LVT0();
322 legacy_pic->unmask(0);
323 }
324
325 /* This must be done before setting cpu_online_mask */
326 set_cpu_sibling_map(raw_smp_processor_id());
327 wmb();
328
329 /* 326 /*
330 * We need to hold call_lock, so there is no inconsistency 327 * We need to hold call_lock, so there is no inconsistency
331 * between the time smp_call_function() determines number of 328 * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
1061 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1062 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1063 1060
1064 localise_nmi_watchdog();
1065
1066 connect_bsp_APIC(); 1061 connect_bsp_APIC();
1067 setup_local_APIC(); 1062 setup_local_APIC();
1068 end_local_APIC_setup(); 1063 end_local_APIC_setup();
@@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1196#ifdef CONFIG_X86_IO_APIC 1191#ifdef CONFIG_X86_IO_APIC
1197 setup_ioapic_dest(); 1192 setup_ioapic_dest();
1198#endif 1193#endif
1199 check_nmi_watchdog();
1200 mtrr_aps_init(); 1194 mtrr_aps_init();
1201} 1195}
1202 1196
@@ -1341,8 +1335,6 @@ int native_cpu_disable(void)
1341 if (cpu == 0) 1335 if (cpu == 0)
1342 return -EBUSY; 1336 return -EBUSY;
1343 1337
1344 if (nmi_watchdog == NMI_LOCAL_APIC)
1345 stop_apic_nmi_watchdog(NULL);
1346 clear_local_APIC(); 1338 clear_local_APIC();
1347 1339
1348 cpu_disable_common(); 1340 cpu_disable_common();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
73 */ 73 */
74void save_stack_trace(struct stack_trace *trace) 74void save_stack_trace(struct stack_trace *trace)
75{ 75{
76 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 76 dump_trace(current, NULL, NULL, &save_stack_ops, trace);
77 if (trace->nr_entries < trace->max_entries) 77 if (trace->nr_entries < trace->max_entries)
78 trace->entries[trace->nr_entries++] = ULONG_MAX; 78 trace->entries[trace->nr_entries++] = ULONG_MAX;
79} 79}
80EXPORT_SYMBOL_GPL(save_stack_trace); 80EXPORT_SYMBOL_GPL(save_stack_trace);
81 81
82void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) 82void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
83{ 83{
84 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); 84 dump_trace(current, regs, NULL, &save_stack_ops, trace);
85 if (trace->nr_entries < trace->max_entries) 85 if (trace->nr_entries < trace->max_entries)
86 trace->entries[trace->nr_entries++] = ULONG_MAX; 86 trace->entries[trace->nr_entries++] = ULONG_MAX;
87} 87}
88 88
89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
90{ 90{
91 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 91 dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
92 if (trace->nr_entries < trace->max_entries) 92 if (trace->nr_entries < trace->max_entries)
93 trace->entries[trace->nr_entries++] = ULONG_MAX; 93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94} 94}
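save_stack_trace_bp() becomes save_stack_trace_regs(): instead of a raw frame pointer the caller supplies the full register set. A before/after sketch with a hypothetical caller (not part of this patch):

/* Sketch only: hypothetical caller migrated to the new interface. */
static void capture_trace(struct pt_regs *regs, struct stack_trace *trace)
{
	/* before: save_stack_trace_bp(trace, regs->bp); */
	save_stack_trace_regs(trace, regs);	/* the tracer reads bp (and more) from regs */
}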
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/time.h> 23#include <asm/time.h>
24 24
25#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
26int timer_ack;
27#endif
28
29#ifdef CONFIG_X86_64 25#ifdef CONFIG_X86_64
30volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 26volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
31#endif 27#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
63 /* Keep nmi watchdog up to date */ 59 /* Keep nmi watchdog up to date */
64 inc_irq_stat(irq0_irqs); 60 inc_irq_stat(irq0_irqs);
65 61
66 /* Optimized out for !IO_APIC and x86_64 */
67 if (timer_ack) {
68 /*
69 * Subtle, when I/O APICs are used we have to ack timer IRQ
70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system.
72 */
73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL);
77 raw_spin_unlock(&i8259A_lock);
78 }
79
80 global_clock_event->event_handler(global_clock_event); 62 global_clock_event->event_handler(global_clock_event);
81 63
82 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 64 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..f02c179c2552 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -398,15 +398,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
398 == NOTIFY_STOP) 398 == NOTIFY_STOP)
399 return; 399 return;
400 400
401#ifndef CONFIG_LOCKUP_DETECTOR
402 /*
403 * Ok, so this is none of the documented NMI sources,
404 * so it must be the NMI watchdog.
405 */
406 if (nmi_watchdog_tick(regs, reason))
407 return;
408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
410 unknown_nmi_error(reason, regs); 401 unknown_nmi_error(reason, regs);
411#else 402#else
412 unknown_nmi_error(reason, regs); 403 unknown_nmi_error(reason, regs);
@@ -446,14 +437,12 @@ do_nmi(struct pt_regs *regs, long error_code)
446 437
447void stop_nmi(void) 438void stop_nmi(void)
448{ 439{
449 acpi_nmi_disable();
450 ignore_nmis++; 440 ignore_nmis++;
451} 441}
452 442
453void restart_nmi(void) 443void restart_nmi(void)
454{ 444{
455 ignore_nmis--; 445 ignore_nmis--;
456 acpi_nmi_enable();
457} 446}
458 447
459/* May run on IST stack. */ 448/* May run on IST stack. */