diff options
Diffstat (limited to 'arch/x86')
36 files changed, 415 insertions, 1541 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 76561d20ea2f..4a2adaa9aefc 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); | |||
180 | * On the local CPU you need to be protected against NMI or MCE handlers seeing an | 180 | * On the local CPU you need to be protected against NMI or MCE handlers seeing an |
181 | * inconsistent instruction while you patch. | 181 | * inconsistent instruction while you patch. |
182 | */ | 182 | */ |
183 | struct text_poke_param { | ||
184 | void *addr; | ||
185 | const void *opcode; | ||
186 | size_t len; | ||
187 | }; | ||
188 | |||
183 | extern void *text_poke(void *addr, const void *opcode, size_t len); | 189 | extern void *text_poke(void *addr, const void *opcode, size_t len); |
184 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); | 190 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); |
191 | extern void text_poke_smp_batch(struct text_poke_param *params, int n); | ||
185 | 192 | ||
186 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | 193 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) |
187 | #define IDEAL_NOP_SIZE_5 5 | 194 | #define IDEAL_NOP_SIZE_5 5 |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 13b0ebaa512f..ba870bb6dd8e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq) | |||
15 | return ((irq == 2) ? 9 : irq); | 15 | return ((irq == 2) ? 9 : irq); |
16 | } | 16 | } |
17 | 17 | ||
18 | #ifdef CONFIG_X86_LOCAL_APIC | ||
19 | # define ARCH_HAS_NMI_WATCHDOG | ||
20 | #endif | ||
21 | |||
22 | #ifdef CONFIG_X86_32 | 18 | #ifdef CONFIG_X86_32 |
23 | extern void irq_ctx_init(int cpu); | 19 | extern void irq_ctx_init(int cpu); |
24 | #else | 20 | #else |
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca86581b..f23eb2528464 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h | |||
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long); | |||
28 | extern int __must_check __die(const char *, struct pt_regs *, long); | 28 | extern int __must_check __die(const char *, struct pt_regs *, long); |
29 | extern void show_registers(struct pt_regs *regs); | 29 | extern void show_registers(struct pt_regs *regs); |
30 | extern void show_trace(struct task_struct *t, struct pt_regs *regs, | 30 | extern void show_trace(struct task_struct *t, struct pt_regs *regs, |
31 | unsigned long *sp, unsigned long bp); | 31 | unsigned long *sp); |
32 | extern void __show_regs(struct pt_regs *regs, int all); | 32 | extern void __show_regs(struct pt_regs *regs, int all); |
33 | extern void show_regs(struct pt_regs *regs); | 33 | extern void show_regs(struct pt_regs *regs); |
34 | extern unsigned long oops_begin(void); | 34 | extern unsigned long oops_begin(void); |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 6b89f5e86021..86030f63ba02 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -123,6 +123,10 @@ | |||
123 | #define MSR_AMD64_IBSCTL 0xc001103a | 123 | #define MSR_AMD64_IBSCTL 0xc001103a |
124 | #define MSR_AMD64_IBSBRTARGET 0xc001103b | 124 | #define MSR_AMD64_IBSBRTARGET 0xc001103b |
125 | 125 | ||
126 | /* Fam 15h MSRs */ | ||
127 | #define MSR_F15H_PERF_CTL 0xc0010200 | ||
128 | #define MSR_F15H_PERF_CTR 0xc0010201 | ||
129 | |||
126 | /* Fam 10h MSRs */ | 130 | /* Fam 10h MSRs */ |
127 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 | 131 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 |
128 | #define FAM10H_MMIO_CONF_ENABLE (1<<0) | 132 | #define FAM10H_MMIO_CONF_ENABLE (1<<0) |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 932f0f86b4b7..c4021b953510 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -5,41 +5,15 @@ | |||
5 | #include <asm/irq.h> | 5 | #include <asm/irq.h> |
6 | #include <asm/io.h> | 6 | #include <asm/io.h> |
7 | 7 | ||
8 | #ifdef ARCH_HAS_NMI_WATCHDOG | 8 | #ifdef CONFIG_X86_LOCAL_APIC |
9 | |||
10 | /** | ||
11 | * do_nmi_callback | ||
12 | * | ||
13 | * Check to see if a callback exists and execute it. Return 1 | ||
14 | * if the handler exists and was handled successfully. | ||
15 | */ | ||
16 | int do_nmi_callback(struct pt_regs *regs, int cpu); | ||
17 | 9 | ||
18 | extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); | 10 | extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); |
19 | extern int check_nmi_watchdog(void); | ||
20 | #if !defined(CONFIG_LOCKUP_DETECTOR) | ||
21 | extern int nmi_watchdog_enabled; | ||
22 | #endif | ||
23 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); | 11 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); |
24 | extern int reserve_perfctr_nmi(unsigned int); | 12 | extern int reserve_perfctr_nmi(unsigned int); |
25 | extern void release_perfctr_nmi(unsigned int); | 13 | extern void release_perfctr_nmi(unsigned int); |
26 | extern int reserve_evntsel_nmi(unsigned int); | 14 | extern int reserve_evntsel_nmi(unsigned int); |
27 | extern void release_evntsel_nmi(unsigned int); | 15 | extern void release_evntsel_nmi(unsigned int); |
28 | 16 | ||
29 | extern void setup_apic_nmi_watchdog(void *); | ||
30 | extern void stop_apic_nmi_watchdog(void *); | ||
31 | extern void disable_timer_nmi_watchdog(void); | ||
32 | extern void enable_timer_nmi_watchdog(void); | ||
33 | extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); | ||
34 | extern void cpu_nmi_set_wd_enabled(void); | ||
35 | |||
36 | extern atomic_t nmi_active; | ||
37 | extern unsigned int nmi_watchdog; | ||
38 | #define NMI_NONE 0 | ||
39 | #define NMI_IO_APIC 1 | ||
40 | #define NMI_LOCAL_APIC 2 | ||
41 | #define NMI_INVALID 3 | ||
42 | |||
43 | struct ctl_table; | 17 | struct ctl_table; |
44 | extern int proc_nmi_enabled(struct ctl_table *, int , | 18 | extern int proc_nmi_enabled(struct ctl_table *, int , |
45 | void __user *, size_t *, loff_t *); | 19 | void __user *, size_t *, loff_t *); |
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic; | |||
47 | 21 | ||
48 | void arch_trigger_all_cpu_backtrace(void); | 22 | void arch_trigger_all_cpu_backtrace(void); |
49 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace | 23 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace |
50 | |||
51 | static inline void localise_nmi_watchdog(void) | ||
52 | { | ||
53 | if (nmi_watchdog == NMI_IO_APIC) | ||
54 | nmi_watchdog = NMI_LOCAL_APIC; | ||
55 | } | ||
56 | |||
57 | /* check if nmi_watchdog is active (ie was specified at boot) */ | ||
58 | static inline int nmi_watchdog_active(void) | ||
59 | { | ||
60 | /* | ||
61 | * actually it should be: | ||
62 | * return (nmi_watchdog == NMI_LOCAL_APIC || | ||
63 | * nmi_watchdog == NMI_IO_APIC) | ||
64 | * but since they are power of two we could use a | ||
65 | * cheaper way --cvg | ||
66 | */ | ||
67 | return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); | ||
68 | } | ||
69 | #endif | 24 | #endif |
70 | 25 | ||
71 | void lapic_watchdog_stop(void); | ||
72 | int lapic_watchdog_init(unsigned nmi_hz); | ||
73 | int lapic_wd_event(unsigned nmi_hz); | ||
74 | unsigned lapic_adjust_nmi_hz(unsigned hz); | ||
75 | void disable_lapic_nmi_watchdog(void); | ||
76 | void enable_lapic_nmi_watchdog(void); | ||
77 | void stop_nmi(void); | 26 | void stop_nmi(void); |
78 | void restart_nmi(void); | 27 | void restart_nmi(void); |
79 | 28 | ||
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 550e26b1dbb3..d9d4dae305f6 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -125,7 +125,6 @@ union cpuid10_edx { | |||
125 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ | 125 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ |
126 | 126 | ||
127 | #ifdef CONFIG_PERF_EVENTS | 127 | #ifdef CONFIG_PERF_EVENTS |
128 | extern void init_hw_perf_events(void); | ||
129 | extern void perf_events_lapic_init(void); | 128 | extern void perf_events_lapic_init(void); |
130 | 129 | ||
131 | #define PERF_EVENT_INDEX_OFFSET 0 | 130 | #define PERF_EVENT_INDEX_OFFSET 0 |
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); | |||
156 | } | 155 | } |
157 | 156 | ||
158 | #else | 157 | #else |
159 | static inline void init_hw_perf_events(void) { } | ||
160 | static inline void perf_events_lapic_init(void) { } | 158 | static inline void perf_events_lapic_init(void) { } |
161 | #endif | 159 | #endif |
162 | 160 | ||
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index a70cd216be5d..295e2ff18a6a 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS { | |||
744 | }; | 744 | }; |
745 | 745 | ||
746 | /* | 746 | /* |
747 | * P4 PEBS specifics (Replay Event only) | ||
748 | * | ||
749 | * Format (bits): | ||
750 | * 0-6: metric from P4_PEBS_METRIC enum | ||
751 | * 7 : reserved | ||
752 | * 8 : reserved | ||
753 | * 9-11 : reserved | ||
754 | * | ||
755 | * Note we have UOP and PEBS bits reserved for now | 747 | * Note we have UOP and PEBS bits reserved for now |
756 | * just in case if we will need them once | 748 | * just in case if we will need them once |
757 | */ | 749 | */ |
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC { | |||
788 | P4_PEBS_METRIC__max | 780 | P4_PEBS_METRIC__max |
789 | }; | 781 | }; |
790 | 782 | ||
783 | /* | ||
784 | * Notes on internal configuration of ESCR+CCCR tuples | ||
785 | * | ||
786 | * Since P4 has quite a different architecture of | ||
787 | * performance registers compared with the "architectural" | ||
788 | * ones, and we have only 64 bits to keep the configuration | ||
789 | * of a performance event, the following trick is used. | ||
790 | * | ||
791 | * 1) Since both ESCR and CCCR registers have only low | ||
792 | * 32 bits valuable, we pack them into a single 64 bit | ||
793 | * configuration. Low 32 bits of such config correspond | ||
794 | * to low 32 bits of CCCR register and high 32 bits | ||
795 | * correspond to low 32 bits of ESCR register. | ||
796 | * | ||
797 | * 2) The meaning of every bit of such config field can | ||
798 | * be found in Intel SDM but it should be noted that | ||
799 | * we "borrow" some reserved bits for our own usage and | ||
800 | * clear them or set them to a proper value when we do | ||
801 | * a real write to hardware registers. | ||
802 | * | ||
803 | * 3) The format of bits of config is the following | ||
804 | * and should be either 0 or set to some predefined | ||
805 | * values: | ||
806 | * | ||
807 | * Low 32 bits | ||
808 | * ----------- | ||
809 | * 0-6: P4_PEBS_METRIC enum | ||
810 | * 7-11: reserved | ||
811 | * 12: reserved (Enable) | ||
812 | * 13-15: reserved (ESCR select) | ||
813 | * 16-17: Active Thread | ||
814 | * 18: Compare | ||
815 | * 19: Complement | ||
816 | * 20-23: Threshold | ||
817 | * 24: Edge | ||
818 | * 25: reserved (FORCE_OVF) | ||
819 | * 26: reserved (OVF_PMI_T0) | ||
820 | * 27: reserved (OVF_PMI_T1) | ||
821 | * 28-29: reserved | ||
822 | * 30: reserved (Cascade) | ||
823 | * 31: reserved (OVF) | ||
824 | * | ||
825 | * High 32 bits | ||
826 | * ------------ | ||
827 | * 0: reserved (T1_USR) | ||
828 | * 1: reserved (T1_OS) | ||
829 | * 2: reserved (T0_USR) | ||
830 | * 3: reserved (T0_OS) | ||
831 | * 4: Tag Enable | ||
832 | * 5-8: Tag Value | ||
833 | * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper) | ||
834 | * 25-30: enum P4_EVENTS | ||
835 | * 31: reserved (HT thread) | ||
836 | */ | ||
837 | |||
791 | #endif /* PERF_EVENT_P4_H */ | 838 | #endif /* PERF_EVENT_P4_H */ |
792 | 839 | ||
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 1def60114906..6c22bf353f26 100644 --- a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h | |||
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void) | |||
48 | setup_IO_APIC(); | 48 | setup_IO_APIC(); |
49 | else { | 49 | else { |
50 | nr_ioapics = 0; | 50 | nr_ioapics = 0; |
51 | localise_nmi_watchdog(); | ||
52 | } | 51 | } |
53 | #endif | 52 | #endif |
54 | } | 53 | } |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 2b16a2ad23dc..52b5c7ed3608 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #define _ASM_X86_STACKTRACE_H | 7 | #define _ASM_X86_STACKTRACE_H |
8 | 8 | ||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <linux/ptrace.h> | ||
10 | 11 | ||
11 | extern int kstack_depth_to_print; | 12 | extern int kstack_depth_to_print; |
12 | 13 | ||
@@ -46,7 +47,7 @@ struct stacktrace_ops { | |||
46 | }; | 47 | }; |
47 | 48 | ||
48 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | 49 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, |
49 | unsigned long *stack, unsigned long bp, | 50 | unsigned long *stack, |
50 | const struct stacktrace_ops *ops, void *data); | 51 | const struct stacktrace_ops *ops, void *data); |
51 | 52 | ||
52 | #ifdef CONFIG_X86_32 | 53 | #ifdef CONFIG_X86_32 |
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
57 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | 58 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) |
58 | #endif | 59 | #endif |
59 | 60 | ||
61 | #ifdef CONFIG_FRAME_POINTER | ||
62 | static inline unsigned long | ||
63 | stack_frame(struct task_struct *task, struct pt_regs *regs) | ||
64 | { | ||
65 | unsigned long bp; | ||
66 | |||
67 | if (regs) | ||
68 | return regs->bp; | ||
69 | |||
70 | if (task == current) { | ||
71 | /* Grab bp right from our regs */ | ||
72 | get_bp(bp); | ||
73 | return bp; | ||
74 | } | ||
75 | |||
76 | /* bp is the last reg pushed by switch_to */ | ||
77 | return *(unsigned long *)task->thread.sp; | ||
78 | } | ||
79 | #else | ||
80 | static inline unsigned long | ||
81 | stack_frame(struct task_struct *task, struct pt_regs *regs) | ||
82 | { | ||
83 | return 0; | ||
84 | } | ||
85 | #endif | ||
86 | |||
60 | extern void | 87 | extern void |
61 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 88 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
62 | unsigned long *stack, unsigned long bp, char *log_lvl); | 89 | unsigned long *stack, char *log_lvl); |
63 | 90 | ||
64 | extern void | 91 | extern void |
65 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 92 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
66 | unsigned long *sp, unsigned long bp, char *log_lvl); | 93 | unsigned long *sp, char *log_lvl); |
67 | 94 | ||
68 | extern unsigned int code_bytes; | 95 | extern unsigned int code_bytes; |
69 | 96 | ||
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 5469630b27f5..fa7b9176b76c 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h | |||
@@ -10,12 +10,6 @@ | |||
10 | unsigned long long native_sched_clock(void); | 10 | unsigned long long native_sched_clock(void); |
11 | extern int recalibrate_cpu_khz(void); | 11 | extern int recalibrate_cpu_khz(void); |
12 | 12 | ||
13 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) | ||
14 | extern int timer_ack; | ||
15 | #else | ||
16 | # define timer_ack (0) | ||
17 | #endif | ||
18 | |||
19 | extern int no_timer_check; | 13 | extern int no_timer_check; |
20 | 14 | ||
21 | /* Accelerators for sched_clock() | 15 | /* Accelerators for sched_clock() |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5079f24c955a..553d0b0d639b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first; | |||
591 | static int wrote_text; | 591 | static int wrote_text; |
592 | 592 | ||
593 | struct text_poke_params { | 593 | struct text_poke_params { |
594 | void *addr; | 594 | struct text_poke_param *params; |
595 | const void *opcode; | 595 | int nparams; |
596 | size_t len; | ||
597 | }; | 596 | }; |
598 | 597 | ||
599 | static int __kprobes stop_machine_text_poke(void *data) | 598 | static int __kprobes stop_machine_text_poke(void *data) |
600 | { | 599 | { |
601 | struct text_poke_params *tpp = data; | 600 | struct text_poke_params *tpp = data; |
601 | struct text_poke_param *p; | ||
602 | int i; | ||
602 | 603 | ||
603 | if (atomic_dec_and_test(&stop_machine_first)) { | 604 | if (atomic_dec_and_test(&stop_machine_first)) { |
604 | text_poke(tpp->addr, tpp->opcode, tpp->len); | 605 | for (i = 0; i < tpp->nparams; i++) { |
606 | p = &tpp->params[i]; | ||
607 | text_poke(p->addr, p->opcode, p->len); | ||
608 | } | ||
605 | smp_wmb(); /* Make sure other cpus see that this has run */ | 609 | smp_wmb(); /* Make sure other cpus see that this has run */ |
606 | wrote_text = 1; | 610 | wrote_text = 1; |
607 | } else { | 611 | } else { |
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
610 | smp_mb(); /* Load wrote_text before following execution */ | 614 | smp_mb(); /* Load wrote_text before following execution */ |
611 | } | 615 | } |
612 | 616 | ||
613 | flush_icache_range((unsigned long)tpp->addr, | 617 | for (i = 0; i < tpp->nparams; i++) { |
614 | (unsigned long)tpp->addr + tpp->len); | 618 | p = &tpp->params[i]; |
619 | flush_icache_range((unsigned long)p->addr, | ||
620 | (unsigned long)p->addr + p->len); | ||
621 | } | ||
622 | |||
615 | return 0; | 623 | return 0; |
616 | } | 624 | } |
617 | 625 | ||
@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
631 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | 639 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) |
632 | { | 640 | { |
633 | struct text_poke_params tpp; | 641 | struct text_poke_params tpp; |
642 | struct text_poke_param p; | ||
634 | 643 | ||
635 | tpp.addr = addr; | 644 | p.addr = addr; |
636 | tpp.opcode = opcode; | 645 | p.opcode = opcode; |
637 | tpp.len = len; | 646 | p.len = len; |
647 | tpp.params = &p; | ||
648 | tpp.nparams = 1; | ||
638 | atomic_set(&stop_machine_first, 1); | 649 | atomic_set(&stop_machine_first, 1); |
639 | wrote_text = 0; | 650 | wrote_text = 0; |
640 | /* Use __stop_machine() because the caller already got online_cpus. */ | 651 | /* Use __stop_machine() because the caller already got online_cpus. */ |
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | |||
642 | return addr; | 653 | return addr; |
643 | } | 654 | } |
644 | 655 | ||
656 | /** | ||
657 | * text_poke_smp_batch - Update instructions on a live kernel on SMP | ||
658 | * @params: an array of text_poke parameters | ||
659 | * @n: the number of elements in params. | ||
660 | * | ||
661 | * Modify multi-byte instructions by using stop_machine() on SMP. Since | ||
662 | * stop_machine() is a heavy task, it is better to aggregate text_poke requests | ||
663 | * and do them all at once if possible. | ||
664 | * | ||
665 | * Note: Must be called under get_online_cpus() and text_mutex. | ||
666 | */ | ||
667 | void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) | ||
668 | { | ||
669 | struct text_poke_params tpp = {.params = params, .nparams = n}; | ||
670 | |||
671 | atomic_set(&stop_machine_first, 1); | ||
672 | wrote_text = 0; | ||
673 | stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | ||
674 | } | ||
675 | |||
645 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | 676 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) |
646 | 677 | ||
647 | #ifdef CONFIG_X86_64 | 678 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 910f20b457c4..3966b564ea47 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -3,10 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o |
6 | ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) | 6 | obj-y += hw_nmi.o |
7 | obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o | ||
8 | endif | ||
9 | obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o | ||
10 | 7 | ||
11 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 8 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
12 | obj-$(CONFIG_SMP) += ipi.o | 9 | obj-$(CONFIG_SMP) += ipi.o |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 78218135b48e..fb7657822aad 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | #include <linux/cpu.h> | 32 | #include <linux/cpu.h> |
33 | #include <linux/dmi.h> | 33 | #include <linux/dmi.h> |
34 | #include <linux/nmi.h> | ||
35 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
36 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
37 | 36 | ||
@@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void) | |||
799 | * PIT/HPET going. Otherwise register lapic as a dummy | 798 | * PIT/HPET going. Otherwise register lapic as a dummy |
800 | * device. | 799 | * device. |
801 | */ | 800 | */ |
802 | if (nmi_watchdog != NMI_IO_APIC) | 801 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; |
803 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
804 | else | ||
805 | pr_warning("APIC timer registered as dummy," | ||
806 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
807 | 802 | ||
808 | /* Setup the lapic or request the broadcast */ | 803 | /* Setup the lapic or request the broadcast */ |
809 | setup_APIC_timer(); | 804 | setup_APIC_timer(); |
@@ -1387,7 +1382,6 @@ void __cpuinit end_local_APIC_setup(void) | |||
1387 | } | 1382 | } |
1388 | #endif | 1383 | #endif |
1389 | 1384 | ||
1390 | setup_apic_nmi_watchdog(NULL); | ||
1391 | apic_pm_activate(); | 1385 | apic_pm_activate(); |
1392 | 1386 | ||
1393 | /* | 1387 | /* |
@@ -1758,17 +1752,10 @@ int __init APIC_init_uniprocessor(void) | |||
1758 | setup_IO_APIC(); | 1752 | setup_IO_APIC(); |
1759 | else { | 1753 | else { |
1760 | nr_ioapics = 0; | 1754 | nr_ioapics = 0; |
1761 | localise_nmi_watchdog(); | ||
1762 | } | 1755 | } |
1763 | #else | ||
1764 | localise_nmi_watchdog(); | ||
1765 | #endif | 1756 | #endif |
1766 | 1757 | ||
1767 | x86_init.timers.setup_percpu_clockev(); | 1758 | x86_init.timers.setup_percpu_clockev(); |
1768 | #ifdef CONFIG_X86_64 | ||
1769 | check_nmi_watchdog(); | ||
1770 | #endif | ||
1771 | |||
1772 | return 0; | 1759 | return 0; |
1773 | } | 1760 | } |
1774 | 1761 | ||
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 62f6e1e55b90..c57d0b599448 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -17,13 +17,14 @@ | |||
17 | #include <linux/nmi.h> | 17 | #include <linux/nmi.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | 19 | ||
20 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | ||
20 | u64 hw_nmi_get_sample_period(void) | 21 | u64 hw_nmi_get_sample_period(void) |
21 | { | 22 | { |
22 | return (u64)(cpu_khz) * 1000 * 60; | 23 | return (u64)(cpu_khz) * 1000 * 60; |
23 | } | 24 | } |
25 | #endif | ||
24 | 26 | ||
25 | #ifdef ARCH_HAS_NMI_WATCHDOG | 27 | #ifdef arch_trigger_all_cpu_backtrace |
26 | |||
27 | /* For reliability, we're prepared to waste bits here. */ | 28 | /* For reliability, we're prepared to waste bits here. */ |
28 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | 29 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; |
29 | 30 | ||
@@ -91,18 +92,3 @@ static int __init register_trigger_all_cpu_backtrace(void) | |||
91 | } | 92 | } |
92 | early_initcall(register_trigger_all_cpu_backtrace); | 93 | early_initcall(register_trigger_all_cpu_backtrace); |
93 | #endif | 94 | #endif |
94 | |||
95 | /* STUB calls to mimic old nmi_watchdog behaviour */ | ||
96 | #if defined(CONFIG_X86_LOCAL_APIC) | ||
97 | unsigned int nmi_watchdog = NMI_NONE; | ||
98 | EXPORT_SYMBOL(nmi_watchdog); | ||
99 | void acpi_nmi_enable(void) { return; } | ||
100 | void acpi_nmi_disable(void) { return; } | ||
101 | #endif | ||
102 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
103 | EXPORT_SYMBOL(nmi_active); | ||
104 | int unknown_nmi_panic; | ||
105 | void cpu_nmi_set_wd_enabled(void) { return; } | ||
106 | void stop_apic_nmi_watchdog(void *unused) { return; } | ||
107 | void setup_apic_nmi_watchdog(void *unused) { return; } | ||
108 | int __init check_nmi_watchdog(void) { return 0; } | ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index fadcd743a74f..16c2db8750a2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -54,7 +54,6 @@ | |||
54 | #include <asm/dma.h> | 54 | #include <asm/dma.h> |
55 | #include <asm/timer.h> | 55 | #include <asm/timer.h> |
56 | #include <asm/i8259.h> | 56 | #include <asm/i8259.h> |
57 | #include <asm/nmi.h> | ||
58 | #include <asm/msidef.h> | 57 | #include <asm/msidef.h> |
59 | #include <asm/hypertransport.h> | 58 | #include <asm/hypertransport.h> |
60 | #include <asm/setup.h> | 59 | #include <asm/setup.h> |
@@ -2642,24 +2641,6 @@ static void lapic_register_intr(int irq) | |||
2642 | "edge"); | 2641 | "edge"); |
2643 | } | 2642 | } |
2644 | 2643 | ||
2645 | static void __init setup_nmi(void) | ||
2646 | { | ||
2647 | /* | ||
2648 | * Dirty trick to enable the NMI watchdog ... | ||
2649 | * We put the 8259A master into AEOI mode and | ||
2650 | * unmask on all local APICs LVT0 as NMI. | ||
2651 | * | ||
2652 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') | ||
2653 | * is from Maciej W. Rozycki - so we do not have to EOI from | ||
2654 | * the NMI handler or the timer interrupt. | ||
2655 | */ | ||
2656 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | ||
2657 | |||
2658 | enable_NMI_through_LVT0(); | ||
2659 | |||
2660 | apic_printk(APIC_VERBOSE, " done.\n"); | ||
2661 | } | ||
2662 | |||
2663 | /* | 2644 | /* |
2664 | * This looks a bit hackish but it's about the only one way of sending | 2645 | * This looks a bit hackish but it's about the only one way of sending |
2665 | * a few INTA cycles to 8259As and any associated glue logic. ICR does | 2646 | * a few INTA cycles to 8259As and any associated glue logic. ICR does |
@@ -2765,15 +2746,6 @@ static inline void __init check_timer(void) | |||
2765 | */ | 2746 | */ |
2766 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2747 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2767 | legacy_pic->init(1); | 2748 | legacy_pic->init(1); |
2768 | #ifdef CONFIG_X86_32 | ||
2769 | { | ||
2770 | unsigned int ver; | ||
2771 | |||
2772 | ver = apic_read(APIC_LVR); | ||
2773 | ver = GET_APIC_VERSION(ver); | ||
2774 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
2775 | } | ||
2776 | #endif | ||
2777 | 2749 | ||
2778 | pin1 = find_isa_irq_pin(0, mp_INT); | 2750 | pin1 = find_isa_irq_pin(0, mp_INT); |
2779 | apic1 = find_isa_irq_apic(0, mp_INT); | 2751 | apic1 = find_isa_irq_apic(0, mp_INT); |
@@ -2821,10 +2793,6 @@ static inline void __init check_timer(void) | |||
2821 | unmask_ioapic(cfg); | 2793 | unmask_ioapic(cfg); |
2822 | } | 2794 | } |
2823 | if (timer_irq_works()) { | 2795 | if (timer_irq_works()) { |
2824 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2825 | setup_nmi(); | ||
2826 | legacy_pic->unmask(0); | ||
2827 | } | ||
2828 | if (disable_timer_pin_1 > 0) | 2796 | if (disable_timer_pin_1 > 0) |
2829 | clear_IO_APIC_pin(0, pin1); | 2797 | clear_IO_APIC_pin(0, pin1); |
2830 | goto out; | 2798 | goto out; |
@@ -2850,11 +2818,6 @@ static inline void __init check_timer(void) | |||
2850 | if (timer_irq_works()) { | 2818 | if (timer_irq_works()) { |
2851 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2819 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
2852 | timer_through_8259 = 1; | 2820 | timer_through_8259 = 1; |
2853 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2854 | legacy_pic->mask(0); | ||
2855 | setup_nmi(); | ||
2856 | legacy_pic->unmask(0); | ||
2857 | } | ||
2858 | goto out; | 2821 | goto out; |
2859 | } | 2822 | } |
2860 | /* | 2823 | /* |
@@ -2866,15 +2829,6 @@ static inline void __init check_timer(void) | |||
2866 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 2829 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
2867 | } | 2830 | } |
2868 | 2831 | ||
2869 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2870 | apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " | ||
2871 | "through the IO-APIC - disabling NMI Watchdog!\n"); | ||
2872 | nmi_watchdog = NMI_NONE; | ||
2873 | } | ||
2874 | #ifdef CONFIG_X86_32 | ||
2875 | timer_ack = 0; | ||
2876 | #endif | ||
2877 | |||
2878 | apic_printk(APIC_QUIET, KERN_INFO | 2832 | apic_printk(APIC_QUIET, KERN_INFO |
2879 | "...trying to set up timer as Virtual Wire IRQ...\n"); | 2833 | "...trying to set up timer as Virtual Wire IRQ...\n"); |
2880 | 2834 | ||
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c deleted file mode 100644 index c90041ccb742..000000000000 --- a/arch/x86/kernel/apic/nmi.c +++ /dev/null | |||
@@ -1,567 +0,0 @@ | |||
1 | /* | ||
2 | * NMI watchdog support on APIC systems | ||
3 | * | ||
4 | * Started by Ingo Molnar <mingo@redhat.com> | ||
5 | * | ||
6 | * Fixes: | ||
7 | * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. | ||
8 | * Mikael Pettersson : Power Management for local APIC NMI watchdog. | ||
9 | * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. | ||
10 | * Pavel Machek and | ||
11 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. | ||
12 | */ | ||
13 | |||
14 | #include <asm/apic.h> | ||
15 | |||
16 | #include <linux/nmi.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/delay.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/sysdev.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/percpu.h> | ||
25 | #include <linux/kprobes.h> | ||
26 | #include <linux/cpumask.h> | ||
27 | #include <linux/kernel_stat.h> | ||
28 | #include <linux/kdebug.h> | ||
29 | #include <linux/smp.h> | ||
30 | |||
31 | #include <asm/i8259.h> | ||
32 | #include <asm/io_apic.h> | ||
33 | #include <asm/proto.h> | ||
34 | #include <asm/timer.h> | ||
35 | |||
36 | #include <asm/mce.h> | ||
37 | |||
38 | #include <asm/mach_traps.h> | ||
39 | |||
40 | int unknown_nmi_panic; | ||
41 | int nmi_watchdog_enabled; | ||
42 | |||
43 | /* For reliability, we're prepared to waste bits here. */ | ||
44 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
45 | |||
46 | /* nmi_active: | ||
47 | * >0: the lapic NMI watchdog is active, but can be disabled | ||
48 | * <0: the lapic NMI watchdog has not been set up, and cannot | ||
49 | * be enabled | ||
50 | * 0: the lapic NMI watchdog is disabled, but can be enabled | ||
51 | */ | ||
52 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
53 | EXPORT_SYMBOL(nmi_active); | ||
54 | |||
55 | unsigned int nmi_watchdog = NMI_NONE; | ||
56 | EXPORT_SYMBOL(nmi_watchdog); | ||
57 | |||
58 | static int panic_on_timeout; | ||
59 | |||
60 | static unsigned int nmi_hz = HZ; | ||
61 | static DEFINE_PER_CPU(short, wd_enabled); | ||
62 | static int endflag __initdata; | ||
63 | |||
64 | static inline unsigned int get_nmi_count(int cpu) | ||
65 | { | ||
66 | return per_cpu(irq_stat, cpu).__nmi_count; | ||
67 | } | ||
68 | |||
69 | static inline int mce_in_progress(void) | ||
70 | { | ||
71 | #if defined(CONFIG_X86_MCE) | ||
72 | return atomic_read(&mce_entry) > 0; | ||
73 | #endif | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Take the local apic timer and PIT/HPET into account. We don't | ||
79 | * know which one is active, when we have highres/dyntick on | ||
80 | */ | ||
81 | static inline unsigned int get_timer_irqs(int cpu) | ||
82 | { | ||
83 | return per_cpu(irq_stat, cpu).apic_timer_irqs + | ||
84 | per_cpu(irq_stat, cpu).irq0_irqs; | ||
85 | } | ||
86 | |||
87 | #ifdef CONFIG_SMP | ||
88 | /* | ||
89 | * The performance counters used by NMI_LOCAL_APIC don't trigger when | ||
90 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | ||
91 | * CPUs during the test make them busy. | ||
92 | */ | ||
93 | static __init void nmi_cpu_busy(void *data) | ||
94 | { | ||
95 | local_irq_enable_in_hardirq(); | ||
96 | /* | ||
97 | * Intentionally don't use cpu_relax here. This is | ||
98 | * to make sure that the performance counter really ticks, | ||
99 | * even if there is a simulator or similar that catches the | ||
100 | * pause instruction. On a real HT machine this is fine because | ||
101 | * all other CPUs are busy with "useless" delay loops and don't | ||
102 | * care if they get somewhat less cycles. | ||
103 | */ | ||
104 | while (endflag == 0) | ||
105 | mb(); | ||
106 | } | ||
107 | #endif | ||
108 | |||
109 | static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) | ||
110 | { | ||
111 | printk(KERN_CONT "\n"); | ||
112 | |||
113 | printk(KERN_WARNING | ||
114 | "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
115 | cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); | ||
116 | |||
117 | printk(KERN_WARNING | ||
118 | "Please report this to bugzilla.kernel.org,\n"); | ||
119 | printk(KERN_WARNING | ||
120 | "and attach the output of the 'dmesg' command.\n"); | ||
121 | |||
122 | per_cpu(wd_enabled, cpu) = 0; | ||
123 | atomic_dec(&nmi_active); | ||
124 | } | ||
125 | |||
126 | static void __acpi_nmi_disable(void *__unused) | ||
127 | { | ||
128 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
129 | } | ||
130 | |||
131 | int __init check_nmi_watchdog(void) | ||
132 | { | ||
133 | unsigned int *prev_nmi_count; | ||
134 | int cpu; | ||
135 | |||
136 | if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) | ||
137 | return 0; | ||
138 | |||
139 | prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); | ||
140 | if (!prev_nmi_count) | ||
141 | goto error; | ||
142 | |||
143 | printk(KERN_INFO "Testing NMI watchdog ... "); | ||
144 | |||
145 | #ifdef CONFIG_SMP | ||
146 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
147 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); | ||
148 | #endif | ||
149 | |||
150 | for_each_possible_cpu(cpu) | ||
151 | prev_nmi_count[cpu] = get_nmi_count(cpu); | ||
152 | local_irq_enable(); | ||
153 | mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ | ||
154 | |||
155 | for_each_online_cpu(cpu) { | ||
156 | if (!per_cpu(wd_enabled, cpu)) | ||
157 | continue; | ||
158 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) | ||
159 | report_broken_nmi(cpu, prev_nmi_count); | ||
160 | } | ||
161 | endflag = 1; | ||
162 | if (!atomic_read(&nmi_active)) { | ||
163 | kfree(prev_nmi_count); | ||
164 | atomic_set(&nmi_active, -1); | ||
165 | goto error; | ||
166 | } | ||
167 | printk("OK.\n"); | ||
168 | |||
169 | /* | ||
170 | * now that we know it works we can reduce NMI frequency to | ||
171 | * something more reasonable; makes a difference in some configs | ||
172 | */ | ||
173 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
174 | nmi_hz = lapic_adjust_nmi_hz(1); | ||
175 | |||
176 | kfree(prev_nmi_count); | ||
177 | return 0; | ||
178 | error: | ||
179 | if (nmi_watchdog == NMI_IO_APIC) { | ||
180 | if (!timer_through_8259) | ||
181 | legacy_pic->mask(0); | ||
182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
183 | } | ||
184 | |||
185 | #ifdef CONFIG_X86_32 | ||
186 | timer_ack = 0; | ||
187 | #endif | ||
188 | return -1; | ||
189 | } | ||
190 | |||
191 | static int __init setup_nmi_watchdog(char *str) | ||
192 | { | ||
193 | unsigned int nmi; | ||
194 | |||
195 | if (!strncmp(str, "panic", 5)) { | ||
196 | panic_on_timeout = 1; | ||
197 | str = strchr(str, ','); | ||
198 | if (!str) | ||
199 | return 1; | ||
200 | ++str; | ||
201 | } | ||
202 | |||
203 | if (!strncmp(str, "lapic", 5)) | ||
204 | nmi_watchdog = NMI_LOCAL_APIC; | ||
205 | else if (!strncmp(str, "ioapic", 6)) | ||
206 | nmi_watchdog = NMI_IO_APIC; | ||
207 | else { | ||
208 | get_option(&str, &nmi); | ||
209 | if (nmi >= NMI_INVALID) | ||
210 | return 0; | ||
211 | nmi_watchdog = nmi; | ||
212 | } | ||
213 | |||
214 | return 1; | ||
215 | } | ||
216 | __setup("nmi_watchdog=", setup_nmi_watchdog); | ||
217 | |||
218 | /* | ||
219 | * Suspend/resume support | ||
220 | */ | ||
221 | #ifdef CONFIG_PM | ||
222 | |||
223 | static int nmi_pm_active; /* nmi_active before suspend */ | ||
224 | |||
225 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | ||
226 | { | ||
227 | /* only CPU0 goes here, other CPUs should be offline */ | ||
228 | nmi_pm_active = atomic_read(&nmi_active); | ||
229 | stop_apic_nmi_watchdog(NULL); | ||
230 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static int lapic_nmi_resume(struct sys_device *dev) | ||
235 | { | ||
236 | /* only CPU0 goes here, other CPUs should be offline */ | ||
237 | if (nmi_pm_active > 0) { | ||
238 | setup_apic_nmi_watchdog(NULL); | ||
239 | touch_nmi_watchdog(); | ||
240 | } | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | static struct sysdev_class nmi_sysclass = { | ||
245 | .name = "lapic_nmi", | ||
246 | .resume = lapic_nmi_resume, | ||
247 | .suspend = lapic_nmi_suspend, | ||
248 | }; | ||
249 | |||
250 | static struct sys_device device_lapic_nmi = { | ||
251 | .id = 0, | ||
252 | .cls = &nmi_sysclass, | ||
253 | }; | ||
254 | |||
255 | static int __init init_lapic_nmi_sysfs(void) | ||
256 | { | ||
257 | int error; | ||
258 | |||
259 | /* | ||
260 | * should really be a BUG_ON but b/c this is an | ||
261 | * init call, it just doesn't work. -dcz | ||
262 | */ | ||
263 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
264 | return 0; | ||
265 | |||
266 | if (atomic_read(&nmi_active) < 0) | ||
267 | return 0; | ||
268 | |||
269 | error = sysdev_class_register(&nmi_sysclass); | ||
270 | if (!error) | ||
271 | error = sysdev_register(&device_lapic_nmi); | ||
272 | return error; | ||
273 | } | ||
274 | |||
275 | /* must come after the local APIC's device_initcall() */ | ||
276 | late_initcall(init_lapic_nmi_sysfs); | ||
277 | |||
278 | #endif /* CONFIG_PM */ | ||
279 | |||
280 | static void __acpi_nmi_enable(void *__unused) | ||
281 | { | ||
282 | apic_write(APIC_LVT0, APIC_DM_NMI); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * Enable timer based NMIs on all CPUs: | ||
287 | */ | ||
288 | void acpi_nmi_enable(void) | ||
289 | { | ||
290 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
291 | on_each_cpu(__acpi_nmi_enable, NULL, 1); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Disable timer based NMIs on all CPUs: | ||
296 | */ | ||
297 | void acpi_nmi_disable(void) | ||
298 | { | ||
299 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
300 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * This function is called as soon the LAPIC NMI watchdog driver has everything | ||
305 | * in place and it's ready to check if the NMIs belong to the NMI watchdog | ||
306 | */ | ||
307 | void cpu_nmi_set_wd_enabled(void) | ||
308 | { | ||
309 | __get_cpu_var(wd_enabled) = 1; | ||
310 | } | ||
311 | |||
312 | void setup_apic_nmi_watchdog(void *unused) | ||
313 | { | ||
314 | if (__get_cpu_var(wd_enabled)) | ||
315 | return; | ||
316 | |||
317 | /* cheap hack to support suspend/resume */ | ||
318 | /* if cpu0 is not active neither should the other cpus */ | ||
319 | if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) | ||
320 | return; | ||
321 | |||
322 | switch (nmi_watchdog) { | ||
323 | case NMI_LOCAL_APIC: | ||
324 | if (lapic_watchdog_init(nmi_hz) < 0) { | ||
325 | __get_cpu_var(wd_enabled) = 0; | ||
326 | return; | ||
327 | } | ||
328 | /* FALL THROUGH */ | ||
329 | case NMI_IO_APIC: | ||
330 | __get_cpu_var(wd_enabled) = 1; | ||
331 | atomic_inc(&nmi_active); | ||
332 | } | ||
333 | } | ||
334 | |||
335 | void stop_apic_nmi_watchdog(void *unused) | ||
336 | { | ||
337 | /* only support LOCAL and IO APICs for now */ | ||
338 | if (!nmi_watchdog_active()) | ||
339 | return; | ||
340 | if (__get_cpu_var(wd_enabled) == 0) | ||
341 | return; | ||
342 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
343 | lapic_watchdog_stop(); | ||
344 | else | ||
345 | __acpi_nmi_disable(NULL); | ||
346 | __get_cpu_var(wd_enabled) = 0; | ||
347 | atomic_dec(&nmi_active); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * the best way to detect whether a CPU has a 'hard lockup' problem | ||
352 | * is to check it's local APIC timer IRQ counts. If they are not | ||
353 | * changing then that CPU has some problem. | ||
354 | * | ||
355 | * as these watchdog NMI IRQs are generated on every CPU, we only | ||
356 | * have to check the current processor. | ||
357 | * | ||
358 | * since NMIs don't listen to _any_ locks, we have to be extremely | ||
359 | * careful not to rely on unsafe variables. The printk might lock | ||
360 | * up though, so we have to break up any console locks first ... | ||
361 | * [when there will be more tty-related locks, break them up here too!] | ||
362 | */ | ||
363 | |||
364 | static DEFINE_PER_CPU(unsigned, last_irq_sum); | ||
365 | static DEFINE_PER_CPU(long, alert_counter); | ||
366 | static DEFINE_PER_CPU(int, nmi_touch); | ||
367 | |||
368 | void touch_nmi_watchdog(void) | ||
369 | { | ||
370 | if (nmi_watchdog_active()) { | ||
371 | unsigned cpu; | ||
372 | |||
373 | /* | ||
374 | * Tell other CPUs to reset their alert counters. We cannot | ||
375 | * do it ourselves because the alert count increase is not | ||
376 | * atomic. | ||
377 | */ | ||
378 | for_each_present_cpu(cpu) { | ||
379 | if (per_cpu(nmi_touch, cpu) != 1) | ||
380 | per_cpu(nmi_touch, cpu) = 1; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Tickle the softlockup detector too: | ||
386 | */ | ||
387 | touch_softlockup_watchdog(); | ||
388 | } | ||
389 | EXPORT_SYMBOL(touch_nmi_watchdog); | ||
390 | |||
391 | notrace __kprobes int | ||
392 | nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | ||
393 | { | ||
394 | /* | ||
395 | * Since current_thread_info()-> is always on the stack, and we | ||
396 | * always switch the stack NMI-atomically, it's safe to use | ||
397 | * smp_processor_id(). | ||
398 | */ | ||
399 | unsigned int sum; | ||
400 | int touched = 0; | ||
401 | int cpu = smp_processor_id(); | ||
402 | int rc = 0; | ||
403 | |||
404 | sum = get_timer_irqs(cpu); | ||
405 | |||
406 | if (__get_cpu_var(nmi_touch)) { | ||
407 | __get_cpu_var(nmi_touch) = 0; | ||
408 | touched = 1; | ||
409 | } | ||
410 | |||
411 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | ||
412 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | ||
413 | static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ | ||
414 | |||
415 | raw_spin_lock(&lock); | ||
416 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | ||
417 | show_regs(regs); | ||
418 | dump_stack(); | ||
419 | raw_spin_unlock(&lock); | ||
420 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | ||
421 | |||
422 | rc = 1; | ||
423 | } | ||
424 | |||
425 | /* Could check oops_in_progress here too, but it's safer not to */ | ||
426 | if (mce_in_progress()) | ||
427 | touched = 1; | ||
428 | |||
429 | /* if the none of the timers isn't firing, this cpu isn't doing much */ | ||
430 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | ||
431 | /* | ||
432 | * Ayiee, looks like this CPU is stuck ... | ||
433 | * wait a few IRQs (5 seconds) before doing the oops ... | ||
434 | */ | ||
435 | __this_cpu_inc(alert_counter); | ||
436 | if (__this_cpu_read(alert_counter) == 5 * nmi_hz) | ||
437 | /* | ||
438 | * die_nmi will return ONLY if NOTIFY_STOP happens.. | ||
439 | */ | ||
440 | die_nmi("BUG: NMI Watchdog detected LOCKUP", | ||
441 | regs, panic_on_timeout); | ||
442 | } else { | ||
443 | __get_cpu_var(last_irq_sum) = sum; | ||
444 | __this_cpu_write(alert_counter, 0); | ||
445 | } | ||
446 | |||
447 | /* see if the nmi watchdog went off */ | ||
448 | if (!__get_cpu_var(wd_enabled)) | ||
449 | return rc; | ||
450 | switch (nmi_watchdog) { | ||
451 | case NMI_LOCAL_APIC: | ||
452 | rc |= lapic_wd_event(nmi_hz); | ||
453 | break; | ||
454 | case NMI_IO_APIC: | ||
455 | /* | ||
456 | * don't know how to accurately check for this. | ||
457 | * just assume it was a watchdog timer interrupt | ||
458 | * This matches the old behaviour. | ||
459 | */ | ||
460 | rc = 1; | ||
461 | break; | ||
462 | } | ||
463 | return rc; | ||
464 | } | ||
465 | |||
466 | #ifdef CONFIG_SYSCTL | ||
467 | |||
468 | static void enable_ioapic_nmi_watchdog_single(void *unused) | ||
469 | { | ||
470 | __get_cpu_var(wd_enabled) = 1; | ||
471 | atomic_inc(&nmi_active); | ||
472 | __acpi_nmi_enable(NULL); | ||
473 | } | ||
474 | |||
475 | static void enable_ioapic_nmi_watchdog(void) | ||
476 | { | ||
477 | on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); | ||
478 | touch_nmi_watchdog(); | ||
479 | } | ||
480 | |||
481 | static void disable_ioapic_nmi_watchdog(void) | ||
482 | { | ||
483 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
484 | } | ||
485 | |||
486 | static int __init setup_unknown_nmi_panic(char *str) | ||
487 | { | ||
488 | unknown_nmi_panic = 1; | ||
489 | return 1; | ||
490 | } | ||
491 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
492 | |||
493 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | ||
494 | { | ||
495 | unsigned char reason = get_nmi_reason(); | ||
496 | char buf[64]; | ||
497 | |||
498 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | ||
499 | die_nmi(buf, regs, 1); /* Always panic here */ | ||
500 | return 0; | ||
501 | } | ||
502 | |||
503 | /* | ||
504 | * proc handler for /proc/sys/kernel/nmi | ||
505 | */ | ||
506 | int proc_nmi_enabled(struct ctl_table *table, int write, | ||
507 | void __user *buffer, size_t *length, loff_t *ppos) | ||
508 | { | ||
509 | int old_state; | ||
510 | |||
511 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; | ||
512 | old_state = nmi_watchdog_enabled; | ||
513 | proc_dointvec(table, write, buffer, length, ppos); | ||
514 | if (!!old_state == !!nmi_watchdog_enabled) | ||
515 | return 0; | ||
516 | |||
517 | if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { | ||
518 | printk(KERN_WARNING | ||
519 | "NMI watchdog is permanently disabled\n"); | ||
520 | return -EIO; | ||
521 | } | ||
522 | |||
523 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
524 | if (nmi_watchdog_enabled) | ||
525 | enable_lapic_nmi_watchdog(); | ||
526 | else | ||
527 | disable_lapic_nmi_watchdog(); | ||
528 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
529 | if (nmi_watchdog_enabled) | ||
530 | enable_ioapic_nmi_watchdog(); | ||
531 | else | ||
532 | disable_ioapic_nmi_watchdog(); | ||
533 | } else { | ||
534 | printk(KERN_WARNING | ||
535 | "NMI watchdog doesn't know what hardware to touch\n"); | ||
536 | return -EIO; | ||
537 | } | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | #endif /* CONFIG_SYSCTL */ | ||
542 | |||
543 | int do_nmi_callback(struct pt_regs *regs, int cpu) | ||
544 | { | ||
545 | #ifdef CONFIG_SYSCTL | ||
546 | if (unknown_nmi_panic) | ||
547 | return unknown_nmi_panic_callback(regs, cpu); | ||
548 | #endif | ||
549 | return 0; | ||
550 | } | ||
551 | |||
552 | void arch_trigger_all_cpu_backtrace(void) | ||
553 | { | ||
554 | int i; | ||
555 | |||
556 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | ||
557 | |||
558 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
559 | apic->send_IPI_all(NMI_VECTOR); | ||
560 | |||
561 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
562 | for (i = 0; i < 10 * 1000; i++) { | ||
563 | if (cpumask_empty(to_cpumask(backtrace_mask))) | ||
564 | break; | ||
565 | mdelay(1); | ||
566 | } | ||
567 | } | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b68bda30938..1d59834396bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void) | |||
894 | #else | 894 | #else |
895 | vgetcpu_set_mode(); | 895 | vgetcpu_set_mode(); |
896 | #endif | 896 | #endif |
897 | init_hw_perf_events(); | ||
898 | } | 897 | } |
899 | 898 | ||
900 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 899 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6d75b9145b13..0a360d146596 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void) | |||
330 | { | 330 | { |
331 | int i; | 331 | int i; |
332 | 332 | ||
333 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
334 | disable_lapic_nmi_watchdog(); | ||
335 | |||
336 | for (i = 0; i < x86_pmu.num_counters; i++) { | 333 | for (i = 0; i < x86_pmu.num_counters; i++) { |
337 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 334 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
338 | goto perfctr_fail; | 335 | goto perfctr_fail; |
@@ -355,9 +352,6 @@ perfctr_fail: | |||
355 | for (i--; i >= 0; i--) | 352 | for (i--; i >= 0; i--) |
356 | release_perfctr_nmi(x86_pmu.perfctr + i); | 353 | release_perfctr_nmi(x86_pmu.perfctr + i); |
357 | 354 | ||
358 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
359 | enable_lapic_nmi_watchdog(); | ||
360 | |||
361 | return false; | 355 | return false; |
362 | } | 356 | } |
363 | 357 | ||
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void) | |||
369 | release_perfctr_nmi(x86_pmu.perfctr + i); | 363 | release_perfctr_nmi(x86_pmu.perfctr + i); |
370 | release_evntsel_nmi(x86_pmu.eventsel + i); | 364 | release_evntsel_nmi(x86_pmu.eventsel + i); |
371 | } | 365 | } |
372 | |||
373 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
374 | enable_lapic_nmi_watchdog(); | ||
375 | } | 366 | } |
376 | 367 | ||
377 | #else | 368 | #else |
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {} | |||
384 | static bool check_hw_exists(void) | 375 | static bool check_hw_exists(void) |
385 | { | 376 | { |
386 | u64 val, val_new = 0; | 377 | u64 val, val_new = 0; |
387 | int ret = 0; | 378 | int i, reg, ret = 0; |
379 | |||
380 | /* | ||
381 | * Check to see if the BIOS enabled any of the counters, if so | ||
382 | * complain and bail. | ||
383 | */ | ||
384 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
385 | reg = x86_pmu.eventsel + i; | ||
386 | ret = rdmsrl_safe(reg, &val); | ||
387 | if (ret) | ||
388 | goto msr_fail; | ||
389 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
390 | goto bios_fail; | ||
391 | } | ||
388 | 392 | ||
393 | if (x86_pmu.num_counters_fixed) { | ||
394 | reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
395 | ret = rdmsrl_safe(reg, &val); | ||
396 | if (ret) | ||
397 | goto msr_fail; | ||
398 | for (i = 0; i < x86_pmu.num_counters_fixed; i++) { | ||
399 | if (val & (0x03 << i*4)) | ||
400 | goto bios_fail; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * Now write a value and read it back to see if it matches, | ||
406 | * this is needed to detect certain hardware emulators (qemu/kvm) | ||
407 | * that don't trap on the MSR access and always return 0s. | ||
408 | */ | ||
389 | val = 0xabcdUL; | 409 | val = 0xabcdUL; |
390 | ret |= checking_wrmsrl(x86_pmu.perfctr, val); | 410 | ret = checking_wrmsrl(x86_pmu.perfctr, val); |
391 | ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); | 411 | ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); |
392 | if (ret || val != val_new) | 412 | if (ret || val != val_new) |
393 | return false; | 413 | goto msr_fail; |
394 | 414 | ||
395 | return true; | 415 | return true; |
416 | |||
417 | bios_fail: | ||
418 | printk(KERN_CONT "Broken BIOS detected, using software events only.\n"); | ||
419 | printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); | ||
420 | return false; | ||
421 | |||
422 | msr_fail: | ||
423 | printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); | ||
424 | return false; | ||
396 | } | 425 | } |
397 | 426 | ||
398 | static void reserve_ds_buffers(void); | 427 | static void reserve_ds_buffers(void); |
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
451 | struct hw_perf_event *hwc = &event->hw; | 480 | struct hw_perf_event *hwc = &event->hw; |
452 | u64 config; | 481 | u64 config; |
453 | 482 | ||
454 | if (!hwc->sample_period) { | 483 | if (!is_sampling_event(event)) { |
455 | hwc->sample_period = x86_pmu.max_period; | 484 | hwc->sample_period = x86_pmu.max_period; |
456 | hwc->last_period = hwc->sample_period; | 485 | hwc->last_period = hwc->sample_period; |
457 | local64_set(&hwc->period_left, hwc->sample_period); | 486 | local64_set(&hwc->period_left, hwc->sample_period); |
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void) | |||
1362 | pr_info("no hardware sampling interrupt available.\n"); | 1391 | pr_info("no hardware sampling interrupt available.\n"); |
1363 | } | 1392 | } |
1364 | 1393 | ||
1365 | void __init init_hw_perf_events(void) | 1394 | int __init init_hw_perf_events(void) |
1366 | { | 1395 | { |
1367 | struct event_constraint *c; | 1396 | struct event_constraint *c; |
1368 | int err; | 1397 | int err; |
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void) | |||
1377 | err = amd_pmu_init(); | 1406 | err = amd_pmu_init(); |
1378 | break; | 1407 | break; |
1379 | default: | 1408 | default: |
1380 | return; | 1409 | return 0; |
1381 | } | 1410 | } |
1382 | if (err != 0) { | 1411 | if (err != 0) { |
1383 | pr_cont("no PMU driver, software events only.\n"); | 1412 | pr_cont("no PMU driver, software events only.\n"); |
1384 | return; | 1413 | return 0; |
1385 | } | 1414 | } |
1386 | 1415 | ||
1387 | pmu_check_apic(); | 1416 | pmu_check_apic(); |
1388 | 1417 | ||
1389 | /* sanity check that the hardware exists or is emulated */ | 1418 | /* sanity check that the hardware exists or is emulated */ |
1390 | if (!check_hw_exists()) { | 1419 | if (!check_hw_exists()) |
1391 | pr_cont("Broken PMU hardware detected, software events only.\n"); | 1420 | return 0; |
1392 | return; | ||
1393 | } | ||
1394 | 1421 | ||
1395 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1422 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1396 | 1423 | ||
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void) | |||
1438 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); | 1465 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1439 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); | 1466 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1440 | 1467 | ||
1441 | perf_pmu_register(&pmu); | 1468 | perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); |
1442 | perf_cpu_notifier(x86_pmu_notifier); | 1469 | perf_cpu_notifier(x86_pmu_notifier); |
1470 | |||
1471 | return 0; | ||
1443 | } | 1472 | } |
1473 | early_initcall(init_hw_perf_events); | ||
1444 | 1474 | ||
1445 | static inline void x86_pmu_read(struct perf_event *event) | 1475 | static inline void x86_pmu_read(struct perf_event *event) |
1446 | { | 1476 | { |
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1686 | 1716 | ||
1687 | perf_callchain_store(entry, regs->ip); | 1717 | perf_callchain_store(entry, regs->ip); |
1688 | 1718 | ||
1689 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1719 | dump_trace(NULL, regs, NULL, &backtrace_ops, entry); |
1690 | } | 1720 | } |
1691 | 1721 | ||
1692 | #ifdef CONFIG_COMPAT | 1722 | #ifdef CONFIG_COMPAT |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index e421b8cd6944..67e2202a6039 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -1,7 +1,5 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_AMD | 1 | #ifdef CONFIG_CPU_SUP_AMD |
2 | 2 | ||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | ||
4 | |||
5 | static __initconst const u64 amd_hw_cache_event_ids | 3 | static __initconst const u64 amd_hw_cache_event_ids |
6 | [PERF_COUNT_HW_CACHE_MAX] | 4 | [PERF_COUNT_HW_CACHE_MAX] |
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 5 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -275,7 +273,7 @@ done: | |||
275 | return &emptyconstraint; | 273 | return &emptyconstraint; |
276 | } | 274 | } |
277 | 275 | ||
278 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | 276 | static struct amd_nb *amd_alloc_nb(int cpu) |
279 | { | 277 | { |
280 | struct amd_nb *nb; | 278 | struct amd_nb *nb; |
281 | int i; | 279 | int i; |
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
285 | if (!nb) | 283 | if (!nb) |
286 | return NULL; | 284 | return NULL; |
287 | 285 | ||
288 | nb->nb_id = nb_id; | 286 | nb->nb_id = -1; |
289 | 287 | ||
290 | /* | 288 | /* |
291 | * initialize all possible NB constraints | 289 | * initialize all possible NB constraints |
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu) | |||
306 | if (boot_cpu_data.x86_max_cores < 2) | 304 | if (boot_cpu_data.x86_max_cores < 2) |
307 | return NOTIFY_OK; | 305 | return NOTIFY_OK; |
308 | 306 | ||
309 | cpuc->amd_nb = amd_alloc_nb(cpu, -1); | 307 | cpuc->amd_nb = amd_alloc_nb(cpu); |
310 | if (!cpuc->amd_nb) | 308 | if (!cpuc->amd_nb) |
311 | return NOTIFY_BAD; | 309 | return NOTIFY_BAD; |
312 | 310 | ||
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu) | |||
325 | nb_id = amd_get_nb_id(cpu); | 323 | nb_id = amd_get_nb_id(cpu); |
326 | WARN_ON_ONCE(nb_id == BAD_APICID); | 324 | WARN_ON_ONCE(nb_id == BAD_APICID); |
327 | 325 | ||
328 | raw_spin_lock(&amd_nb_lock); | ||
329 | |||
330 | for_each_online_cpu(i) { | 326 | for_each_online_cpu(i) { |
331 | nb = per_cpu(cpu_hw_events, i).amd_nb; | 327 | nb = per_cpu(cpu_hw_events, i).amd_nb; |
332 | if (WARN_ON_ONCE(!nb)) | 328 | if (WARN_ON_ONCE(!nb)) |
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu) | |||
341 | 337 | ||
342 | cpuc->amd_nb->nb_id = nb_id; | 338 | cpuc->amd_nb->nb_id = nb_id; |
343 | cpuc->amd_nb->refcnt++; | 339 | cpuc->amd_nb->refcnt++; |
344 | |||
345 | raw_spin_unlock(&amd_nb_lock); | ||
346 | } | 340 | } |
347 | 341 | ||
348 | static void amd_pmu_cpu_dead(int cpu) | 342 | static void amd_pmu_cpu_dead(int cpu) |
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu) | |||
354 | 348 | ||
355 | cpuhw = &per_cpu(cpu_hw_events, cpu); | 349 | cpuhw = &per_cpu(cpu_hw_events, cpu); |
356 | 350 | ||
357 | raw_spin_lock(&amd_nb_lock); | ||
358 | |||
359 | if (cpuhw->amd_nb) { | 351 | if (cpuhw->amd_nb) { |
360 | struct amd_nb *nb = cpuhw->amd_nb; | 352 | struct amd_nb *nb = cpuhw->amd_nb; |
361 | 353 | ||
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu) | |||
364 | 356 | ||
365 | cpuhw->amd_nb = NULL; | 357 | cpuhw->amd_nb = NULL; |
366 | } | 358 | } |
367 | |||
368 | raw_spin_unlock(&amd_nb_lock); | ||
369 | } | 359 | } |
370 | 360 | ||
371 | static __initconst const struct x86_pmu amd_pmu = { | 361 | static __initconst const struct x86_pmu amd_pmu = { |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c8f5c088cad1..24e390e40f2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
816 | if (ret) | 816 | if (ret) |
817 | return ret; | 817 | return ret; |
818 | 818 | ||
819 | if (event->attr.precise_ip && | ||
820 | (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { | ||
821 | /* | ||
822 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P | ||
823 | * (0x003c) so that we can use it with PEBS. | ||
824 | * | ||
825 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't | ||
826 | * PEBS capable. However we can use INST_RETIRED.ANY_P | ||
827 | * (0x00c0), which is a PEBS capable event, to get the same | ||
828 | * count. | ||
829 | * | ||
830 | * INST_RETIRED.ANY_P counts the number of cycles that retires | ||
831 | * CNTMASK instructions. By setting CNTMASK to a value (16) | ||
832 | * larger than the maximum number of instructions that can be | ||
833 | * retired per cycle (4) and then inverting the condition, we | ||
834 | * count all cycles that retire 16 or less instructions, which | ||
835 | * is every cycle. | ||
836 | * | ||
837 | * Thereby we gain a PEBS capable cycle counter. | ||
838 | */ | ||
839 | u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ | ||
840 | |||
841 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | ||
842 | event->hw.config = alt_config; | ||
843 | } | ||
844 | |||
819 | if (event->attr.type != PERF_TYPE_RAW) | 845 | if (event->attr.type != PERF_TYPE_RAW) |
820 | return 0; | 846 | return 0; |
821 | 847 | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d9f4ff8fcd69..d5a236615501 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -16,32 +16,12 @@ | |||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/nmi.h> | 19 | #include <asm/nmi.h> |
20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
21 | 21 | ||
22 | #include <asm/apic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/perf_event.h> | 23 | #include <asm/perf_event.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | ||
26 | unsigned int cccr_msr; | ||
27 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
28 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
29 | }; | ||
30 | |||
31 | /* Interface defining a CPU specific perfctr watchdog */ | ||
32 | struct wd_ops { | ||
33 | int (*reserve)(void); | ||
34 | void (*unreserve)(void); | ||
35 | int (*setup)(unsigned nmi_hz); | ||
36 | void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); | ||
37 | void (*stop)(void); | ||
38 | unsigned perfctr; | ||
39 | unsigned evntsel; | ||
40 | u64 checkbit; | ||
41 | }; | ||
42 | |||
43 | static const struct wd_ops *wd_ops; | ||
44 | |||
45 | /* | 25 | /* |
46 | * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's | 26 | * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's |
47 | * offset from MSR_P4_BSU_ESCR0. | 27 | * offset from MSR_P4_BSU_ESCR0. |
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops; | |||
60 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); | 40 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); |
61 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); | 41 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); |
62 | 42 | ||
63 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
64 | |||
65 | /* converts an msr to an appropriate reservation bit */ | 43 | /* converts an msr to an appropriate reservation bit */ |
66 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | 44 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) |
67 | { | 45 | { |
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr) | |||
172 | clear_bit(counter, evntsel_nmi_owner); | 150 | clear_bit(counter, evntsel_nmi_owner); |
173 | } | 151 | } |
174 | EXPORT_SYMBOL(release_evntsel_nmi); | 152 | EXPORT_SYMBOL(release_evntsel_nmi); |
175 | |||
176 | void disable_lapic_nmi_watchdog(void) | ||
177 | { | ||
178 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
179 | |||
180 | if (atomic_read(&nmi_active) <= 0) | ||
181 | return; | ||
182 | |||
183 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
184 | |||
185 | if (wd_ops) | ||
186 | wd_ops->unreserve(); | ||
187 | |||
188 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
189 | } | ||
190 | |||
191 | void enable_lapic_nmi_watchdog(void) | ||
192 | { | ||
193 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
194 | |||
195 | /* are we already enabled */ | ||
196 | if (atomic_read(&nmi_active) != 0) | ||
197 | return; | ||
198 | |||
199 | /* are we lapic aware */ | ||
200 | if (!wd_ops) | ||
201 | return; | ||
202 | if (!wd_ops->reserve()) { | ||
203 | printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); | ||
204 | return; | ||
205 | } | ||
206 | |||
207 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); | ||
208 | touch_nmi_watchdog(); | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Activate the NMI watchdog via the local APIC. | ||
213 | */ | ||
214 | |||
215 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
216 | { | ||
217 | u64 counter_val; | ||
218 | unsigned int retval = hz; | ||
219 | |||
220 | /* | ||
221 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
222 | * are writable, with higher bits sign extending from bit 31. | ||
223 | * So, we can only program the counter with 31 bit values and | ||
224 | * 32nd bit should be 1, for 33.. to be 1. | ||
225 | * Find the appropriate nmi_hz | ||
226 | */ | ||
227 | counter_val = (u64)cpu_khz * 1000; | ||
228 | do_div(counter_val, retval); | ||
229 | if (counter_val > 0x7fffffffULL) { | ||
230 | u64 count = (u64)cpu_khz * 1000; | ||
231 | do_div(count, 0x7fffffffUL); | ||
232 | retval = count + 1; | ||
233 | } | ||
234 | return retval; | ||
235 | } | ||
236 | |||
237 | static void write_watchdog_counter(unsigned int perfctr_msr, | ||
238 | const char *descr, unsigned nmi_hz) | ||
239 | { | ||
240 | u64 count = (u64)cpu_khz * 1000; | ||
241 | |||
242 | do_div(count, nmi_hz); | ||
243 | if (descr) | ||
244 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | ||
245 | wrmsrl(perfctr_msr, 0 - count); | ||
246 | } | ||
247 | |||
248 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
249 | const char *descr, unsigned nmi_hz) | ||
250 | { | ||
251 | u64 count = (u64)cpu_khz * 1000; | ||
252 | |||
253 | do_div(count, nmi_hz); | ||
254 | if (descr) | ||
255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | ||
256 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
257 | } | ||
258 | |||
259 | /* | ||
260 | * AMD K7/K8/Family10h/Family11h support. | ||
261 | * AMD keeps this interface nicely stable so there is not much variety | ||
262 | */ | ||
263 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
264 | #define K7_EVNTSEL_INT (1 << 20) | ||
265 | #define K7_EVNTSEL_OS (1 << 17) | ||
266 | #define K7_EVNTSEL_USR (1 << 16) | ||
267 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
268 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
269 | |||
270 | static int setup_k7_watchdog(unsigned nmi_hz) | ||
271 | { | ||
272 | unsigned int perfctr_msr, evntsel_msr; | ||
273 | unsigned int evntsel; | ||
274 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
275 | |||
276 | perfctr_msr = wd_ops->perfctr; | ||
277 | evntsel_msr = wd_ops->evntsel; | ||
278 | |||
279 | wrmsrl(perfctr_msr, 0UL); | ||
280 | |||
281 | evntsel = K7_EVNTSEL_INT | ||
282 | | K7_EVNTSEL_OS | ||
283 | | K7_EVNTSEL_USR | ||
284 | | K7_NMI_EVENT; | ||
285 | |||
286 | /* setup the timer */ | ||
287 | wrmsr(evntsel_msr, evntsel, 0); | ||
288 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); | ||
289 | |||
290 | /* initialize the wd struct before enabling */ | ||
291 | wd->perfctr_msr = perfctr_msr; | ||
292 | wd->evntsel_msr = evntsel_msr; | ||
293 | wd->cccr_msr = 0; /* unused */ | ||
294 | |||
295 | /* ok, everything is initialized, announce that we're set */ | ||
296 | cpu_nmi_set_wd_enabled(); | ||
297 | |||
298 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
299 | evntsel |= K7_EVNTSEL_ENABLE; | ||
300 | wrmsr(evntsel_msr, evntsel, 0); | ||
301 | |||
302 | return 1; | ||
303 | } | ||
304 | |||
305 | static void single_msr_stop_watchdog(void) | ||
306 | { | ||
307 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
308 | |||
309 | wrmsr(wd->evntsel_msr, 0, 0); | ||
310 | } | ||
311 | |||
312 | static int single_msr_reserve(void) | ||
313 | { | ||
314 | if (!reserve_perfctr_nmi(wd_ops->perfctr)) | ||
315 | return 0; | ||
316 | |||
317 | if (!reserve_evntsel_nmi(wd_ops->evntsel)) { | ||
318 | release_perfctr_nmi(wd_ops->perfctr); | ||
319 | return 0; | ||
320 | } | ||
321 | return 1; | ||
322 | } | ||
323 | |||
324 | static void single_msr_unreserve(void) | ||
325 | { | ||
326 | release_evntsel_nmi(wd_ops->evntsel); | ||
327 | release_perfctr_nmi(wd_ops->perfctr); | ||
328 | } | ||
329 | |||
330 | static void __kprobes | ||
331 | single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
332 | { | ||
333 | /* start the cycle over again */ | ||
334 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
335 | } | ||
336 | |||
337 | static const struct wd_ops k7_wd_ops = { | ||
338 | .reserve = single_msr_reserve, | ||
339 | .unreserve = single_msr_unreserve, | ||
340 | .setup = setup_k7_watchdog, | ||
341 | .rearm = single_msr_rearm, | ||
342 | .stop = single_msr_stop_watchdog, | ||
343 | .perfctr = MSR_K7_PERFCTR0, | ||
344 | .evntsel = MSR_K7_EVNTSEL0, | ||
345 | .checkbit = 1ULL << 47, | ||
346 | }; | ||
347 | |||
348 | /* | ||
349 | * Intel Model 6 (PPro+,P2,P3,P-M,Core1) | ||
350 | */ | ||
351 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
352 | #define P6_EVNTSEL_INT (1 << 20) | ||
353 | #define P6_EVNTSEL_OS (1 << 17) | ||
354 | #define P6_EVNTSEL_USR (1 << 16) | ||
355 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
356 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
357 | |||
358 | static int setup_p6_watchdog(unsigned nmi_hz) | ||
359 | { | ||
360 | unsigned int perfctr_msr, evntsel_msr; | ||
361 | unsigned int evntsel; | ||
362 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
363 | |||
364 | perfctr_msr = wd_ops->perfctr; | ||
365 | evntsel_msr = wd_ops->evntsel; | ||
366 | |||
367 | /* KVM doesn't implement this MSR */ | ||
368 | if (wrmsr_safe(perfctr_msr, 0, 0) < 0) | ||
369 | return 0; | ||
370 | |||
371 | evntsel = P6_EVNTSEL_INT | ||
372 | | P6_EVNTSEL_OS | ||
373 | | P6_EVNTSEL_USR | ||
374 | | P6_NMI_EVENT; | ||
375 | |||
376 | /* setup the timer */ | ||
377 | wrmsr(evntsel_msr, evntsel, 0); | ||
378 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
379 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); | ||
380 | |||
381 | /* initialize the wd struct before enabling */ | ||
382 | wd->perfctr_msr = perfctr_msr; | ||
383 | wd->evntsel_msr = evntsel_msr; | ||
384 | wd->cccr_msr = 0; /* unused */ | ||
385 | |||
386 | /* ok, everything is initialized, announce that we're set */ | ||
387 | cpu_nmi_set_wd_enabled(); | ||
388 | |||
389 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
390 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
391 | wrmsr(evntsel_msr, evntsel, 0); | ||
392 | |||
393 | return 1; | ||
394 | } | ||
395 | |||
396 | static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
397 | { | ||
398 | /* | ||
399 | * P6 based Pentium M need to re-unmask | ||
400 | * the apic vector but it doesn't hurt | ||
401 | * other P6 variant. | ||
402 | * ArchPerfom/Core Duo also needs this | ||
403 | */ | ||
404 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
405 | |||
406 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
407 | write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); | ||
408 | } | ||
409 | |||
410 | static const struct wd_ops p6_wd_ops = { | ||
411 | .reserve = single_msr_reserve, | ||
412 | .unreserve = single_msr_unreserve, | ||
413 | .setup = setup_p6_watchdog, | ||
414 | .rearm = p6_rearm, | ||
415 | .stop = single_msr_stop_watchdog, | ||
416 | .perfctr = MSR_P6_PERFCTR0, | ||
417 | .evntsel = MSR_P6_EVNTSEL0, | ||
418 | .checkbit = 1ULL << 39, | ||
419 | }; | ||
420 | |||
421 | /* | ||
422 | * Intel P4 performance counters. | ||
423 | * By far the most complicated of all. | ||
424 | */ | ||
425 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) | ||
426 | #define P4_ESCR_EVENT_SELECT(N) ((N) << 25) | ||
427 | #define P4_ESCR_OS (1 << 3) | ||
428 | #define P4_ESCR_USR (1 << 2) | ||
429 | #define P4_CCCR_OVF_PMI0 (1 << 26) | ||
430 | #define P4_CCCR_OVF_PMI1 (1 << 27) | ||
431 | #define P4_CCCR_THRESHOLD(N) ((N) << 20) | ||
432 | #define P4_CCCR_COMPLEMENT (1 << 19) | ||
433 | #define P4_CCCR_COMPARE (1 << 18) | ||
434 | #define P4_CCCR_REQUIRED (3 << 16) | ||
435 | #define P4_CCCR_ESCR_SELECT(N) ((N) << 13) | ||
436 | #define P4_CCCR_ENABLE (1 << 12) | ||
437 | #define P4_CCCR_OVF (1 << 31) | ||
438 | |||
439 | #define P4_CONTROLS 18 | ||
440 | static unsigned int p4_controls[18] = { | ||
441 | MSR_P4_BPU_CCCR0, | ||
442 | MSR_P4_BPU_CCCR1, | ||
443 | MSR_P4_BPU_CCCR2, | ||
444 | MSR_P4_BPU_CCCR3, | ||
445 | MSR_P4_MS_CCCR0, | ||
446 | MSR_P4_MS_CCCR1, | ||
447 | MSR_P4_MS_CCCR2, | ||
448 | MSR_P4_MS_CCCR3, | ||
449 | MSR_P4_FLAME_CCCR0, | ||
450 | MSR_P4_FLAME_CCCR1, | ||
451 | MSR_P4_FLAME_CCCR2, | ||
452 | MSR_P4_FLAME_CCCR3, | ||
453 | MSR_P4_IQ_CCCR0, | ||
454 | MSR_P4_IQ_CCCR1, | ||
455 | MSR_P4_IQ_CCCR2, | ||
456 | MSR_P4_IQ_CCCR3, | ||
457 | MSR_P4_IQ_CCCR4, | ||
458 | MSR_P4_IQ_CCCR5, | ||
459 | }; | ||
460 | /* | ||
461 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
462 | * CRU_ESCR0 (with any non-null event selector) through a complemented | ||
463 | * max threshold. [IA32-Vol3, Section 14.9.9] | ||
464 | */ | ||
465 | static int setup_p4_watchdog(unsigned nmi_hz) | ||
466 | { | ||
467 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
468 | unsigned int evntsel, cccr_val; | ||
469 | unsigned int misc_enable, dummy; | ||
470 | unsigned int ht_num; | ||
471 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
472 | |||
473 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
474 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
475 | return 0; | ||
476 | |||
477 | #ifdef CONFIG_SMP | ||
478 | /* detect which hyperthread we are on */ | ||
479 | if (smp_num_siblings == 2) { | ||
480 | unsigned int ebx, apicid; | ||
481 | |||
482 | ebx = cpuid_ebx(1); | ||
483 | apicid = (ebx >> 24) & 0xff; | ||
484 | ht_num = apicid & 1; | ||
485 | } else | ||
486 | #endif | ||
487 | ht_num = 0; | ||
488 | |||
489 | /* | ||
490 | * performance counters are shared resources | ||
491 | * assign each hyperthread its own set | ||
492 | * (re-use the ESCR0 register, seems safe | ||
493 | * and keeps the cccr_val the same) | ||
494 | */ | ||
495 | if (!ht_num) { | ||
496 | /* logical cpu 0 */ | ||
497 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
498 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
499 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
500 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
501 | |||
502 | /* | ||
503 | * If we're on the kdump kernel or other situation, we may | ||
504 | * still have other performance counter registers set to | ||
505 | * interrupt and they'll keep interrupting forever because | ||
506 | * of the P4_CCCR_OVF quirk. So we need to ACK all the | ||
507 | * pending interrupts and disable all the registers here, | ||
508 | * before reenabling the NMI delivery. Refer to p4_rearm() | ||
509 | * about the P4_CCCR_OVF quirk. | ||
510 | */ | ||
511 | if (reset_devices) { | ||
512 | unsigned int low, high; | ||
513 | int i; | ||
514 | |||
515 | for (i = 0; i < P4_CONTROLS; i++) { | ||
516 | rdmsr(p4_controls[i], low, high); | ||
517 | low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); | ||
518 | wrmsr(p4_controls[i], low, high); | ||
519 | } | ||
520 | } | ||
521 | } else { | ||
522 | /* logical cpu 1 */ | ||
523 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
524 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
525 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
526 | |||
527 | /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ | ||
528 | if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) | ||
529 | cccr_val = P4_CCCR_OVF_PMI0; | ||
530 | else | ||
531 | cccr_val = P4_CCCR_OVF_PMI1; | ||
532 | cccr_val |= P4_CCCR_ESCR_SELECT(4); | ||
533 | } | ||
534 | |||
535 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
536 | | P4_ESCR_OS | ||
537 | | P4_ESCR_USR; | ||
538 | |||
539 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
540 | | P4_CCCR_COMPLEMENT | ||
541 | | P4_CCCR_COMPARE | ||
542 | | P4_CCCR_REQUIRED; | ||
543 | |||
544 | wrmsr(evntsel_msr, evntsel, 0); | ||
545 | wrmsr(cccr_msr, cccr_val, 0); | ||
546 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | ||
547 | |||
548 | wd->perfctr_msr = perfctr_msr; | ||
549 | wd->evntsel_msr = evntsel_msr; | ||
550 | wd->cccr_msr = cccr_msr; | ||
551 | |||
552 | /* ok, everything is initialized, announce that we're set */ | ||
553 | cpu_nmi_set_wd_enabled(); | ||
554 | |||
555 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
556 | cccr_val |= P4_CCCR_ENABLE; | ||
557 | wrmsr(cccr_msr, cccr_val, 0); | ||
558 | return 1; | ||
559 | } | ||
560 | |||
561 | static void stop_p4_watchdog(void) | ||
562 | { | ||
563 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
564 | wrmsr(wd->cccr_msr, 0, 0); | ||
565 | wrmsr(wd->evntsel_msr, 0, 0); | ||
566 | } | ||
567 | |||
568 | static int p4_reserve(void) | ||
569 | { | ||
570 | if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) | ||
571 | return 0; | ||
572 | #ifdef CONFIG_SMP | ||
573 | if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) | ||
574 | goto fail1; | ||
575 | #endif | ||
576 | if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) | ||
577 | goto fail2; | ||
578 | /* RED-PEN why is ESCR1 not reserved here? */ | ||
579 | return 1; | ||
580 | fail2: | ||
581 | #ifdef CONFIG_SMP | ||
582 | if (smp_num_siblings > 1) | ||
583 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
584 | fail1: | ||
585 | #endif | ||
586 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | static void p4_unreserve(void) | ||
591 | { | ||
592 | #ifdef CONFIG_SMP | ||
593 | if (smp_num_siblings > 1) | ||
594 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
595 | #endif | ||
596 | release_evntsel_nmi(MSR_P4_CRU_ESCR0); | ||
597 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
598 | } | ||
599 | |||
600 | static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
601 | { | ||
602 | unsigned dummy; | ||
603 | /* | ||
604 | * P4 quirks: | ||
605 | * - An overflown perfctr will assert its interrupt | ||
606 | * until the OVF flag in its CCCR is cleared. | ||
607 | * - LVTPC is masked on interrupt and must be | ||
608 | * unmasked by the LVTPC handler. | ||
609 | */ | ||
610 | rdmsrl(wd->cccr_msr, dummy); | ||
611 | dummy &= ~P4_CCCR_OVF; | ||
612 | wrmsrl(wd->cccr_msr, dummy); | ||
613 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
614 | /* start the cycle over again */ | ||
615 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
616 | } | ||
617 | |||
618 | static const struct wd_ops p4_wd_ops = { | ||
619 | .reserve = p4_reserve, | ||
620 | .unreserve = p4_unreserve, | ||
621 | .setup = setup_p4_watchdog, | ||
622 | .rearm = p4_rearm, | ||
623 | .stop = stop_p4_watchdog, | ||
624 | /* RED-PEN this is wrong for the other sibling */ | ||
625 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
626 | .evntsel = MSR_P4_BSU_ESCR0, | ||
627 | .checkbit = 1ULL << 39, | ||
628 | }; | ||
629 | |||
630 | /* | ||
631 | * Watchdog using the Intel architected PerfMon. | ||
632 | * Used for Core2 and hopefully all future Intel CPUs. | ||
633 | */ | ||
634 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
635 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
636 | |||
637 | static struct wd_ops intel_arch_wd_ops; | ||
638 | |||
639 | static int setup_intel_arch_watchdog(unsigned nmi_hz) | ||
640 | { | ||
641 | unsigned int ebx; | ||
642 | union cpuid10_eax eax; | ||
643 | unsigned int unused; | ||
644 | unsigned int perfctr_msr, evntsel_msr; | ||
645 | unsigned int evntsel; | ||
646 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
647 | |||
648 | /* | ||
649 | * Check whether the Architectural PerfMon supports | ||
650 | * Unhalted Core Cycles Event or not. | ||
651 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
652 | */ | ||
653 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
654 | if ((eax.split.mask_length < | ||
655 | (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
656 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
657 | return 0; | ||
658 | |||
659 | perfctr_msr = wd_ops->perfctr; | ||
660 | evntsel_msr = wd_ops->evntsel; | ||
661 | |||
662 | wrmsrl(perfctr_msr, 0UL); | ||
663 | |||
664 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
665 | | ARCH_PERFMON_EVENTSEL_OS | ||
666 | | ARCH_PERFMON_EVENTSEL_USR | ||
667 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
668 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
669 | |||
670 | /* setup the timer */ | ||
671 | wrmsr(evntsel_msr, evntsel, 0); | ||
672 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
673 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | ||
674 | |||
675 | wd->perfctr_msr = perfctr_msr; | ||
676 | wd->evntsel_msr = evntsel_msr; | ||
677 | wd->cccr_msr = 0; /* unused */ | ||
678 | |||
679 | /* ok, everything is initialized, announce that we're set */ | ||
680 | cpu_nmi_set_wd_enabled(); | ||
681 | |||
682 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
683 | evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
684 | wrmsr(evntsel_msr, evntsel, 0); | ||
685 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | ||
686 | return 1; | ||
687 | } | ||
688 | |||
689 | static struct wd_ops intel_arch_wd_ops __read_mostly = { | ||
690 | .reserve = single_msr_reserve, | ||
691 | .unreserve = single_msr_unreserve, | ||
692 | .setup = setup_intel_arch_watchdog, | ||
693 | .rearm = p6_rearm, | ||
694 | .stop = single_msr_stop_watchdog, | ||
695 | .perfctr = MSR_ARCH_PERFMON_PERFCTR1, | ||
696 | .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, | ||
697 | }; | ||
698 | |||
699 | static void probe_nmi_watchdog(void) | ||
700 | { | ||
701 | switch (boot_cpu_data.x86_vendor) { | ||
702 | case X86_VENDOR_AMD: | ||
703 | if (boot_cpu_data.x86 == 6 || | ||
704 | (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) | ||
705 | wd_ops = &k7_wd_ops; | ||
706 | return; | ||
707 | case X86_VENDOR_INTEL: | ||
708 | /* Work around where perfctr1 doesn't have a working enable | ||
709 | * bit as described in the following errata: | ||
710 | * AE49 Core Duo and Intel Core Solo 65 nm | ||
711 | * AN49 Intel Pentium Dual-Core | ||
712 | * AF49 Dual-Core Intel Xeon Processor LV | ||
713 | */ | ||
714 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || | ||
715 | ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && | ||
716 | boot_cpu_data.x86_mask == 4))) { | ||
717 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; | ||
718 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; | ||
719 | } | ||
720 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
721 | wd_ops = &intel_arch_wd_ops; | ||
722 | break; | ||
723 | } | ||
724 | switch (boot_cpu_data.x86) { | ||
725 | case 6: | ||
726 | if (boot_cpu_data.x86_model > 13) | ||
727 | return; | ||
728 | |||
729 | wd_ops = &p6_wd_ops; | ||
730 | break; | ||
731 | case 15: | ||
732 | wd_ops = &p4_wd_ops; | ||
733 | break; | ||
734 | default: | ||
735 | return; | ||
736 | } | ||
737 | break; | ||
738 | } | ||
739 | } | ||
740 | |||
741 | /* Interface to nmi.c */ | ||
742 | |||
743 | int lapic_watchdog_init(unsigned nmi_hz) | ||
744 | { | ||
745 | if (!wd_ops) { | ||
746 | probe_nmi_watchdog(); | ||
747 | if (!wd_ops) { | ||
748 | printk(KERN_INFO "NMI watchdog: CPU not supported\n"); | ||
749 | return -1; | ||
750 | } | ||
751 | |||
752 | if (!wd_ops->reserve()) { | ||
753 | printk(KERN_ERR | ||
754 | "NMI watchdog: cannot reserve perfctrs\n"); | ||
755 | return -1; | ||
756 | } | ||
757 | } | ||
758 | |||
759 | if (!(wd_ops->setup(nmi_hz))) { | ||
760 | printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", | ||
761 | raw_smp_processor_id()); | ||
762 | return -1; | ||
763 | } | ||
764 | |||
765 | return 0; | ||
766 | } | ||
767 | |||
768 | void lapic_watchdog_stop(void) | ||
769 | { | ||
770 | if (wd_ops) | ||
771 | wd_ops->stop(); | ||
772 | } | ||
773 | |||
774 | unsigned lapic_adjust_nmi_hz(unsigned hz) | ||
775 | { | ||
776 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
777 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
778 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) | ||
779 | hz = adjust_for_32bit_ctr(hz); | ||
780 | return hz; | ||
781 | } | ||
782 | |||
783 | int __kprobes lapic_wd_event(unsigned nmi_hz) | ||
784 | { | ||
785 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
786 | u64 ctr; | ||
787 | |||
788 | rdmsrl(wd->perfctr_msr, ctr); | ||
789 | if (ctr & wd_ops->checkbit) /* perfctr still running? */ | ||
790 | return 0; | ||
791 | |||
792 | wd_ops->rearm(wd, nmi_hz); | ||
793 | return 1; | ||
794 | } | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6e8752c1bd52..8474c998cbd4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { | |||
175 | 175 | ||
176 | void | 176 | void |
177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
178 | unsigned long *stack, unsigned long bp, char *log_lvl) | 178 | unsigned long *stack, char *log_lvl) |
179 | { | 179 | { |
180 | printk("%sCall Trace:\n", log_lvl); | 180 | printk("%sCall Trace:\n", log_lvl); |
181 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 181 | dump_trace(task, regs, stack, &print_trace_ops, log_lvl); |
182 | } | 182 | } |
183 | 183 | ||
184 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 184 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
185 | unsigned long *stack, unsigned long bp) | 185 | unsigned long *stack) |
186 | { | 186 | { |
187 | show_trace_log_lvl(task, regs, stack, bp, ""); | 187 | show_trace_log_lvl(task, regs, stack, ""); |
188 | } | 188 | } |
189 | 189 | ||
190 | void show_stack(struct task_struct *task, unsigned long *sp) | 190 | void show_stack(struct task_struct *task, unsigned long *sp) |
191 | { | 191 | { |
192 | show_stack_log_lvl(task, NULL, sp, 0, ""); | 192 | show_stack_log_lvl(task, NULL, sp, ""); |
193 | } | 193 | } |
194 | 194 | ||
195 | /* | 195 | /* |
@@ -210,7 +210,7 @@ void dump_stack(void) | |||
210 | init_utsname()->release, | 210 | init_utsname()->release, |
211 | (int)strcspn(init_utsname()->version, " "), | 211 | (int)strcspn(init_utsname()->version, " "), |
212 | init_utsname()->version); | 212 | init_utsname()->version); |
213 | show_trace(NULL, NULL, &stack, bp); | 213 | show_trace(NULL, NULL, &stack); |
214 | } | 214 | } |
215 | EXPORT_SYMBOL(dump_stack); | 215 | EXPORT_SYMBOL(dump_stack); |
216 | 216 | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1bc7f75a5bda..74cc1eda384b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,11 +17,12 @@ | |||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | 19 | ||
20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 20 | void dump_trace(struct task_struct *task, |
21 | unsigned long *stack, unsigned long bp, | 21 | struct pt_regs *regs, unsigned long *stack, |
22 | const struct stacktrace_ops *ops, void *data) | 22 | const struct stacktrace_ops *ops, void *data) |
23 | { | 23 | { |
24 | int graph = 0; | 24 | int graph = 0; |
25 | unsigned long bp; | ||
25 | 26 | ||
26 | if (!task) | 27 | if (!task) |
27 | task = current; | 28 | task = current; |
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
34 | stack = (unsigned long *)task->thread.sp; | 35 | stack = (unsigned long *)task->thread.sp; |
35 | } | 36 | } |
36 | 37 | ||
37 | #ifdef CONFIG_FRAME_POINTER | 38 | bp = stack_frame(task, regs); |
38 | if (!bp) { | ||
39 | if (task == current) { | ||
40 | /* Grab bp right from our regs */ | ||
41 | get_bp(bp); | ||
42 | } else { | ||
43 | /* bp is the last reg pushed by switch_to */ | ||
44 | bp = *(unsigned long *) task->thread.sp; | ||
45 | } | ||
46 | } | ||
47 | #endif | ||
48 | |||
49 | for (;;) { | 39 | for (;;) { |
50 | struct thread_info *context; | 40 | struct thread_info *context; |
51 | 41 | ||
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace); | |||
65 | 55 | ||
66 | void | 56 | void |
67 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 57 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
68 | unsigned long *sp, unsigned long bp, char *log_lvl) | 58 | unsigned long *sp, char *log_lvl) |
69 | { | 59 | { |
70 | unsigned long *stack; | 60 | unsigned long *stack; |
71 | int i; | 61 | int i; |
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
87 | touch_nmi_watchdog(); | 77 | touch_nmi_watchdog(); |
88 | } | 78 | } |
89 | printk(KERN_CONT "\n"); | 79 | printk(KERN_CONT "\n"); |
90 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 80 | show_trace_log_lvl(task, regs, sp, log_lvl); |
91 | } | 81 | } |
92 | 82 | ||
93 | 83 | ||
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs) | |||
112 | u8 *ip; | 102 | u8 *ip; |
113 | 103 | ||
114 | printk(KERN_EMERG "Stack:\n"); | 104 | printk(KERN_EMERG "Stack:\n"); |
115 | show_stack_log_lvl(NULL, regs, &regs->sp, | 105 | show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG); |
116 | 0, KERN_EMERG); | ||
117 | 106 | ||
118 | printk(KERN_EMERG "Code: "); | 107 | printk(KERN_EMERG "Code: "); |
119 | 108 | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6a340485249a..64101335de19 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | |||
139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
140 | */ | 140 | */ |
141 | 141 | ||
142 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 142 | void dump_trace(struct task_struct *task, |
143 | unsigned long *stack, unsigned long bp, | 143 | struct pt_regs *regs, unsigned long *stack, |
144 | const struct stacktrace_ops *ops, void *data) | 144 | const struct stacktrace_ops *ops, void *data) |
145 | { | 145 | { |
146 | const unsigned cpu = get_cpu(); | 146 | const unsigned cpu = get_cpu(); |
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
149 | unsigned used = 0; | 149 | unsigned used = 0; |
150 | struct thread_info *tinfo; | 150 | struct thread_info *tinfo; |
151 | int graph = 0; | 151 | int graph = 0; |
152 | unsigned long bp; | ||
152 | 153 | ||
153 | if (!task) | 154 | if (!task) |
154 | task = current; | 155 | task = current; |
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
160 | stack = (unsigned long *)task->thread.sp; | 161 | stack = (unsigned long *)task->thread.sp; |
161 | } | 162 | } |
162 | 163 | ||
163 | #ifdef CONFIG_FRAME_POINTER | 164 | bp = stack_frame(task, regs); |
164 | if (!bp) { | ||
165 | if (task == current) { | ||
166 | /* Grab bp right from our regs */ | ||
167 | get_bp(bp); | ||
168 | } else { | ||
169 | /* bp is the last reg pushed by switch_to */ | ||
170 | bp = *(unsigned long *) task->thread.sp; | ||
171 | } | ||
172 | } | ||
173 | #endif | ||
174 | |||
175 | /* | 165 | /* |
176 | * Print function call entries in all stacks, starting at the | 166 | * Print function call entries in all stacks, starting at the |
177 | * current stack address. If the stacks consist of nested | 167 | * current stack address. If the stacks consist of nested |
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); | |||
235 | 225 | ||
236 | void | 226 | void |
237 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 227 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
238 | unsigned long *sp, unsigned long bp, char *log_lvl) | 228 | unsigned long *sp, char *log_lvl) |
239 | { | 229 | { |
240 | unsigned long *irq_stack_end; | 230 | unsigned long *irq_stack_end; |
241 | unsigned long *irq_stack; | 231 | unsigned long *irq_stack; |
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
279 | preempt_enable(); | 269 | preempt_enable(); |
280 | 270 | ||
281 | printk(KERN_CONT "\n"); | 271 | printk(KERN_CONT "\n"); |
282 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 272 | show_trace_log_lvl(task, regs, sp, log_lvl); |
283 | } | 273 | } |
284 | 274 | ||
285 | void show_registers(struct pt_regs *regs) | 275 | void show_registers(struct pt_regs *regs) |
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs) | |||
308 | 298 | ||
309 | printk(KERN_EMERG "Stack:\n"); | 299 | printk(KERN_EMERG "Stack:\n"); |
310 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 300 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
311 | regs->bp, KERN_EMERG); | 301 | KERN_EMERG); |
312 | 302 | ||
313 | printk(KERN_EMERG "Code: "); | 303 | printk(KERN_EMERG "Code: "); |
314 | 304 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1cbd54c0df99..5940282bd2f9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1184 | { | 1184 | { |
1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
1186 | 1186 | ||
1187 | /* This is possible if op is under delayed unoptimizing */ | ||
1188 | if (kprobe_disabled(&op->kp)) | ||
1189 | return; | ||
1190 | |||
1187 | preempt_disable(); | 1191 | preempt_disable(); |
1188 | if (kprobe_running()) { | 1192 | if (kprobe_running()) { |
1189 | kprobes_inc_nmissed_count(&op->kp); | 1193 | kprobes_inc_nmissed_count(&op->kp); |
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | |||
1401 | return 0; | 1405 | return 0; |
1402 | } | 1406 | } |
1403 | 1407 | ||
1404 | /* Replace a breakpoint (int3) with a relative jump. */ | 1408 | #define MAX_OPTIMIZE_PROBES 256 |
1405 | int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | 1409 | static struct text_poke_param *jump_poke_params; |
1410 | static struct jump_poke_buffer { | ||
1411 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1412 | } *jump_poke_bufs; | ||
1413 | |||
1414 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
1415 | u8 *insn_buf, | ||
1416 | struct optimized_kprobe *op) | ||
1406 | { | 1417 | { |
1407 | unsigned char jmp_code[RELATIVEJUMP_SIZE]; | ||
1408 | s32 rel = (s32)((long)op->optinsn.insn - | 1418 | s32 rel = (s32)((long)op->optinsn.insn - |
1409 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | 1419 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); |
1410 | 1420 | ||
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | |||
1412 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | 1422 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, |
1413 | RELATIVE_ADDR_SIZE); | 1423 | RELATIVE_ADDR_SIZE); |
1414 | 1424 | ||
1415 | jmp_code[0] = RELATIVEJUMP_OPCODE; | 1425 | insn_buf[0] = RELATIVEJUMP_OPCODE; |
1416 | *(s32 *)(&jmp_code[1]) = rel; | 1426 | *(s32 *)(&insn_buf[1]) = rel; |
1427 | |||
1428 | tprm->addr = op->kp.addr; | ||
1429 | tprm->opcode = insn_buf; | ||
1430 | tprm->len = RELATIVEJUMP_SIZE; | ||
1431 | } | ||
1432 | |||
1433 | /* | ||
1434 | * Replace breakpoints (int3) with relative jumps. | ||
1435 | * Caller must call with locking kprobe_mutex and text_mutex. | ||
1436 | */ | ||
1437 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
1438 | { | ||
1439 | struct optimized_kprobe *op, *tmp; | ||
1440 | int c = 0; | ||
1441 | |||
1442 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1443 | WARN_ON(kprobe_disabled(&op->kp)); | ||
1444 | /* Setup param */ | ||
1445 | setup_optimize_kprobe(&jump_poke_params[c], | ||
1446 | jump_poke_bufs[c].buf, op); | ||
1447 | list_del_init(&op->list); | ||
1448 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1449 | break; | ||
1450 | } | ||
1417 | 1451 | ||
1418 | /* | 1452 | /* |
1419 | * text_poke_smp doesn't support NMI/MCE code modifying. | 1453 | * text_poke_smp doesn't support NMI/MCE code modifying. |
1420 | * However, since kprobes itself also doesn't support NMI/MCE | 1454 | * However, since kprobes itself also doesn't support NMI/MCE |
1421 | * code probing, it's not a problem. | 1455 | * code probing, it's not a problem. |
1422 | */ | 1456 | */ |
1423 | text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); | 1457 | text_poke_smp_batch(jump_poke_params, c); |
1424 | return 0; | 1458 | } |
1459 | |||
1460 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
1461 | u8 *insn_buf, | ||
1462 | struct optimized_kprobe *op) | ||
1463 | { | ||
1464 | /* Set int3 to first byte for kprobes */ | ||
1465 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
1466 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1467 | |||
1468 | tprm->addr = op->kp.addr; | ||
1469 | tprm->opcode = insn_buf; | ||
1470 | tprm->len = RELATIVEJUMP_SIZE; | ||
1471 | } | ||
1472 | |||
1473 | /* | ||
1474 | * Recover original instructions and breakpoints from relative jumps. | ||
1475 | * Caller must call with locking kprobe_mutex. | ||
1476 | */ | ||
1477 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
1478 | struct list_head *done_list) | ||
1479 | { | ||
1480 | struct optimized_kprobe *op, *tmp; | ||
1481 | int c = 0; | ||
1482 | |||
1483 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1484 | /* Setup param */ | ||
1485 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
1486 | jump_poke_bufs[c].buf, op); | ||
1487 | list_move(&op->list, done_list); | ||
1488 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1489 | break; | ||
1490 | } | ||
1491 | |||
1492 | /* | ||
1493 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1494 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1495 | * code probing, it's not a problem. | ||
1496 | */ | ||
1497 | text_poke_smp_batch(jump_poke_params, c); | ||
1425 | } | 1498 | } |
1426 | 1499 | ||
1427 | /* Replace a relative jump with a breakpoint (int3). */ | 1500 | /* Replace a relative jump with a breakpoint (int3). */ |
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p, | |||
1453 | } | 1526 | } |
1454 | return 0; | 1527 | return 0; |
1455 | } | 1528 | } |
1529 | |||
1530 | static int __kprobes init_poke_params(void) | ||
1531 | { | ||
1532 | /* Allocate code buffer and parameter array */ | ||
1533 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
1534 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1535 | if (!jump_poke_bufs) | ||
1536 | return -ENOMEM; | ||
1537 | |||
1538 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
1539 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1540 | if (!jump_poke_params) { | ||
1541 | kfree(jump_poke_bufs); | ||
1542 | jump_poke_bufs = NULL; | ||
1543 | return -ENOMEM; | ||
1544 | } | ||
1545 | |||
1546 | return 0; | ||
1547 | } | ||
1548 | #else /* !CONFIG_OPTPROBES */ | ||
1549 | static int __kprobes init_poke_params(void) | ||
1550 | { | ||
1551 | return 0; | ||
1552 | } | ||
1456 | #endif | 1553 | #endif |
1457 | 1554 | ||
1458 | int __init arch_init_kprobes(void) | 1555 | int __init arch_init_kprobes(void) |
1459 | { | 1556 | { |
1460 | return 0; | 1557 | return init_poke_params(); |
1461 | } | 1558 | } |
1462 | 1559 | ||
1463 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1560 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d1868a86aa..96ed1aac543a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -91,8 +91,7 @@ void exit_thread(void) | |||
91 | void show_regs(struct pt_regs *regs) | 91 | void show_regs(struct pt_regs *regs) |
92 | { | 92 | { |
93 | show_registers(regs); | 93 | show_registers(regs); |
94 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), | 94 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); |
95 | regs->bp); | ||
96 | } | 95 | } |
97 | 96 | ||
98 | void show_regs_common(void) | 97 | void show_regs_common(void) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 083e99d1b7df..68f61ac632e1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void) | |||
281 | */ | 281 | */ |
282 | smp_store_cpu_info(cpuid); | 282 | smp_store_cpu_info(cpuid); |
283 | 283 | ||
284 | /* | ||
285 | * This must be done before setting cpu_online_mask | ||
286 | * or calling notify_cpu_starting. | ||
287 | */ | ||
288 | set_cpu_sibling_map(raw_smp_processor_id()); | ||
289 | wmb(); | ||
290 | |||
284 | notify_cpu_starting(cpuid); | 291 | notify_cpu_starting(cpuid); |
285 | 292 | ||
286 | /* | 293 | /* |
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
316 | */ | 323 | */ |
317 | check_tsc_sync_target(); | 324 | check_tsc_sync_target(); |
318 | 325 | ||
319 | if (nmi_watchdog == NMI_IO_APIC) { | ||
320 | legacy_pic->mask(0); | ||
321 | enable_NMI_through_LVT0(); | ||
322 | legacy_pic->unmask(0); | ||
323 | } | ||
324 | |||
325 | /* This must be done before setting cpu_online_mask */ | ||
326 | set_cpu_sibling_map(raw_smp_processor_id()); | ||
327 | wmb(); | ||
328 | |||
329 | /* | 326 | /* |
330 | * We need to hold call_lock, so there is no inconsistency | 327 | * We need to hold call_lock, so there is no inconsistency |
331 | * between the time smp_call_function() determines number of | 328 | * between the time smp_call_function() determines number of |
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1061 | printk(KERN_INFO "SMP mode deactivated.\n"); | 1058 | printk(KERN_INFO "SMP mode deactivated.\n"); |
1062 | smpboot_clear_io_apic(); | 1059 | smpboot_clear_io_apic(); |
1063 | 1060 | ||
1064 | localise_nmi_watchdog(); | ||
1065 | |||
1066 | connect_bsp_APIC(); | 1061 | connect_bsp_APIC(); |
1067 | setup_local_APIC(); | 1062 | setup_local_APIC(); |
1068 | end_local_APIC_setup(); | 1063 | end_local_APIC_setup(); |
@@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1196 | #ifdef CONFIG_X86_IO_APIC | 1191 | #ifdef CONFIG_X86_IO_APIC |
1197 | setup_ioapic_dest(); | 1192 | setup_ioapic_dest(); |
1198 | #endif | 1193 | #endif |
1199 | check_nmi_watchdog(); | ||
1200 | mtrr_aps_init(); | 1194 | mtrr_aps_init(); |
1201 | } | 1195 | } |
1202 | 1196 | ||
@@ -1341,8 +1335,6 @@ int native_cpu_disable(void) | |||
1341 | if (cpu == 0) | 1335 | if (cpu == 0) |
1342 | return -EBUSY; | 1336 | return -EBUSY; |
1343 | 1337 | ||
1344 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1345 | stop_apic_nmi_watchdog(NULL); | ||
1346 | clear_local_APIC(); | 1338 | clear_local_APIC(); |
1347 | 1339 | ||
1348 | cpu_disable_common(); | 1340 | cpu_disable_common(); |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b53c525368a7..938c8e10a19a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = { | |||
73 | */ | 73 | */ |
74 | void save_stack_trace(struct stack_trace *trace) | 74 | void save_stack_trace(struct stack_trace *trace) |
75 | { | 75 | { |
76 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); | 76 | dump_trace(current, NULL, NULL, &save_stack_ops, trace); |
77 | if (trace->nr_entries < trace->max_entries) | 77 | if (trace->nr_entries < trace->max_entries) |
78 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 78 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
79 | } | 79 | } |
80 | EXPORT_SYMBOL_GPL(save_stack_trace); | 80 | EXPORT_SYMBOL_GPL(save_stack_trace); |
81 | 81 | ||
82 | void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) | 82 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) |
83 | { | 83 | { |
84 | dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); | 84 | dump_trace(current, regs, NULL, &save_stack_ops, trace); |
85 | if (trace->nr_entries < trace->max_entries) | 85 | if (trace->nr_entries < trace->max_entries) |
86 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 86 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
87 | } | 87 | } |
88 | 88 | ||
89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
90 | { | 90 | { |
91 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 91 | dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); |
92 | if (trace->nr_entries < trace->max_entries) | 92 | if (trace->nr_entries < trace->max_entries) |
93 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 93 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
94 | } | 94 | } |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fb5cc5e14cfa..25a28a245937 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -22,10 +22,6 @@ | |||
22 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
23 | #include <asm/time.h> | 23 | #include <asm/time.h> |
24 | 24 | ||
25 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) | ||
26 | int timer_ack; | ||
27 | #endif | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | 25 | #ifdef CONFIG_X86_64 |
30 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | 26 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
31 | #endif | 27 | #endif |
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
63 | /* Keep nmi watchdog up to date */ | 59 | /* Keep nmi watchdog up to date */ |
64 | inc_irq_stat(irq0_irqs); | 60 | inc_irq_stat(irq0_irqs); |
65 | 61 | ||
66 | /* Optimized out for !IO_APIC and x86_64 */ | ||
67 | if (timer_ack) { | ||
68 | /* | ||
69 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
70 | * manually to deassert NMI lines for the watchdog if run | ||
71 | * on an 82489DX-based system. | ||
72 | */ | ||
73 | raw_spin_lock(&i8259A_lock); | ||
74 | outb(0x0c, PIC_MASTER_OCW3); | ||
75 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
76 | inb(PIC_MASTER_POLL); | ||
77 | raw_spin_unlock(&i8259A_lock); | ||
78 | } | ||
79 | |||
80 | global_clock_event->event_handler(global_clock_event); | 62 | global_clock_event->event_handler(global_clock_event); |
81 | 63 | ||
82 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ | 64 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb838ca42c96..bb6f04167361 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors); | |||
83 | 83 | ||
84 | static int ignore_nmis; | 84 | static int ignore_nmis; |
85 | 85 | ||
86 | int unknown_nmi_panic; | ||
87 | |||
86 | static inline void conditional_sti(struct pt_regs *regs) | 88 | static inline void conditional_sti(struct pt_regs *regs) |
87 | { | 89 | { |
88 | if (regs->flags & X86_EFLAGS_IF) | 90 | if (regs->flags & X86_EFLAGS_IF) |
@@ -300,6 +302,13 @@ gp_in_kernel: | |||
300 | die("general protection fault", regs, error_code); | 302 | die("general protection fault", regs, error_code); |
301 | } | 303 | } |
302 | 304 | ||
305 | static int __init setup_unknown_nmi_panic(char *str) | ||
306 | { | ||
307 | unknown_nmi_panic = 1; | ||
308 | return 1; | ||
309 | } | ||
310 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
311 | |||
303 | static notrace __kprobes void | 312 | static notrace __kprobes void |
304 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | 313 | mem_parity_error(unsigned char reason, struct pt_regs *regs) |
305 | { | 314 | { |
@@ -371,7 +380,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
371 | reason, smp_processor_id()); | 380 | reason, smp_processor_id()); |
372 | 381 | ||
373 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | 382 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); |
374 | if (panic_on_unrecovered_nmi) | 383 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) |
375 | panic("NMI: Not continuing"); | 384 | panic("NMI: Not continuing"); |
376 | 385 | ||
377 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 386 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
@@ -397,20 +406,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
397 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 406 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
398 | == NOTIFY_STOP) | 407 | == NOTIFY_STOP) |
399 | return; | 408 | return; |
400 | |||
401 | #ifndef CONFIG_LOCKUP_DETECTOR | ||
402 | /* | ||
403 | * Ok, so this is none of the documented NMI sources, | ||
404 | * so it must be the NMI watchdog. | ||
405 | */ | ||
406 | if (nmi_watchdog_tick(regs, reason)) | ||
407 | return; | ||
408 | if (!do_nmi_callback(regs, cpu)) | ||
409 | #endif /* !CONFIG_LOCKUP_DETECTOR */ | ||
410 | unknown_nmi_error(reason, regs); | ||
411 | #else | ||
412 | unknown_nmi_error(reason, regs); | ||
413 | #endif | 409 | #endif |
410 | unknown_nmi_error(reason, regs); | ||
414 | 411 | ||
415 | return; | 412 | return; |
416 | } | 413 | } |
@@ -446,14 +443,12 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
446 | 443 | ||
447 | void stop_nmi(void) | 444 | void stop_nmi(void) |
448 | { | 445 | { |
449 | acpi_nmi_disable(); | ||
450 | ignore_nmis++; | 446 | ignore_nmis++; |
451 | } | 447 | } |
452 | 448 | ||
453 | void restart_nmi(void) | 449 | void restart_nmi(void) |
454 | { | 450 | { |
455 | ignore_nmis--; | 451 | ignore_nmis--; |
456 | acpi_nmi_enable(); | ||
457 | } | 452 | } |
458 | 453 | ||
459 | /* May run on IST stack. */ | 454 | /* May run on IST stack. */ |
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index af3b6c8a436f..704a37cedddb 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, | |||
185 | e->trace.entries = e->trace_entries; | 185 | e->trace.entries = e->trace_entries; |
186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | 186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); |
187 | e->trace.skip = 0; | 187 | e->trace.skip = 0; |
188 | save_stack_trace_bp(&e->trace, regs->bp); | 188 | save_stack_trace_regs(&e->trace, regs); |
189 | 189 | ||
190 | /* Round address down to nearest 16 bytes */ | 190 | /* Round address down to nearest 16 bytes */ |
191 | shadow_copy = kmemcheck_shadow_lookup(address | 191 | shadow_copy = kmemcheck_shadow_lookup(address |
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..72cbec14d783 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
126 | if (!user_mode_vm(regs)) { | 126 | if (!user_mode_vm(regs)) { |
127 | unsigned long stack = kernel_stack_pointer(regs); | 127 | unsigned long stack = kernel_stack_pointer(regs); |
128 | if (depth) | 128 | if (depth) |
129 | dump_trace(NULL, regs, (unsigned long *)stack, 0, | 129 | dump_trace(NULL, regs, (unsigned long *)stack, |
130 | &backtrace_ops, &depth); | 130 | &backtrace_ops, &depth); |
131 | return; | 131 | return; |
132 | } | 132 | } |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 4e8baad36d37..358c8b9c96a7 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
732 | case 0x14: | 732 | case 0x14: |
733 | cpu_type = "x86-64/family14h"; | 733 | cpu_type = "x86-64/family14h"; |
734 | break; | 734 | break; |
735 | case 0x15: | ||
736 | cpu_type = "x86-64/family15h"; | ||
737 | break; | ||
735 | default: | 738 | default: |
736 | return -ENODEV; | 739 | return -ENODEV; |
737 | } | 740 | } |
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index e3ecb71b5790..0636dd93cef8 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c | |||
@@ -58,9 +58,6 @@ static void timer_stop(void) | |||
58 | 58 | ||
59 | int __init op_nmi_timer_init(struct oprofile_operations *ops) | 59 | int __init op_nmi_timer_init(struct oprofile_operations *ops) |
60 | { | 60 | { |
61 | if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) | ||
62 | return -ENODEV; | ||
63 | |||
64 | ops->start = timer_start; | 61 | ops->start = timer_start; |
65 | ops->stop = timer_stop; | 62 | ops->stop = timer_stop; |
66 | ops->cpu_type = "timer"; | 63 | ops->cpu_type = "timer"; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index a011bcc0f943..f2984d43a6b3 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -29,11 +29,12 @@ | |||
29 | #include "op_x86_model.h" | 29 | #include "op_x86_model.h" |
30 | #include "op_counter.h" | 30 | #include "op_counter.h" |
31 | 31 | ||
32 | #define NUM_COUNTERS 4 | 32 | #define NUM_COUNTERS 4 |
33 | #define NUM_COUNTERS_F15H 6 | ||
33 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 34 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
34 | #define NUM_VIRT_COUNTERS 32 | 35 | #define NUM_VIRT_COUNTERS 32 |
35 | #else | 36 | #else |
36 | #define NUM_VIRT_COUNTERS NUM_COUNTERS | 37 | #define NUM_VIRT_COUNTERS 0 |
37 | #endif | 38 | #endif |
38 | 39 | ||
39 | #define OP_EVENT_MASK 0x0FFF | 40 | #define OP_EVENT_MASK 0x0FFF |
@@ -41,7 +42,8 @@ | |||
41 | 42 | ||
42 | #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) | 43 | #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) |
43 | 44 | ||
44 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; | 45 | static int num_counters; |
46 | static unsigned long reset_value[OP_MAX_COUNTER]; | ||
45 | 47 | ||
46 | #define IBS_FETCH_SIZE 6 | 48 | #define IBS_FETCH_SIZE 6 |
47 | #define IBS_OP_SIZE 12 | 49 | #define IBS_OP_SIZE 12 |
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
387 | int i; | 389 | int i; |
388 | 390 | ||
389 | /* enable active counters */ | 391 | /* enable active counters */ |
390 | for (i = 0; i < NUM_COUNTERS; ++i) { | 392 | for (i = 0; i < num_counters; ++i) { |
391 | int virt = op_x86_phys_to_virt(i); | 393 | int virt = op_x86_phys_to_virt(i); |
392 | if (!reset_value[virt]) | 394 | if (!reset_value[virt]) |
393 | continue; | 395 | continue; |
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
406 | { | 408 | { |
407 | int i; | 409 | int i; |
408 | 410 | ||
409 | for (i = 0; i < NUM_COUNTERS; ++i) { | 411 | for (i = 0; i < num_counters; ++i) { |
410 | if (!msrs->counters[i].addr) | 412 | if (!msrs->counters[i].addr) |
411 | continue; | 413 | continue; |
412 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | 414 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); |
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
418 | { | 420 | { |
419 | int i; | 421 | int i; |
420 | 422 | ||
421 | for (i = 0; i < NUM_COUNTERS; i++) { | 423 | for (i = 0; i < num_counters; i++) { |
422 | if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 424 | if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) |
423 | goto fail; | 425 | goto fail; |
424 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { | 426 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { |
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
426 | goto fail; | 428 | goto fail; |
427 | } | 429 | } |
428 | /* both registers must be reserved */ | 430 | /* both registers must be reserved */ |
429 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | 431 | if (num_counters == NUM_COUNTERS_F15H) { |
430 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | 432 | msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); |
433 | msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); | ||
434 | } else { | ||
435 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
436 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
437 | } | ||
431 | continue; | 438 | continue; |
432 | fail: | 439 | fail: |
433 | if (!counter_config[i].enabled) | 440 | if (!counter_config[i].enabled) |
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
447 | int i; | 454 | int i; |
448 | 455 | ||
449 | /* setup reset_value */ | 456 | /* setup reset_value */ |
450 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | 457 | for (i = 0; i < OP_MAX_COUNTER; ++i) { |
451 | if (counter_config[i].enabled | 458 | if (counter_config[i].enabled |
452 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | 459 | && msrs->counters[op_x86_virt_to_phys(i)].addr) |
453 | reset_value[i] = counter_config[i].count; | 460 | reset_value[i] = counter_config[i].count; |
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
456 | } | 463 | } |
457 | 464 | ||
458 | /* clear all counters */ | 465 | /* clear all counters */ |
459 | for (i = 0; i < NUM_COUNTERS; ++i) { | 466 | for (i = 0; i < num_counters; ++i) { |
460 | if (!msrs->controls[i].addr) | 467 | if (!msrs->controls[i].addr) |
461 | continue; | 468 | continue; |
462 | rdmsrl(msrs->controls[i].addr, val); | 469 | rdmsrl(msrs->controls[i].addr, val); |
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
472 | } | 479 | } |
473 | 480 | ||
474 | /* enable active counters */ | 481 | /* enable active counters */ |
475 | for (i = 0; i < NUM_COUNTERS; ++i) { | 482 | for (i = 0; i < num_counters; ++i) { |
476 | int virt = op_x86_phys_to_virt(i); | 483 | int virt = op_x86_phys_to_virt(i); |
477 | if (!reset_value[virt]) | 484 | if (!reset_value[virt]) |
478 | continue; | 485 | continue; |
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, | |||
503 | u64 val; | 510 | u64 val; |
504 | int i; | 511 | int i; |
505 | 512 | ||
506 | for (i = 0; i < NUM_COUNTERS; ++i) { | 513 | for (i = 0; i < num_counters; ++i) { |
507 | int virt = op_x86_phys_to_virt(i); | 514 | int virt = op_x86_phys_to_virt(i); |
508 | if (!reset_value[virt]) | 515 | if (!reset_value[virt]) |
509 | continue; | 516 | continue; |
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs) | |||
526 | u64 val; | 533 | u64 val; |
527 | int i; | 534 | int i; |
528 | 535 | ||
529 | for (i = 0; i < NUM_COUNTERS; ++i) { | 536 | for (i = 0; i < num_counters; ++i) { |
530 | if (!reset_value[op_x86_phys_to_virt(i)]) | 537 | if (!reset_value[op_x86_phys_to_virt(i)]) |
531 | continue; | 538 | continue; |
532 | rdmsrl(msrs->controls[i].addr, val); | 539 | rdmsrl(msrs->controls[i].addr, val); |
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
546 | * Subtle: stop on all counters to avoid race with setting our | 553 | * Subtle: stop on all counters to avoid race with setting our |
547 | * pm callback | 554 | * pm callback |
548 | */ | 555 | */ |
549 | for (i = 0; i < NUM_COUNTERS; ++i) { | 556 | for (i = 0; i < num_counters; ++i) { |
550 | if (!reset_value[op_x86_phys_to_virt(i)]) | 557 | if (!reset_value[op_x86_phys_to_virt(i)]) |
551 | continue; | 558 | continue; |
552 | rdmsrl(msrs->controls[i].addr, val); | 559 | rdmsrl(msrs->controls[i].addr, val); |
@@ -698,18 +705,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
698 | return 0; | 705 | return 0; |
699 | } | 706 | } |
700 | 707 | ||
708 | struct op_x86_model_spec op_amd_spec; | ||
709 | |||
701 | static int op_amd_init(struct oprofile_operations *ops) | 710 | static int op_amd_init(struct oprofile_operations *ops) |
702 | { | 711 | { |
703 | init_ibs(); | 712 | init_ibs(); |
704 | create_arch_files = ops->create_files; | 713 | create_arch_files = ops->create_files; |
705 | ops->create_files = setup_ibs_files; | 714 | ops->create_files = setup_ibs_files; |
715 | |||
716 | if (boot_cpu_data.x86 == 0x15) { | ||
717 | num_counters = NUM_COUNTERS_F15H; | ||
718 | } else { | ||
719 | num_counters = NUM_COUNTERS; | ||
720 | } | ||
721 | |||
722 | op_amd_spec.num_counters = num_counters; | ||
723 | op_amd_spec.num_controls = num_counters; | ||
724 | op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS); | ||
725 | |||
706 | return 0; | 726 | return 0; |
707 | } | 727 | } |
708 | 728 | ||
709 | struct op_x86_model_spec op_amd_spec = { | 729 | struct op_x86_model_spec op_amd_spec = { |
710 | .num_counters = NUM_COUNTERS, | 730 | /* num_counters/num_controls filled in at runtime */ |
711 | .num_controls = NUM_COUNTERS, | ||
712 | .num_virt_counters = NUM_VIRT_COUNTERS, | ||
713 | .reserved = MSR_AMD_EVENTSEL_RESERVED, | 731 | .reserved = MSR_AMD_EVENTSEL_RESERVED, |
714 | .event_mask = OP_EVENT_MASK, | 732 | .event_mask = OP_EVENT_MASK, |
715 | .init = op_amd_init, | 733 | .init = op_amd_init, |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 182558dd5515..9fadec074142 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/oprofile.h> | 11 | #include <linux/oprofile.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/ptrace.h> | 13 | #include <linux/ptrace.h> |
14 | #include <linux/nmi.h> | 14 | #include <asm/nmi.h> |
15 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
17 | #include <asm/apic.h> | 17 | #include <asm/apic.h> |