diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-01-07 08:14:15 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-01-07 08:14:15 -0500 |
commit | 1c2a48cf65580a276552151eb8f78d78c55b828e (patch) | |
tree | 68ed0628a276b33cb5aa0ad4899c1afe0a33a69d /arch/x86 | |
parent | 0aa002fe602939370e9476e5ec32b562000a0425 (diff) | |
parent | cb600d2f83c854ec3d6660063e4466431999489b (diff) |
Merge branch 'linus' into x86/apic-cleanups
Conflicts:
arch/x86/include/asm/io_apic.h
Merge reason: Resolve the conflict, update to a more recent -rc base
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
118 files changed, 1437 insertions, 2006 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 58ecad57aad1..b6fccb07123e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -21,7 +21,7 @@ config X86 | |||
21 | select HAVE_UNSTABLE_SCHED_CLOCK | 21 | select HAVE_UNSTABLE_SCHED_CLOCK |
22 | select HAVE_IDE | 22 | select HAVE_IDE |
23 | select HAVE_OPROFILE | 23 | select HAVE_OPROFILE |
24 | select HAVE_PERF_EVENTS if (!M386 && !M486) | 24 | select HAVE_PERF_EVENTS |
25 | select HAVE_IRQ_WORK | 25 | select HAVE_IRQ_WORK |
26 | select HAVE_IOREMAP_PROT | 26 | select HAVE_IOREMAP_PROT |
27 | select HAVE_KPROBES | 27 | select HAVE_KPROBES |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b59ee765414e..45143bbcfe5e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST | |||
117 | feature as well as for the change_page_attr() infrastructure. | 117 | feature as well as for the change_page_attr() infrastructure. |
118 | If in doubt, say "N" | 118 | If in doubt, say "N" |
119 | 119 | ||
120 | config DEBUG_SET_MODULE_RONX | ||
121 | bool "Set loadable kernel module data as NX and text as RO" | ||
122 | depends on MODULES | ||
123 | ---help--- | ||
124 | This option helps catch unintended modifications to loadable | ||
125 | kernel module's text and read-only data. It also prevents execution | ||
126 | of module data. Such protection may interfere with run-time code | ||
127 | patching and dynamic kernel tracing - and they might also protect | ||
128 | against certain classes of kernel exploits. | ||
129 | If in doubt, say "N". | ||
130 | |||
120 | config DEBUG_NX_TEST | 131 | config DEBUG_NX_TEST |
121 | tristate "Testcase for the NX non-executable stack feature" | 132 | tristate "Testcase for the NX non-executable stack feature" |
122 | depends on DEBUG_KERNEL && m | 133 | depends on DEBUG_KERNEL && m |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 52f85a196fa0..35af09d13dc1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -182,7 +182,7 @@ no_longmode: | |||
182 | hlt | 182 | hlt |
183 | jmp 1b | 183 | jmp 1b |
184 | 184 | ||
185 | #include "../../kernel/verify_cpu_64.S" | 185 | #include "../../kernel/verify_cpu.S" |
186 | 186 | ||
187 | /* | 187 | /* |
188 | * Be careful here startup_64 needs to be at a predictable | 188 | * Be careful here startup_64 needs to be at a predictable |
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 23f315c9f215..325c05294fc4 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
355 | if (heap > 0x3fffffffffffUL) | 355 | if (heap > 0x3fffffffffffUL) |
356 | error("Destination address too large"); | 356 | error("Destination address too large"); |
357 | #else | 357 | #else |
358 | if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) | 358 | if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) |
359 | error("Destination address too large"); | 359 | error("Destination address too large"); |
360 | #endif | 360 | #endif |
361 | #ifndef CONFIG_RELOCATABLE | 361 | #ifndef CONFIG_RELOCATABLE |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index cbcc8d8ea93a..7a6e68e4f748 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * by the Free Software Foundation. | 10 | * by the Free Software Foundation. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/err.h> | ||
13 | #include <linux/module.h> | 14 | #include <linux/module.h> |
14 | #include <linux/init.h> | 15 | #include <linux/init.h> |
15 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 849813f398e7..5852519b2d0f 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
29 | #include <linux/times.h> | 29 | #include <linux/times.h> |
30 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/mm.h> | 31 | #include <linux/mm.h> |
33 | #include <linux/uio.h> | 32 | #include <linux/uio.h> |
34 | #include <linux/poll.h> | 33 | #include <linux/poll.h> |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 76561d20ea2f..13009d1af99a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -66,6 +66,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, | |||
66 | extern void alternatives_smp_module_del(struct module *mod); | 66 | extern void alternatives_smp_module_del(struct module *mod); |
67 | extern void alternatives_smp_switch(int smp); | 67 | extern void alternatives_smp_switch(int smp); |
68 | extern int alternatives_text_reserved(void *start, void *end); | 68 | extern int alternatives_text_reserved(void *start, void *end); |
69 | extern bool skip_smp_alternatives; | ||
69 | #else | 70 | #else |
70 | static inline void alternatives_smp_module_add(struct module *mod, char *name, | 71 | static inline void alternatives_smp_module_add(struct module *mod, char *name, |
71 | void *locks, void *locks_end, | 72 | void *locks, void *locks_end, |
@@ -180,8 +181,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); | |||
180 | * On the local CPU you need to be protected again NMI or MCE handlers seeing an | 181 | * On the local CPU you need to be protected again NMI or MCE handlers seeing an |
181 | * inconsistent instruction while you patch. | 182 | * inconsistent instruction while you patch. |
182 | */ | 183 | */ |
184 | struct text_poke_param { | ||
185 | void *addr; | ||
186 | const void *opcode; | ||
187 | size_t len; | ||
188 | }; | ||
189 | |||
183 | extern void *text_poke(void *addr, const void *opcode, size_t len); | 190 | extern void *text_poke(void *addr, const void *opcode, size_t len); |
184 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); | 191 | extern void *text_poke_smp(void *addr, const void *opcode, size_t len); |
192 | extern void text_poke_smp_batch(struct text_poke_param *params, int n); | ||
185 | 193 | ||
186 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | 194 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) |
187 | #define IDEAL_NOP_SIZE_5 5 | 195 | #define IDEAL_NOP_SIZE_5 5 |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index a859ca461fb0..47a30ff8e517 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -145,6 +145,7 @@ | |||
145 | 145 | ||
146 | #ifdef CONFIG_X86_32 | 146 | #ifdef CONFIG_X86_32 |
147 | # define MAX_IO_APICS 64 | 147 | # define MAX_IO_APICS 64 |
148 | # define MAX_LOCAL_APIC 256 | ||
148 | #else | 149 | #else |
149 | # define MAX_IO_APICS 128 | 150 | # define MAX_IO_APICS 128 |
150 | # define MAX_LOCAL_APIC 32768 | 151 | # define MAX_LOCAL_APIC 32768 |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 5be1542fbfaf..e99d55d74df5 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -72,6 +72,9 @@ struct e820map { | |||
72 | #define BIOS_BEGIN 0x000a0000 | 72 | #define BIOS_BEGIN 0x000a0000 |
73 | #define BIOS_END 0x00100000 | 73 | #define BIOS_END 0x00100000 |
74 | 74 | ||
75 | #define BIOS_ROM_BASE 0xffe00000 | ||
76 | #define BIOS_ROM_END 0xffffffff | ||
77 | |||
75 | #ifdef __KERNEL__ | 78 | #ifdef __KERNEL__ |
76 | /* see comment in arch/x86/kernel/e820.c */ | 79 | /* see comment in arch/x86/kernel/e820.c */ |
77 | extern struct e820map e820; | 80 | extern struct e820map e820; |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 139591a933f6..0141b234406f 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -220,8 +220,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) | |||
220 | } | 220 | } |
221 | 221 | ||
222 | /* Return an pointer with offset calculated */ | 222 | /* Return an pointer with offset calculated */ |
223 | static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx, | 223 | static __always_inline unsigned long |
224 | phys_addr_t phys, pgprot_t flags) | 224 | __set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) |
225 | { | 225 | { |
226 | __set_fixmap(idx, phys, flags); | 226 | __set_fixmap(idx, phys, flags); |
227 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); | 227 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 4aa2bb3b242a..ef328901c802 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
93 | int err; | 93 | int err; |
94 | 94 | ||
95 | /* See comment in fxsave() below. */ | 95 | /* See comment in fxsave() below. */ |
96 | #ifdef CONFIG_AS_FXSAVEQ | ||
97 | asm volatile("1: fxrstorq %[fx]\n\t" | ||
98 | "2:\n" | ||
99 | ".section .fixup,\"ax\"\n" | ||
100 | "3: movl $-1,%[err]\n" | ||
101 | " jmp 2b\n" | ||
102 | ".previous\n" | ||
103 | _ASM_EXTABLE(1b, 3b) | ||
104 | : [err] "=r" (err) | ||
105 | : [fx] "m" (*fx), "0" (0)); | ||
106 | #else | ||
96 | asm volatile("1: rex64/fxrstor (%[fx])\n\t" | 107 | asm volatile("1: rex64/fxrstor (%[fx])\n\t" |
97 | "2:\n" | 108 | "2:\n" |
98 | ".section .fixup,\"ax\"\n" | 109 | ".section .fixup,\"ax\"\n" |
@@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
102 | _ASM_EXTABLE(1b, 3b) | 113 | _ASM_EXTABLE(1b, 3b) |
103 | : [err] "=r" (err) | 114 | : [err] "=r" (err) |
104 | : [fx] "R" (fx), "m" (*fx), "0" (0)); | 115 | : [fx] "R" (fx), "m" (*fx), "0" (0)); |
116 | #endif | ||
105 | return err; | 117 | return err; |
106 | } | 118 | } |
107 | 119 | ||
@@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
119 | return -EFAULT; | 131 | return -EFAULT; |
120 | 132 | ||
121 | /* See comment in fxsave() below. */ | 133 | /* See comment in fxsave() below. */ |
134 | #ifdef CONFIG_AS_FXSAVEQ | ||
135 | asm volatile("1: fxsaveq %[fx]\n\t" | ||
136 | "2:\n" | ||
137 | ".section .fixup,\"ax\"\n" | ||
138 | "3: movl $-1,%[err]\n" | ||
139 | " jmp 2b\n" | ||
140 | ".previous\n" | ||
141 | _ASM_EXTABLE(1b, 3b) | ||
142 | : [err] "=r" (err), [fx] "=m" (*fx) | ||
143 | : "0" (0)); | ||
144 | #else | ||
122 | asm volatile("1: rex64/fxsave (%[fx])\n\t" | 145 | asm volatile("1: rex64/fxsave (%[fx])\n\t" |
123 | "2:\n" | 146 | "2:\n" |
124 | ".section .fixup,\"ax\"\n" | 147 | ".section .fixup,\"ax\"\n" |
@@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
128 | _ASM_EXTABLE(1b, 3b) | 151 | _ASM_EXTABLE(1b, 3b) |
129 | : [err] "=r" (err), "=m" (*fx) | 152 | : [err] "=r" (err), "=m" (*fx) |
130 | : [fx] "R" (fx), "0" (0)); | 153 | : [fx] "R" (fx), "0" (0)); |
154 | #endif | ||
131 | if (unlikely(err) && | 155 | if (unlikely(err) && |
132 | __clear_user(fx, sizeof(struct i387_fxsave_struct))) | 156 | __clear_user(fx, sizeof(struct i387_fxsave_struct))) |
133 | err = -EFAULT; | 157 | err = -EFAULT; |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 13b0ebaa512f..ba870bb6dd8e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq) | |||
15 | return ((irq == 2) ? 9 : irq); | 15 | return ((irq == 2) ? 9 : irq); |
16 | } | 16 | } |
17 | 17 | ||
18 | #ifdef CONFIG_X86_LOCAL_APIC | ||
19 | # define ARCH_HAS_NMI_WATCHDOG | ||
20 | #endif | ||
21 | |||
22 | #ifdef CONFIG_X86_32 | 18 | #ifdef CONFIG_X86_32 |
23 | extern void irq_ctx_init(int cpu); | 19 | extern void irq_ctx_init(int cpu); |
24 | #else | 20 | #else |
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca86581b..f23eb2528464 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h | |||
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long); | |||
28 | extern int __must_check __die(const char *, struct pt_regs *, long); | 28 | extern int __must_check __die(const char *, struct pt_regs *, long); |
29 | extern void show_registers(struct pt_regs *regs); | 29 | extern void show_registers(struct pt_regs *regs); |
30 | extern void show_trace(struct task_struct *t, struct pt_regs *regs, | 30 | extern void show_trace(struct task_struct *t, struct pt_regs *regs, |
31 | unsigned long *sp, unsigned long bp); | 31 | unsigned long *sp); |
32 | extern void __show_regs(struct pt_regs *regs, int all); | 32 | extern void __show_regs(struct pt_regs *regs, int all); |
33 | extern void show_regs(struct pt_regs *regs); | 33 | extern void show_regs(struct pt_regs *regs); |
34 | extern unsigned long oops_begin(void); | 34 | extern unsigned long oops_begin(void); |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9e6fe391094e..f702f82aa1eb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -79,7 +79,7 @@ | |||
79 | #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) | 79 | #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) |
80 | #define KVM_MIN_FREE_MMU_PAGES 5 | 80 | #define KVM_MIN_FREE_MMU_PAGES 5 |
81 | #define KVM_REFILL_PAGES 25 | 81 | #define KVM_REFILL_PAGES 25 |
82 | #define KVM_MAX_CPUID_ENTRIES 40 | 82 | #define KVM_MAX_CPUID_ENTRIES 80 |
83 | #define KVM_NR_FIXED_MTRR_REGION 88 | 83 | #define KVM_NR_FIXED_MTRR_REGION 88 |
84 | #define KVM_NR_VAR_MTRR 8 | 84 | #define KVM_NR_VAR_MTRR 8 |
85 | 85 | ||
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c62c13cb9788..eb16e94ae04f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c); | |||
223 | 223 | ||
224 | void mce_log_therm_throt_event(__u64 status); | 224 | void mce_log_therm_throt_event(__u64 status); |
225 | 225 | ||
226 | /* Interrupt Handler for core thermal thresholds */ | ||
227 | extern int (*platform_thermal_notify)(__u64 msr_val); | ||
228 | |||
226 | #ifdef CONFIG_X86_THERMAL_VECTOR | 229 | #ifdef CONFIG_X86_THERMAL_VECTOR |
227 | extern void mcheck_intel_therm_init(void); | 230 | extern void mcheck_intel_therm_init(void); |
228 | #else | 231 | #else |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b501e22a..24215072d0e1 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void) | |||
48 | 48 | ||
49 | #ifdef CONFIG_MICROCODE_AMD | 49 | #ifdef CONFIG_MICROCODE_AMD |
50 | extern struct microcode_ops * __init init_amd_microcode(void); | 50 | extern struct microcode_ops * __init init_amd_microcode(void); |
51 | |||
52 | static inline void get_ucode_data(void *to, const u8 *from, size_t n) | ||
53 | { | ||
54 | memcpy(to, from, n); | ||
55 | } | ||
56 | |||
51 | #else | 57 | #else |
52 | static inline struct microcode_ops * __init init_amd_microcode(void) | 58 | static inline struct microcode_ops * __init init_amd_microcode(void) |
53 | { | 59 | { |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index c82868e9f905..0c90dd9f0505 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -5,8 +5,9 @@ | |||
5 | 5 | ||
6 | #include <asm/mpspec_def.h> | 6 | #include <asm/mpspec_def.h> |
7 | #include <asm/x86_init.h> | 7 | #include <asm/x86_init.h> |
8 | #include <asm/apicdef.h> | ||
8 | 9 | ||
9 | extern int apic_version[MAX_APICS]; | 10 | extern int apic_version[]; |
10 | extern int pic_mode; | 11 | extern int pic_mode; |
11 | 12 | ||
12 | #ifdef CONFIG_X86_32 | 13 | #ifdef CONFIG_X86_32 |
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, | |||
107 | int active_high_low); | 108 | int active_high_low); |
108 | #endif /* CONFIG_ACPI */ | 109 | #endif /* CONFIG_ACPI */ |
109 | 110 | ||
110 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) | 111 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) |
111 | 112 | ||
112 | struct physid_mask { | 113 | struct physid_mask { |
113 | unsigned long mask[PHYSID_ARRAY_SIZE]; | 114 | unsigned long mask[PHYSID_ARRAY_SIZE]; |
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t; | |||
122 | test_and_set_bit(physid, (map).mask) | 123 | test_and_set_bit(physid, (map).mask) |
123 | 124 | ||
124 | #define physids_and(dst, src1, src2) \ | 125 | #define physids_and(dst, src1, src2) \ |
125 | bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) | 126 | bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) |
126 | 127 | ||
127 | #define physids_or(dst, src1, src2) \ | 128 | #define physids_or(dst, src1, src2) \ |
128 | bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) | 129 | bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) |
129 | 130 | ||
130 | #define physids_clear(map) \ | 131 | #define physids_clear(map) \ |
131 | bitmap_zero((map).mask, MAX_APICS) | 132 | bitmap_zero((map).mask, MAX_LOCAL_APIC) |
132 | 133 | ||
133 | #define physids_complement(dst, src) \ | 134 | #define physids_complement(dst, src) \ |
134 | bitmap_complement((dst).mask, (src).mask, MAX_APICS) | 135 | bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC) |
135 | 136 | ||
136 | #define physids_empty(map) \ | 137 | #define physids_empty(map) \ |
137 | bitmap_empty((map).mask, MAX_APICS) | 138 | bitmap_empty((map).mask, MAX_LOCAL_APIC) |
138 | 139 | ||
139 | #define physids_equal(map1, map2) \ | 140 | #define physids_equal(map1, map2) \ |
140 | bitmap_equal((map1).mask, (map2).mask, MAX_APICS) | 141 | bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC) |
141 | 142 | ||
142 | #define physids_weight(map) \ | 143 | #define physids_weight(map) \ |
143 | bitmap_weight((map).mask, MAX_APICS) | 144 | bitmap_weight((map).mask, MAX_LOCAL_APIC) |
144 | 145 | ||
145 | #define physids_shift_right(d, s, n) \ | 146 | #define physids_shift_right(d, s, n) \ |
146 | bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) | 147 | bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC) |
147 | 148 | ||
148 | #define physids_shift_left(d, s, n) \ | 149 | #define physids_shift_left(d, s, n) \ |
149 | bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) | 150 | bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC) |
150 | 151 | ||
151 | static inline unsigned long physids_coerce(physid_mask_t *map) | 152 | static inline unsigned long physids_coerce(physid_mask_t *map) |
152 | { | 153 | { |
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map) | |||
159 | map->mask[0] = physids; | 160 | map->mask[0] = physids; |
160 | } | 161 | } |
161 | 162 | ||
162 | /* Note: will create very large stack frames if physid_mask_t is big */ | ||
163 | #define physid_mask_of_physid(physid) \ | ||
164 | ({ \ | ||
165 | physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ | ||
166 | physid_set(physid, __physid_mask); \ | ||
167 | __physid_mask; \ | ||
168 | }) | ||
169 | |||
170 | static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) | 163 | static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) |
171 | { | 164 | { |
172 | physids_clear(*map); | 165 | physids_clear(*map); |
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 4a7f96d7c188..c0a955a9a087 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h | |||
@@ -15,13 +15,6 @@ | |||
15 | 15 | ||
16 | #ifdef CONFIG_X86_32 | 16 | #ifdef CONFIG_X86_32 |
17 | # define MAX_MPC_ENTRY 1024 | 17 | # define MAX_MPC_ENTRY 1024 |
18 | # define MAX_APICS 256 | ||
19 | #else | ||
20 | # if NR_CPUS <= 255 | ||
21 | # define MAX_APICS 255 | ||
22 | # else | ||
23 | # define MAX_APICS 32768 | ||
24 | # endif | ||
25 | #endif | 18 | #endif |
26 | 19 | ||
27 | /* Intel MP Floating Pointer Structure */ | 20 | /* Intel MP Floating Pointer Structure */ |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ea3dc487047..4d0dfa0d998e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -123,12 +123,16 @@ | |||
123 | #define MSR_AMD64_IBSCTL 0xc001103a | 123 | #define MSR_AMD64_IBSCTL 0xc001103a |
124 | #define MSR_AMD64_IBSBRTARGET 0xc001103b | 124 | #define MSR_AMD64_IBSBRTARGET 0xc001103b |
125 | 125 | ||
126 | /* Fam 15h MSRs */ | ||
127 | #define MSR_F15H_PERF_CTL 0xc0010200 | ||
128 | #define MSR_F15H_PERF_CTR 0xc0010201 | ||
129 | |||
126 | /* Fam 10h MSRs */ | 130 | /* Fam 10h MSRs */ |
127 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 | 131 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 |
128 | #define FAM10H_MMIO_CONF_ENABLE (1<<0) | 132 | #define FAM10H_MMIO_CONF_ENABLE (1<<0) |
129 | #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf | 133 | #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf |
130 | #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 | 134 | #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 |
131 | #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff | 135 | #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL |
132 | #define FAM10H_MMIO_CONF_BASE_SHIFT 20 | 136 | #define FAM10H_MMIO_CONF_BASE_SHIFT 20 |
133 | #define MSR_FAM10H_NODE_ID 0xc001100c | 137 | #define MSR_FAM10H_NODE_ID 0xc001100c |
134 | 138 | ||
@@ -253,6 +257,18 @@ | |||
253 | #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) | 257 | #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) |
254 | #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) | 258 | #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) |
255 | 259 | ||
260 | /* Thermal Thresholds Support */ | ||
261 | #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) | ||
262 | #define THERM_SHIFT_THRESHOLD0 8 | ||
263 | #define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) | ||
264 | #define THERM_INT_THRESHOLD1_ENABLE (1 << 23) | ||
265 | #define THERM_SHIFT_THRESHOLD1 16 | ||
266 | #define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) | ||
267 | #define THERM_STATUS_THRESHOLD0 (1 << 6) | ||
268 | #define THERM_LOG_THRESHOLD0 (1 << 7) | ||
269 | #define THERM_STATUS_THRESHOLD1 (1 << 8) | ||
270 | #define THERM_LOG_THRESHOLD1 (1 << 9) | ||
271 | |||
256 | /* MISC_ENABLE bits: architectural */ | 272 | /* MISC_ENABLE bits: architectural */ |
257 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) | 273 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) |
258 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) | 274 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 932f0f86b4b7..c4021b953510 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -5,41 +5,15 @@ | |||
5 | #include <asm/irq.h> | 5 | #include <asm/irq.h> |
6 | #include <asm/io.h> | 6 | #include <asm/io.h> |
7 | 7 | ||
8 | #ifdef ARCH_HAS_NMI_WATCHDOG | 8 | #ifdef CONFIG_X86_LOCAL_APIC |
9 | |||
10 | /** | ||
11 | * do_nmi_callback | ||
12 | * | ||
13 | * Check to see if a callback exists and execute it. Return 1 | ||
14 | * if the handler exists and was handled successfully. | ||
15 | */ | ||
16 | int do_nmi_callback(struct pt_regs *regs, int cpu); | ||
17 | 9 | ||
18 | extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); | 10 | extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); |
19 | extern int check_nmi_watchdog(void); | ||
20 | #if !defined(CONFIG_LOCKUP_DETECTOR) | ||
21 | extern int nmi_watchdog_enabled; | ||
22 | #endif | ||
23 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); | 11 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); |
24 | extern int reserve_perfctr_nmi(unsigned int); | 12 | extern int reserve_perfctr_nmi(unsigned int); |
25 | extern void release_perfctr_nmi(unsigned int); | 13 | extern void release_perfctr_nmi(unsigned int); |
26 | extern int reserve_evntsel_nmi(unsigned int); | 14 | extern int reserve_evntsel_nmi(unsigned int); |
27 | extern void release_evntsel_nmi(unsigned int); | 15 | extern void release_evntsel_nmi(unsigned int); |
28 | 16 | ||
29 | extern void setup_apic_nmi_watchdog(void *); | ||
30 | extern void stop_apic_nmi_watchdog(void *); | ||
31 | extern void disable_timer_nmi_watchdog(void); | ||
32 | extern void enable_timer_nmi_watchdog(void); | ||
33 | extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); | ||
34 | extern void cpu_nmi_set_wd_enabled(void); | ||
35 | |||
36 | extern atomic_t nmi_active; | ||
37 | extern unsigned int nmi_watchdog; | ||
38 | #define NMI_NONE 0 | ||
39 | #define NMI_IO_APIC 1 | ||
40 | #define NMI_LOCAL_APIC 2 | ||
41 | #define NMI_INVALID 3 | ||
42 | |||
43 | struct ctl_table; | 17 | struct ctl_table; |
44 | extern int proc_nmi_enabled(struct ctl_table *, int , | 18 | extern int proc_nmi_enabled(struct ctl_table *, int , |
45 | void __user *, size_t *, loff_t *); | 19 | void __user *, size_t *, loff_t *); |
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic; | |||
47 | 21 | ||
48 | void arch_trigger_all_cpu_backtrace(void); | 22 | void arch_trigger_all_cpu_backtrace(void); |
49 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace | 23 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace |
50 | |||
51 | static inline void localise_nmi_watchdog(void) | ||
52 | { | ||
53 | if (nmi_watchdog == NMI_IO_APIC) | ||
54 | nmi_watchdog = NMI_LOCAL_APIC; | ||
55 | } | ||
56 | |||
57 | /* check if nmi_watchdog is active (ie was specified at boot) */ | ||
58 | static inline int nmi_watchdog_active(void) | ||
59 | { | ||
60 | /* | ||
61 | * actually it should be: | ||
62 | * return (nmi_watchdog == NMI_LOCAL_APIC || | ||
63 | * nmi_watchdog == NMI_IO_APIC) | ||
64 | * but since they are power of two we could use a | ||
65 | * cheaper way --cvg | ||
66 | */ | ||
67 | return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); | ||
68 | } | ||
69 | #endif | 24 | #endif |
70 | 25 | ||
71 | void lapic_watchdog_stop(void); | ||
72 | int lapic_watchdog_init(unsigned nmi_hz); | ||
73 | int lapic_wd_event(unsigned nmi_hz); | ||
74 | unsigned lapic_adjust_nmi_hz(unsigned hz); | ||
75 | void disable_lapic_nmi_watchdog(void); | ||
76 | void enable_lapic_nmi_watchdog(void); | ||
77 | void stop_nmi(void); | 26 | void stop_nmi(void); |
78 | void restart_nmi(void); | 27 | void restart_nmi(void); |
79 | 28 | ||
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 18e3b8a8709f..7709c12431b8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -112,7 +112,7 @@ static inline void arch_safe_halt(void) | |||
112 | 112 | ||
113 | static inline void halt(void) | 113 | static inline void halt(void) |
114 | { | 114 | { |
115 | PVOP_VCALL0(pv_irq_ops.safe_halt); | 115 | PVOP_VCALL0(pv_irq_ops.halt); |
116 | } | 116 | } |
117 | 117 | ||
118 | static inline void wbinvd(void) | 118 | static inline void wbinvd(void) |
@@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) | |||
824 | #define __PV_IS_CALLEE_SAVE(func) \ | 824 | #define __PV_IS_CALLEE_SAVE(func) \ |
825 | ((struct paravirt_callee_save) { func }) | 825 | ((struct paravirt_callee_save) { func }) |
826 | 826 | ||
827 | static inline unsigned long arch_local_save_flags(void) | 827 | static inline notrace unsigned long arch_local_save_flags(void) |
828 | { | 828 | { |
829 | return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); | 829 | return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); |
830 | } | 830 | } |
831 | 831 | ||
832 | static inline void arch_local_irq_restore(unsigned long f) | 832 | static inline notrace void arch_local_irq_restore(unsigned long f) |
833 | { | 833 | { |
834 | PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); | 834 | PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); |
835 | } | 835 | } |
836 | 836 | ||
837 | static inline void arch_local_irq_disable(void) | 837 | static inline notrace void arch_local_irq_disable(void) |
838 | { | 838 | { |
839 | PVOP_VCALLEE0(pv_irq_ops.irq_disable); | 839 | PVOP_VCALLEE0(pv_irq_ops.irq_disable); |
840 | } | 840 | } |
841 | 841 | ||
842 | static inline void arch_local_irq_enable(void) | 842 | static inline notrace void arch_local_irq_enable(void) |
843 | { | 843 | { |
844 | PVOP_VCALLEE0(pv_irq_ops.irq_enable); | 844 | PVOP_VCALLEE0(pv_irq_ops.irq_enable); |
845 | } | 845 | } |
846 | 846 | ||
847 | static inline unsigned long arch_local_irq_save(void) | 847 | static inline notrace unsigned long arch_local_irq_save(void) |
848 | { | 848 | { |
849 | unsigned long f; | 849 | unsigned long f; |
850 | 850 | ||
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index ca0437c714b2..676129229630 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start; | |||
65 | 65 | ||
66 | #define PCIBIOS_MIN_CARDBUS_IO 0x4000 | 66 | #define PCIBIOS_MIN_CARDBUS_IO 0x4000 |
67 | 67 | ||
68 | extern int pcibios_enabled; | ||
68 | void pcibios_config_init(void); | 69 | void pcibios_config_init(void); |
69 | struct pci_bus *pcibios_scan_root(int bus); | 70 | struct pci_bus *pcibios_scan_root(int bus); |
70 | 71 | ||
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 550e26b1dbb3..d9d4dae305f6 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -125,7 +125,6 @@ union cpuid10_edx { | |||
125 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ | 125 | #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ |
126 | 126 | ||
127 | #ifdef CONFIG_PERF_EVENTS | 127 | #ifdef CONFIG_PERF_EVENTS |
128 | extern void init_hw_perf_events(void); | ||
129 | extern void perf_events_lapic_init(void); | 128 | extern void perf_events_lapic_init(void); |
130 | 129 | ||
131 | #define PERF_EVENT_INDEX_OFFSET 0 | 130 | #define PERF_EVENT_INDEX_OFFSET 0 |
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); | |||
156 | } | 155 | } |
157 | 156 | ||
158 | #else | 157 | #else |
159 | static inline void init_hw_perf_events(void) { } | ||
160 | static inline void perf_events_lapic_init(void) { } | 158 | static inline void perf_events_lapic_init(void) { } |
161 | #endif | 159 | #endif |
162 | 160 | ||
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index a70cd216be5d..295e2ff18a6a 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS { | |||
744 | }; | 744 | }; |
745 | 745 | ||
746 | /* | 746 | /* |
747 | * P4 PEBS specifics (Replay Event only) | ||
748 | * | ||
749 | * Format (bits): | ||
750 | * 0-6: metric from P4_PEBS_METRIC enum | ||
751 | * 7 : reserved | ||
752 | * 8 : reserved | ||
753 | * 9-11 : reserved | ||
754 | * | ||
755 | * Note we have UOP and PEBS bits reserved for now | 747 | * Note we have UOP and PEBS bits reserved for now |
756 | * just in case if we will need them once | 748 | * just in case if we will need them once |
757 | */ | 749 | */ |
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC { | |||
788 | P4_PEBS_METRIC__max | 780 | P4_PEBS_METRIC__max |
789 | }; | 781 | }; |
790 | 782 | ||
783 | /* | ||
784 | * Notes on internal configuration of ESCR+CCCR tuples | ||
785 | * | ||
786 | * Since P4 has quite the different architecture of | ||
787 | * performance registers in compare with "architectural" | ||
788 | * once and we have on 64 bits to keep configuration | ||
789 | * of performance event, the following trick is used. | ||
790 | * | ||
791 | * 1) Since both ESCR and CCCR registers have only low | ||
792 | * 32 bits valuable, we pack them into a single 64 bit | ||
793 | * configuration. Low 32 bits of such config correspond | ||
794 | * to low 32 bits of CCCR register and high 32 bits | ||
795 | * correspond to low 32 bits of ESCR register. | ||
796 | * | ||
797 | * 2) The meaning of every bit of such config field can | ||
798 | * be found in Intel SDM but it should be noted that | ||
799 | * we "borrow" some reserved bits for own usage and | ||
800 | * clean them or set to a proper value when we do | ||
801 | * a real write to hardware registers. | ||
802 | * | ||
803 | * 3) The format of bits of config is the following | ||
804 | * and should be either 0 or set to some predefined | ||
805 | * values: | ||
806 | * | ||
807 | * Low 32 bits | ||
808 | * ----------- | ||
809 | * 0-6: P4_PEBS_METRIC enum | ||
810 | * 7-11: reserved | ||
811 | * 12: reserved (Enable) | ||
812 | * 13-15: reserved (ESCR select) | ||
813 | * 16-17: Active Thread | ||
814 | * 18: Compare | ||
815 | * 19: Complement | ||
816 | * 20-23: Threshold | ||
817 | * 24: Edge | ||
818 | * 25: reserved (FORCE_OVF) | ||
819 | * 26: reserved (OVF_PMI_T0) | ||
820 | * 27: reserved (OVF_PMI_T1) | ||
821 | * 28-29: reserved | ||
822 | * 30: reserved (Cascade) | ||
823 | * 31: reserved (OVF) | ||
824 | * | ||
825 | * High 32 bits | ||
826 | * ------------ | ||
827 | * 0: reserved (T1_USR) | ||
828 | * 1: reserved (T1_OS) | ||
829 | * 2: reserved (T0_USR) | ||
830 | * 3: reserved (T0_OS) | ||
831 | * 4: Tag Enable | ||
832 | * 5-8: Tag Value | ||
833 | * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper) | ||
834 | * 25-30: enum P4_EVENTS | ||
835 | * 31: reserved (HT thread) | ||
836 | */ | ||
837 | |||
791 | #endif /* PERF_EVENT_P4_H */ | 838 | #endif /* PERF_EVENT_P4_H */ |
792 | 839 | ||
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7f7e577a0e39..31d84acc1512 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -11,6 +11,7 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); | |||
11 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, | 11 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, |
12 | struct pvclock_vcpu_time_info *vcpu, | 12 | struct pvclock_vcpu_time_info *vcpu, |
13 | struct timespec *ts); | 13 | struct timespec *ts); |
14 | void pvclock_resume(void); | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | 17 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, |
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 1def60114906..6c22bf353f26 100644 --- a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h | |||
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void) | |||
48 | setup_IO_APIC(); | 48 | setup_IO_APIC(); |
49 | else { | 49 | else { |
50 | nr_ioapics = 0; | 50 | nr_ioapics = 0; |
51 | localise_nmi_watchdog(); | ||
52 | } | 51 | } |
53 | #endif | 52 | #endif |
54 | } | 53 | } |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 2b16a2ad23dc..52b5c7ed3608 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #define _ASM_X86_STACKTRACE_H | 7 | #define _ASM_X86_STACKTRACE_H |
8 | 8 | ||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <linux/ptrace.h> | ||
10 | 11 | ||
11 | extern int kstack_depth_to_print; | 12 | extern int kstack_depth_to_print; |
12 | 13 | ||
@@ -46,7 +47,7 @@ struct stacktrace_ops { | |||
46 | }; | 47 | }; |
47 | 48 | ||
48 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | 49 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, |
49 | unsigned long *stack, unsigned long bp, | 50 | unsigned long *stack, |
50 | const struct stacktrace_ops *ops, void *data); | 51 | const struct stacktrace_ops *ops, void *data); |
51 | 52 | ||
52 | #ifdef CONFIG_X86_32 | 53 | #ifdef CONFIG_X86_32 |
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
57 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | 58 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) |
58 | #endif | 59 | #endif |
59 | 60 | ||
61 | #ifdef CONFIG_FRAME_POINTER | ||
62 | static inline unsigned long | ||
63 | stack_frame(struct task_struct *task, struct pt_regs *regs) | ||
64 | { | ||
65 | unsigned long bp; | ||
66 | |||
67 | if (regs) | ||
68 | return regs->bp; | ||
69 | |||
70 | if (task == current) { | ||
71 | /* Grab bp right from our regs */ | ||
72 | get_bp(bp); | ||
73 | return bp; | ||
74 | } | ||
75 | |||
76 | /* bp is the last reg pushed by switch_to */ | ||
77 | return *(unsigned long *)task->thread.sp; | ||
78 | } | ||
79 | #else | ||
80 | static inline unsigned long | ||
81 | stack_frame(struct task_struct *task, struct pt_regs *regs) | ||
82 | { | ||
83 | return 0; | ||
84 | } | ||
85 | #endif | ||
86 | |||
60 | extern void | 87 | extern void |
61 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 88 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
62 | unsigned long *stack, unsigned long bp, char *log_lvl); | 89 | unsigned long *stack, char *log_lvl); |
63 | 90 | ||
64 | extern void | 91 | extern void |
65 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 92 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
66 | unsigned long *sp, unsigned long bp, char *log_lvl); | 93 | unsigned long *sp, char *log_lvl); |
67 | 94 | ||
68 | extern unsigned int code_bytes; | 95 | extern unsigned int code_bytes; |
69 | 96 | ||
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 5469630b27f5..fa7b9176b76c 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h | |||
@@ -10,12 +10,6 @@ | |||
10 | unsigned long long native_sched_clock(void); | 10 | unsigned long long native_sched_clock(void); |
11 | extern int recalibrate_cpu_khz(void); | 11 | extern int recalibrate_cpu_khz(void); |
12 | 12 | ||
13 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) | ||
14 | extern int timer_ack; | ||
15 | #else | ||
16 | # define timer_ack (0) | ||
17 | #endif | ||
18 | |||
19 | extern int no_timer_check; | 13 | extern int no_timer_check; |
20 | 14 | ||
21 | /* Accelerators for sched_clock() | 15 | /* Accelerators for sched_clock() |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 42d412fd8b02..ce1d54c8a433 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -26,20 +26,22 @@ | |||
26 | * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, | 26 | * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, |
27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. | 27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. |
28 | * | 28 | * |
29 | * We will use 31 sets, one for sending BAU messages from each of the 32 | 29 | * We will use one set for sending BAU messages from each of the |
30 | * cpu's on the uvhub. | 30 | * cpu's on the uvhub. |
31 | * | 31 | * |
32 | * TLB shootdown will use the first of the 8 descriptors of each set. | 32 | * TLB shootdown will use the first of the 8 descriptors of each set. |
33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). | 33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). |
34 | */ | 34 | */ |
35 | 35 | ||
36 | #define MAX_CPUS_PER_UVHUB 64 | ||
37 | #define MAX_CPUS_PER_SOCKET 32 | ||
38 | #define UV_ADP_SIZE 64 /* hardware-provided max. */ | ||
39 | #define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */ | ||
36 | #define UV_ITEMS_PER_DESCRIPTOR 8 | 40 | #define UV_ITEMS_PER_DESCRIPTOR 8 |
37 | /* the 'throttle' to prevent the hardware stay-busy bug */ | 41 | /* the 'throttle' to prevent the hardware stay-busy bug */ |
38 | #define MAX_BAU_CONCURRENT 3 | 42 | #define MAX_BAU_CONCURRENT 3 |
39 | #define UV_CPUS_PER_ACT_STATUS 32 | ||
40 | #define UV_ACT_STATUS_MASK 0x3 | 43 | #define UV_ACT_STATUS_MASK 0x3 |
41 | #define UV_ACT_STATUS_SIZE 2 | 44 | #define UV_ACT_STATUS_SIZE 2 |
42 | #define UV_ADP_SIZE 32 | ||
43 | #define UV_DISTRIBUTION_SIZE 256 | 45 | #define UV_DISTRIBUTION_SIZE 256 |
44 | #define UV_SW_ACK_NPENDING 8 | 46 | #define UV_SW_ACK_NPENDING 8 |
45 | #define UV_NET_ENDPOINT_INTD 0x38 | 47 | #define UV_NET_ENDPOINT_INTD 0x38 |
@@ -100,7 +102,6 @@ | |||
100 | * number of destination side software ack resources | 102 | * number of destination side software ack resources |
101 | */ | 103 | */ |
102 | #define DEST_NUM_RESOURCES 8 | 104 | #define DEST_NUM_RESOURCES 8 |
103 | #define MAX_CPUS_PER_NODE 32 | ||
104 | /* | 105 | /* |
105 | * completion statuses for sending a TLB flush message | 106 | * completion statuses for sending a TLB flush message |
106 | */ | 107 | */ |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index e969f691cbfd..a501741c2335 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -199,6 +199,8 @@ union uvh_apicid { | |||
199 | #define UVH_APICID 0x002D0E00L | 199 | #define UVH_APICID 0x002D0E00L |
200 | #define UV_APIC_PNODE_SHIFT 6 | 200 | #define UV_APIC_PNODE_SHIFT 6 |
201 | 201 | ||
202 | #define UV_APICID_HIBIT_MASK 0xffff0000 | ||
203 | |||
202 | /* Local Bus from cpu's perspective */ | 204 | /* Local Bus from cpu's perspective */ |
203 | #define LOCAL_BUS_BASE 0x1c00000 | 205 | #define LOCAL_BUS_BASE 0x1c00000 |
204 | #define LOCAL_BUS_SIZE (4 * 1024 * 1024) | 206 | #define LOCAL_BUS_SIZE (4 * 1024 * 1024) |
@@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) | |||
491 | } | 493 | } |
492 | } | 494 | } |
493 | 495 | ||
496 | extern unsigned int uv_apicid_hibits; | ||
494 | static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) | 497 | static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) |
495 | { | 498 | { |
499 | apicid |= uv_apicid_hibits; | ||
496 | return (1UL << UVH_IPI_INT_SEND_SHFT) | | 500 | return (1UL << UVH_IPI_INT_SEND_SHFT) | |
497 | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | | 501 | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | |
498 | (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | | 502 | (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 6d90adf4428a..20cafeac7455 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV MMR definitions | 6 | * SGI UV MMR definitions |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #ifndef _ASM_X86_UV_UV_MMRS_H | 11 | #ifndef _ASM_X86_UV_UV_MMRS_H |
@@ -754,6 +754,23 @@ union uvh_lb_bau_sb_descriptor_base_u { | |||
754 | }; | 754 | }; |
755 | 755 | ||
756 | /* ========================================================================= */ | 756 | /* ========================================================================= */ |
757 | /* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */ | ||
758 | /* ========================================================================= */ | ||
759 | #define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL | ||
760 | #define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0 | ||
761 | |||
762 | #define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 | ||
763 | #define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL | ||
764 | |||
765 | union uvh_lb_target_physical_apic_id_mask_u { | ||
766 | unsigned long v; | ||
767 | struct uvh_lb_target_physical_apic_id_mask_s { | ||
768 | unsigned long bit_enables : 32; /* RW */ | ||
769 | unsigned long rsvd_32_63 : 32; /* */ | ||
770 | } s; | ||
771 | }; | ||
772 | |||
773 | /* ========================================================================= */ | ||
757 | /* UVH_NODE_ID */ | 774 | /* UVH_NODE_ID */ |
758 | /* ========================================================================= */ | 775 | /* ========================================================================= */ |
759 | #define UVH_NODE_ID 0x0UL | 776 | #define UVH_NODE_ID 0x0UL |
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1f0c55..1c10c88ee4e1 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h | |||
@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); | |||
61 | #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) | 61 | #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) |
62 | #endif | 62 | #endif |
63 | 63 | ||
64 | #ifndef machine_to_phys_mapping | 64 | #define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) |
65 | #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) | 65 | #define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) |
66 | #endif | 66 | #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) |
67 | 67 | ||
68 | /* Maximum number of virtual CPUs in multi-processor guests. */ | 68 | /* Maximum number of virtual CPUs in multi-processor guests. */ |
69 | #define MAX_VIRT_CPUS 32 | 69 | #define MAX_VIRT_CPUS 32 |
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e004ae5c..8413688b2571 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h | |||
@@ -32,6 +32,11 @@ | |||
32 | /* And the trap vector is... */ | 32 | /* And the trap vector is... */ |
33 | #define TRAP_INSTR "int $0x82" | 33 | #define TRAP_INSTR "int $0x82" |
34 | 34 | ||
35 | #define __MACH2PHYS_VIRT_START 0xF5800000 | ||
36 | #define __MACH2PHYS_VIRT_END 0xF6800000 | ||
37 | |||
38 | #define __MACH2PHYS_SHIFT 2 | ||
39 | |||
35 | /* | 40 | /* |
36 | * Virtual addresses beyond this are not modifiable by guest OSes. The | 41 | * Virtual addresses beyond this are not modifiable by guest OSes. The |
37 | * machine->physical mapping table starts at this address, read-only. | 42 | * machine->physical mapping table starts at this address, read-only. |
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d2662b97c..839a4811cf98 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h | |||
@@ -39,18 +39,7 @@ | |||
39 | #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 | 39 | #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 |
40 | #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 | 40 | #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 |
41 | #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 | 41 | #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 |
42 | 42 | #define __MACH2PHYS_SHIFT 3 | |
43 | #ifndef HYPERVISOR_VIRT_START | ||
44 | #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) | ||
45 | #define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) | ||
46 | #endif | ||
47 | |||
48 | #define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) | ||
49 | #define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) | ||
50 | #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) | ||
51 | #ifndef machine_to_phys_mapping | ||
52 | #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) | ||
53 | #endif | ||
54 | 43 | ||
55 | /* | 44 | /* |
56 | * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) | 45 | * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c1414b3d5..8760cc60a21c 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
7 | #include <linux/pfn.h> | 7 | #include <linux/pfn.h> |
8 | #include <linux/mm.h> | ||
8 | 9 | ||
9 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
10 | #include <asm/page.h> | 11 | #include <asm/page.h> |
@@ -35,6 +36,8 @@ typedef struct xpaddr { | |||
35 | #define MAX_DOMAIN_PAGES \ | 36 | #define MAX_DOMAIN_PAGES \ |
36 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) | 37 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) |
37 | 38 | ||
39 | extern unsigned long *machine_to_phys_mapping; | ||
40 | extern unsigned int machine_to_phys_order; | ||
38 | 41 | ||
39 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 42 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
40 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
@@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
69 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 72 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
70 | return mfn; | 73 | return mfn; |
71 | 74 | ||
72 | #if 0 | ||
73 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | 75 | if (unlikely((mfn >> machine_to_phys_order) != 0)) |
74 | return max_mapnr; | 76 | return ~0; |
75 | #endif | ||
76 | 77 | ||
77 | pfn = 0; | 78 | pfn = 0; |
78 | /* | 79 | /* |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f60153d5de57..34244b2cd880 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | |||
45 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 45 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
46 | obj-y += tsc.o io_delay.o rtc.o | 46 | obj-y += tsc.o io_delay.o rtc.o |
47 | obj-y += pci-iommu_table.o | 47 | obj-y += pci-iommu_table.o |
48 | obj-y += resource.o | ||
48 | 49 | ||
49 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 50 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
50 | obj-y += process.o | 51 | obj-y += process.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1a5b9a8e6c4f..ec881c6bfee0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) | |||
198 | { | 198 | { |
199 | unsigned int ver = 0; | 199 | unsigned int ver = 0; |
200 | 200 | ||
201 | if (id >= (MAX_LOCAL_APIC-1)) { | ||
202 | printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); | ||
203 | return; | ||
204 | } | ||
205 | |||
201 | if (!enabled) { | 206 | if (!enabled) { |
202 | ++disabled_cpus; | 207 | ++disabled_cpus; |
203 | return; | 208 | return; |
@@ -898,13 +903,13 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
898 | register_lapic_address(acpi_lapic_addr); | 903 | register_lapic_address(acpi_lapic_addr); |
899 | 904 | ||
900 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, | 905 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, |
901 | acpi_parse_sapic, MAX_APICS); | 906 | acpi_parse_sapic, MAX_LOCAL_APIC); |
902 | 907 | ||
903 | if (!count) { | 908 | if (!count) { |
904 | x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, | 909 | x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, |
905 | acpi_parse_x2apic, MAX_APICS); | 910 | acpi_parse_x2apic, MAX_LOCAL_APIC); |
906 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, | 911 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, |
907 | acpi_parse_lapic, MAX_APICS); | 912 | acpi_parse_lapic, MAX_LOCAL_APIC); |
908 | } | 913 | } |
909 | if (!count && !x2count) { | 914 | if (!count && !x2count) { |
910 | printk(KERN_ERR PREFIX "No LAPIC entries present\n"); | 915 | printk(KERN_ERR PREFIX "No LAPIC entries present\n"); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5079f24c955a..123608531c8f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) | |||
353 | mutex_unlock(&smp_alt); | 353 | mutex_unlock(&smp_alt); |
354 | } | 354 | } |
355 | 355 | ||
356 | bool skip_smp_alternatives; | ||
356 | void alternatives_smp_switch(int smp) | 357 | void alternatives_smp_switch(int smp) |
357 | { | 358 | { |
358 | struct smp_alt_module *mod; | 359 | struct smp_alt_module *mod; |
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp) | |||
368 | printk("lockdep: fixing up alternatives.\n"); | 369 | printk("lockdep: fixing up alternatives.\n"); |
369 | #endif | 370 | #endif |
370 | 371 | ||
371 | if (noreplace_smp || smp_alt_once) | 372 | if (noreplace_smp || smp_alt_once || skip_smp_alternatives) |
372 | return; | 373 | return; |
373 | BUG_ON(!smp && (num_online_cpus() > 1)); | 374 | BUG_ON(!smp && (num_online_cpus() > 1)); |
374 | 375 | ||
@@ -591,17 +592,21 @@ static atomic_t stop_machine_first; | |||
591 | static int wrote_text; | 592 | static int wrote_text; |
592 | 593 | ||
593 | struct text_poke_params { | 594 | struct text_poke_params { |
594 | void *addr; | 595 | struct text_poke_param *params; |
595 | const void *opcode; | 596 | int nparams; |
596 | size_t len; | ||
597 | }; | 597 | }; |
598 | 598 | ||
599 | static int __kprobes stop_machine_text_poke(void *data) | 599 | static int __kprobes stop_machine_text_poke(void *data) |
600 | { | 600 | { |
601 | struct text_poke_params *tpp = data; | 601 | struct text_poke_params *tpp = data; |
602 | struct text_poke_param *p; | ||
603 | int i; | ||
602 | 604 | ||
603 | if (atomic_dec_and_test(&stop_machine_first)) { | 605 | if (atomic_dec_and_test(&stop_machine_first)) { |
604 | text_poke(tpp->addr, tpp->opcode, tpp->len); | 606 | for (i = 0; i < tpp->nparams; i++) { |
607 | p = &tpp->params[i]; | ||
608 | text_poke(p->addr, p->opcode, p->len); | ||
609 | } | ||
605 | smp_wmb(); /* Make sure other cpus see that this has run */ | 610 | smp_wmb(); /* Make sure other cpus see that this has run */ |
606 | wrote_text = 1; | 611 | wrote_text = 1; |
607 | } else { | 612 | } else { |
@@ -610,8 +615,12 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
610 | smp_mb(); /* Load wrote_text before following execution */ | 615 | smp_mb(); /* Load wrote_text before following execution */ |
611 | } | 616 | } |
612 | 617 | ||
613 | flush_icache_range((unsigned long)tpp->addr, | 618 | for (i = 0; i < tpp->nparams; i++) { |
614 | (unsigned long)tpp->addr + tpp->len); | 619 | p = &tpp->params[i]; |
620 | flush_icache_range((unsigned long)p->addr, | ||
621 | (unsigned long)p->addr + p->len); | ||
622 | } | ||
623 | |||
615 | return 0; | 624 | return 0; |
616 | } | 625 | } |
617 | 626 | ||
@@ -631,10 +640,13 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
631 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | 640 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) |
632 | { | 641 | { |
633 | struct text_poke_params tpp; | 642 | struct text_poke_params tpp; |
643 | struct text_poke_param p; | ||
634 | 644 | ||
635 | tpp.addr = addr; | 645 | p.addr = addr; |
636 | tpp.opcode = opcode; | 646 | p.opcode = opcode; |
637 | tpp.len = len; | 647 | p.len = len; |
648 | tpp.params = &p; | ||
649 | tpp.nparams = 1; | ||
638 | atomic_set(&stop_machine_first, 1); | 650 | atomic_set(&stop_machine_first, 1); |
639 | wrote_text = 0; | 651 | wrote_text = 0; |
640 | /* Use __stop_machine() because the caller already got online_cpus. */ | 652 | /* Use __stop_machine() because the caller already got online_cpus. */ |
@@ -642,6 +654,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | |||
642 | return addr; | 654 | return addr; |
643 | } | 655 | } |
644 | 656 | ||
657 | /** | ||
658 | * text_poke_smp_batch - Update instructions on a live kernel on SMP | ||
659 | * @params: an array of text_poke parameters | ||
660 | * @n: the number of elements in params. | ||
661 | * | ||
662 | * Modify multi-byte instruction by using stop_machine() on SMP. Since the | ||
663 | * stop_machine() is heavy task, it is better to aggregate text_poke requests | ||
664 | * and do it once if possible. | ||
665 | * | ||
666 | * Note: Must be called under get_online_cpus() and text_mutex. | ||
667 | */ | ||
668 | void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) | ||
669 | { | ||
670 | struct text_poke_params tpp = {.params = params, .nparams = n}; | ||
671 | |||
672 | atomic_set(&stop_machine_first, 1); | ||
673 | wrote_text = 0; | ||
674 | stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | ||
675 | } | ||
676 | |||
645 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) | 677 | #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) |
646 | 678 | ||
647 | #ifdef CONFIG_X86_64 | 679 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 910f20b457c4..3966b564ea47 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -3,10 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o |
6 | ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) | 6 | obj-y += hw_nmi.o |
7 | obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o | ||
8 | endif | ||
9 | obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o | ||
10 | 7 | ||
11 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 8 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
12 | obj-$(CONFIG_SMP) += ipi.o | 9 | obj-$(CONFIG_SMP) += ipi.o |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c0f6426cd337..ce65d449b750 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | #include <linux/cpu.h> | 32 | #include <linux/cpu.h> |
33 | #include <linux/dmi.h> | 33 | #include <linux/dmi.h> |
34 | #include <linux/nmi.h> | ||
35 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
36 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
37 | 36 | ||
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) | |||
432 | reserved = reserve_eilvt_offset(offset, new); | 431 | reserved = reserve_eilvt_offset(offset, new); |
433 | 432 | ||
434 | if (reserved != new) { | 433 | if (reserved != new) { |
435 | pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " | 434 | pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " |
436 | "vector 0x%x was already reserved by another core, " | 435 | "vector 0x%x, but the register is already in use for " |
437 | "APIC%lX=0x%x\n", | 436 | "vector 0x%x on another cpu\n", |
438 | smp_processor_id(), new, reserved, reg, old); | 437 | smp_processor_id(), reg, offset, new, reserved); |
439 | return -EINVAL; | 438 | return -EINVAL; |
440 | } | 439 | } |
441 | 440 | ||
442 | if (!eilvt_entry_is_changeable(old, new)) { | 441 | if (!eilvt_entry_is_changeable(old, new)) { |
443 | pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " | 442 | pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " |
444 | "register already in use, APIC%lX=0x%x\n", | 443 | "vector 0x%x, but the register is already in use for " |
445 | smp_processor_id(), new, reg, old); | 444 | "vector 0x%x on this cpu\n", |
445 | smp_processor_id(), reg, offset, new, old); | ||
446 | return -EBUSY; | 446 | return -EBUSY; |
447 | } | 447 | } |
448 | 448 | ||
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void) | |||
799 | * PIT/HPET going. Otherwise register lapic as a dummy | 799 | * PIT/HPET going. Otherwise register lapic as a dummy |
800 | * device. | 800 | * device. |
801 | */ | 801 | */ |
802 | if (nmi_watchdog != NMI_IO_APIC) | 802 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; |
803 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
804 | else | ||
805 | pr_warning("APIC timer registered as dummy," | ||
806 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
807 | 803 | ||
808 | /* Setup the lapic or request the broadcast */ | 804 | /* Setup the lapic or request the broadcast */ |
809 | setup_APIC_timer(); | 805 | setup_APIC_timer(); |
@@ -1384,8 +1380,15 @@ void __cpuinit end_local_APIC_setup(void) | |||
1384 | } | 1380 | } |
1385 | #endif | 1381 | #endif |
1386 | 1382 | ||
1387 | setup_apic_nmi_watchdog(NULL); | ||
1388 | apic_pm_activate(); | 1383 | apic_pm_activate(); |
1384 | |||
1385 | /* | ||
1386 | * Now that local APIC setup is completed for BP, configure the fault | ||
1387 | * handling for interrupt remapping. | ||
1388 | */ | ||
1389 | if (!smp_processor_id() && intr_remapping_enabled) | ||
1390 | enable_drhd_fault_handling(); | ||
1391 | |||
1389 | } | 1392 | } |
1390 | 1393 | ||
1391 | #ifdef CONFIG_X86_X2APIC | 1394 | #ifdef CONFIG_X86_X2APIC |
@@ -1694,7 +1697,7 @@ void __init register_lapic_address(unsigned long address) | |||
1694 | * This initializes the IO-APIC and APIC hardware if this is | 1697 | * This initializes the IO-APIC and APIC hardware if this is |
1695 | * a UP kernel. | 1698 | * a UP kernel. |
1696 | */ | 1699 | */ |
1697 | int apic_version[MAX_APICS]; | 1700 | int apic_version[MAX_LOCAL_APIC]; |
1698 | 1701 | ||
1699 | int __init APIC_init_uniprocessor(void) | 1702 | int __init APIC_init_uniprocessor(void) |
1700 | { | 1703 | { |
@@ -1759,17 +1762,10 @@ int __init APIC_init_uniprocessor(void) | |||
1759 | setup_IO_APIC(); | 1762 | setup_IO_APIC(); |
1760 | else { | 1763 | else { |
1761 | nr_ioapics = 0; | 1764 | nr_ioapics = 0; |
1762 | localise_nmi_watchdog(); | ||
1763 | } | 1765 | } |
1764 | #else | ||
1765 | localise_nmi_watchdog(); | ||
1766 | #endif | 1766 | #endif |
1767 | 1767 | ||
1768 | x86_init.timers.setup_percpu_clockev(); | 1768 | x86_init.timers.setup_percpu_clockev(); |
1769 | #ifdef CONFIG_X86_64 | ||
1770 | check_nmi_watchdog(); | ||
1771 | #endif | ||
1772 | |||
1773 | return 0; | 1769 | return 0; |
1774 | } | 1770 | } |
1775 | 1771 | ||
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e9..72ec29e1ae06 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -17,19 +17,31 @@ | |||
17 | #include <linux/nmi.h> | 17 | #include <linux/nmi.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | 19 | ||
20 | /* For reliability, we're prepared to waste bits here. */ | 20 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
21 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
22 | |||
23 | u64 hw_nmi_get_sample_period(void) | 21 | u64 hw_nmi_get_sample_period(void) |
24 | { | 22 | { |
25 | return (u64)(cpu_khz) * 1000 * 60; | 23 | return (u64)(cpu_khz) * 1000 * 60; |
26 | } | 24 | } |
25 | #endif | ||
26 | |||
27 | #ifdef arch_trigger_all_cpu_backtrace | ||
28 | /* For reliability, we're prepared to waste bits here. */ | ||
29 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
30 | |||
31 | /* "in progress" flag of arch_trigger_all_cpu_backtrace */ | ||
32 | static unsigned long backtrace_flag; | ||
27 | 33 | ||
28 | #ifdef ARCH_HAS_NMI_WATCHDOG | ||
29 | void arch_trigger_all_cpu_backtrace(void) | 34 | void arch_trigger_all_cpu_backtrace(void) |
30 | { | 35 | { |
31 | int i; | 36 | int i; |
32 | 37 | ||
38 | if (test_and_set_bit(0, &backtrace_flag)) | ||
39 | /* | ||
40 | * If there is already a trigger_all_cpu_backtrace() in progress | ||
41 | * (backtrace_flag == 1), don't output double cpu dump infos. | ||
42 | */ | ||
43 | return; | ||
44 | |||
33 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | 45 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); |
34 | 46 | ||
35 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | 47 | printk(KERN_INFO "sending NMI to all CPUs:\n"); |
@@ -41,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void) | |||
41 | break; | 53 | break; |
42 | mdelay(1); | 54 | mdelay(1); |
43 | } | 55 | } |
56 | |||
57 | clear_bit(0, &backtrace_flag); | ||
58 | smp_mb__after_clear_bit(); | ||
44 | } | 59 | } |
45 | 60 | ||
46 | static int __kprobes | 61 | static int __kprobes |
@@ -49,7 +64,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | |||
49 | { | 64 | { |
50 | struct die_args *args = __args; | 65 | struct die_args *args = __args; |
51 | struct pt_regs *regs; | 66 | struct pt_regs *regs; |
52 | int cpu = smp_processor_id(); | 67 | int cpu; |
53 | 68 | ||
54 | switch (cmd) { | 69 | switch (cmd) { |
55 | case DIE_NMI: | 70 | case DIE_NMI: |
@@ -61,6 +76,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | |||
61 | } | 76 | } |
62 | 77 | ||
63 | regs = args->regs; | 78 | regs = args->regs; |
79 | cpu = smp_processor_id(); | ||
64 | 80 | ||
65 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | 81 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
66 | static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; | 82 | static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; |
@@ -90,18 +106,3 @@ static int __init register_trigger_all_cpu_backtrace(void) | |||
90 | } | 106 | } |
91 | early_initcall(register_trigger_all_cpu_backtrace); | 107 | early_initcall(register_trigger_all_cpu_backtrace); |
92 | #endif | 108 | #endif |
93 | |||
94 | /* STUB calls to mimic old nmi_watchdog behaviour */ | ||
95 | #if defined(CONFIG_X86_LOCAL_APIC) | ||
96 | unsigned int nmi_watchdog = NMI_NONE; | ||
97 | EXPORT_SYMBOL(nmi_watchdog); | ||
98 | void acpi_nmi_enable(void) { return; } | ||
99 | void acpi_nmi_disable(void) { return; } | ||
100 | #endif | ||
101 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
102 | EXPORT_SYMBOL(nmi_active); | ||
103 | int unknown_nmi_panic; | ||
104 | void cpu_nmi_set_wd_enabled(void) { return; } | ||
105 | void stop_apic_nmi_watchdog(void *unused) { return; } | ||
106 | void setup_apic_nmi_watchdog(void *unused) { return; } | ||
107 | int __init check_nmi_watchdog(void) { return 0; } | ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bb61a552d8c6..52735a710c30 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -54,7 +54,6 @@ | |||
54 | #include <asm/dma.h> | 54 | #include <asm/dma.h> |
55 | #include <asm/timer.h> | 55 | #include <asm/timer.h> |
56 | #include <asm/i8259.h> | 56 | #include <asm/i8259.h> |
57 | #include <asm/nmi.h> | ||
58 | #include <asm/msidef.h> | 57 | #include <asm/msidef.h> |
59 | #include <asm/hypertransport.h> | 58 | #include <asm/hypertransport.h> |
60 | #include <asm/setup.h> | 59 | #include <asm/setup.h> |
@@ -2458,13 +2457,12 @@ static void ack_apic_level(struct irq_data *data) | |||
2458 | { | 2457 | { |
2459 | struct irq_cfg *cfg = data->chip_data; | 2458 | struct irq_cfg *cfg = data->chip_data; |
2460 | int i, do_unmask_irq = 0, irq = data->irq; | 2459 | int i, do_unmask_irq = 0, irq = data->irq; |
2461 | struct irq_desc *desc = irq_to_desc(irq); | ||
2462 | unsigned long v; | 2460 | unsigned long v; |
2463 | 2461 | ||
2464 | irq_complete_move(cfg); | 2462 | irq_complete_move(cfg); |
2465 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2463 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
2466 | /* If we are moving the irq we need to mask it */ | 2464 | /* If we are moving the irq we need to mask it */ |
2467 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { | 2465 | if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { |
2468 | do_unmask_irq = 1; | 2466 | do_unmask_irq = 1; |
2469 | mask_ioapic(cfg); | 2467 | mask_ioapic(cfg); |
2470 | } | 2468 | } |
@@ -2671,24 +2669,6 @@ static void lapic_register_intr(int irq) | |||
2671 | "edge"); | 2669 | "edge"); |
2672 | } | 2670 | } |
2673 | 2671 | ||
2674 | static void __init setup_nmi(void) | ||
2675 | { | ||
2676 | /* | ||
2677 | * Dirty trick to enable the NMI watchdog ... | ||
2678 | * We put the 8259A master into AEOI mode and | ||
2679 | * unmask on all local APICs LVT0 as NMI. | ||
2680 | * | ||
2681 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') | ||
2682 | * is from Maciej W. Rozycki - so we do not have to EOI from | ||
2683 | * the NMI handler or the timer interrupt. | ||
2684 | */ | ||
2685 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | ||
2686 | |||
2687 | enable_NMI_through_LVT0(); | ||
2688 | |||
2689 | apic_printk(APIC_VERBOSE, " done.\n"); | ||
2690 | } | ||
2691 | |||
2692 | /* | 2672 | /* |
2693 | * This looks a bit hackish but it's about the only one way of sending | 2673 | * This looks a bit hackish but it's about the only one way of sending |
2694 | * a few INTA cycles to 8259As and any associated glue logic. ICR does | 2674 | * a few INTA cycles to 8259As and any associated glue logic. ICR does |
@@ -2794,15 +2774,6 @@ static inline void __init check_timer(void) | |||
2794 | */ | 2774 | */ |
2795 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2775 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2796 | legacy_pic->init(1); | 2776 | legacy_pic->init(1); |
2797 | #ifdef CONFIG_X86_32 | ||
2798 | { | ||
2799 | unsigned int ver; | ||
2800 | |||
2801 | ver = apic_read(APIC_LVR); | ||
2802 | ver = GET_APIC_VERSION(ver); | ||
2803 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
2804 | } | ||
2805 | #endif | ||
2806 | 2777 | ||
2807 | pin1 = find_isa_irq_pin(0, mp_INT); | 2778 | pin1 = find_isa_irq_pin(0, mp_INT); |
2808 | apic1 = find_isa_irq_apic(0, mp_INT); | 2779 | apic1 = find_isa_irq_apic(0, mp_INT); |
@@ -2850,10 +2821,6 @@ static inline void __init check_timer(void) | |||
2850 | unmask_ioapic(cfg); | 2821 | unmask_ioapic(cfg); |
2851 | } | 2822 | } |
2852 | if (timer_irq_works()) { | 2823 | if (timer_irq_works()) { |
2853 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2854 | setup_nmi(); | ||
2855 | legacy_pic->unmask(0); | ||
2856 | } | ||
2857 | if (disable_timer_pin_1 > 0) | 2824 | if (disable_timer_pin_1 > 0) |
2858 | clear_IO_APIC_pin(0, pin1); | 2825 | clear_IO_APIC_pin(0, pin1); |
2859 | goto out; | 2826 | goto out; |
@@ -2879,11 +2846,6 @@ static inline void __init check_timer(void) | |||
2879 | if (timer_irq_works()) { | 2846 | if (timer_irq_works()) { |
2880 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2847 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
2881 | timer_through_8259 = 1; | 2848 | timer_through_8259 = 1; |
2882 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2883 | legacy_pic->mask(0); | ||
2884 | setup_nmi(); | ||
2885 | legacy_pic->unmask(0); | ||
2886 | } | ||
2887 | goto out; | 2849 | goto out; |
2888 | } | 2850 | } |
2889 | /* | 2851 | /* |
@@ -2895,15 +2857,6 @@ static inline void __init check_timer(void) | |||
2895 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 2857 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
2896 | } | 2858 | } |
2897 | 2859 | ||
2898 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2899 | apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " | ||
2900 | "through the IO-APIC - disabling NMI Watchdog!\n"); | ||
2901 | nmi_watchdog = NMI_NONE; | ||
2902 | } | ||
2903 | #ifdef CONFIG_X86_32 | ||
2904 | timer_ack = 0; | ||
2905 | #endif | ||
2906 | |||
2907 | apic_printk(APIC_QUIET, KERN_INFO | 2860 | apic_printk(APIC_QUIET, KERN_INFO |
2908 | "...trying to set up timer as Virtual Wire IRQ...\n"); | 2861 | "...trying to set up timer as Virtual Wire IRQ...\n"); |
2909 | 2862 | ||
@@ -3441,6 +3394,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
3441 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3394 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
3442 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3395 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
3443 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3396 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3397 | msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); | ||
3444 | 3398 | ||
3445 | dmar_msi_write(irq, &msg); | 3399 | dmar_msi_write(irq, &msg); |
3446 | 3400 | ||
@@ -4133,7 +4087,8 @@ void __init pre_init_apic_IRQ0(void) | |||
4133 | 4087 | ||
4134 | printk(KERN_INFO "Early APIC setup for system timer0\n"); | 4088 | printk(KERN_INFO "Early APIC setup for system timer0\n"); |
4135 | #ifndef CONFIG_SMP | 4089 | #ifndef CONFIG_SMP |
4136 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | 4090 | physid_set_mask_of_physid(boot_cpu_physical_apicid, |
4091 | &phys_cpu_present_map); | ||
4137 | #endif | 4092 | #endif |
4138 | /* Make sure the irq descriptor is set up */ | 4093 | /* Make sure the irq descriptor is set up */ |
4139 | cfg = alloc_irq_and_cfg_at(0, 0); | 4094 | cfg = alloc_irq_and_cfg_at(0, 0); |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c deleted file mode 100644 index c90041ccb742..000000000000 --- a/arch/x86/kernel/apic/nmi.c +++ /dev/null | |||
@@ -1,567 +0,0 @@ | |||
1 | /* | ||
2 | * NMI watchdog support on APIC systems | ||
3 | * | ||
4 | * Started by Ingo Molnar <mingo@redhat.com> | ||
5 | * | ||
6 | * Fixes: | ||
7 | * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. | ||
8 | * Mikael Pettersson : Power Management for local APIC NMI watchdog. | ||
9 | * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. | ||
10 | * Pavel Machek and | ||
11 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. | ||
12 | */ | ||
13 | |||
14 | #include <asm/apic.h> | ||
15 | |||
16 | #include <linux/nmi.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/delay.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/sysdev.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/percpu.h> | ||
25 | #include <linux/kprobes.h> | ||
26 | #include <linux/cpumask.h> | ||
27 | #include <linux/kernel_stat.h> | ||
28 | #include <linux/kdebug.h> | ||
29 | #include <linux/smp.h> | ||
30 | |||
31 | #include <asm/i8259.h> | ||
32 | #include <asm/io_apic.h> | ||
33 | #include <asm/proto.h> | ||
34 | #include <asm/timer.h> | ||
35 | |||
36 | #include <asm/mce.h> | ||
37 | |||
38 | #include <asm/mach_traps.h> | ||
39 | |||
40 | int unknown_nmi_panic; | ||
41 | int nmi_watchdog_enabled; | ||
42 | |||
43 | /* For reliability, we're prepared to waste bits here. */ | ||
44 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
45 | |||
46 | /* nmi_active: | ||
47 | * >0: the lapic NMI watchdog is active, but can be disabled | ||
48 | * <0: the lapic NMI watchdog has not been set up, and cannot | ||
49 | * be enabled | ||
50 | * 0: the lapic NMI watchdog is disabled, but can be enabled | ||
51 | */ | ||
52 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | ||
53 | EXPORT_SYMBOL(nmi_active); | ||
54 | |||
55 | unsigned int nmi_watchdog = NMI_NONE; | ||
56 | EXPORT_SYMBOL(nmi_watchdog); | ||
57 | |||
58 | static int panic_on_timeout; | ||
59 | |||
60 | static unsigned int nmi_hz = HZ; | ||
61 | static DEFINE_PER_CPU(short, wd_enabled); | ||
62 | static int endflag __initdata; | ||
63 | |||
64 | static inline unsigned int get_nmi_count(int cpu) | ||
65 | { | ||
66 | return per_cpu(irq_stat, cpu).__nmi_count; | ||
67 | } | ||
68 | |||
69 | static inline int mce_in_progress(void) | ||
70 | { | ||
71 | #if defined(CONFIG_X86_MCE) | ||
72 | return atomic_read(&mce_entry) > 0; | ||
73 | #endif | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Take the local apic timer and PIT/HPET into account. We don't | ||
79 | * know which one is active, when we have highres/dyntick on | ||
80 | */ | ||
81 | static inline unsigned int get_timer_irqs(int cpu) | ||
82 | { | ||
83 | return per_cpu(irq_stat, cpu).apic_timer_irqs + | ||
84 | per_cpu(irq_stat, cpu).irq0_irqs; | ||
85 | } | ||
86 | |||
87 | #ifdef CONFIG_SMP | ||
88 | /* | ||
89 | * The performance counters used by NMI_LOCAL_APIC don't trigger when | ||
90 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | ||
91 | * CPUs during the test make them busy. | ||
92 | */ | ||
93 | static __init void nmi_cpu_busy(void *data) | ||
94 | { | ||
95 | local_irq_enable_in_hardirq(); | ||
96 | /* | ||
97 | * Intentionally don't use cpu_relax here. This is | ||
98 | * to make sure that the performance counter really ticks, | ||
99 | * even if there is a simulator or similar that catches the | ||
100 | * pause instruction. On a real HT machine this is fine because | ||
101 | * all other CPUs are busy with "useless" delay loops and don't | ||
102 | * care if they get somewhat less cycles. | ||
103 | */ | ||
104 | while (endflag == 0) | ||
105 | mb(); | ||
106 | } | ||
107 | #endif | ||
108 | |||
109 | static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) | ||
110 | { | ||
111 | printk(KERN_CONT "\n"); | ||
112 | |||
113 | printk(KERN_WARNING | ||
114 | "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
115 | cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); | ||
116 | |||
117 | printk(KERN_WARNING | ||
118 | "Please report this to bugzilla.kernel.org,\n"); | ||
119 | printk(KERN_WARNING | ||
120 | "and attach the output of the 'dmesg' command.\n"); | ||
121 | |||
122 | per_cpu(wd_enabled, cpu) = 0; | ||
123 | atomic_dec(&nmi_active); | ||
124 | } | ||
125 | |||
126 | static void __acpi_nmi_disable(void *__unused) | ||
127 | { | ||
128 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
129 | } | ||
130 | |||
131 | int __init check_nmi_watchdog(void) | ||
132 | { | ||
133 | unsigned int *prev_nmi_count; | ||
134 | int cpu; | ||
135 | |||
136 | if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) | ||
137 | return 0; | ||
138 | |||
139 | prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); | ||
140 | if (!prev_nmi_count) | ||
141 | goto error; | ||
142 | |||
143 | printk(KERN_INFO "Testing NMI watchdog ... "); | ||
144 | |||
145 | #ifdef CONFIG_SMP | ||
146 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
147 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); | ||
148 | #endif | ||
149 | |||
150 | for_each_possible_cpu(cpu) | ||
151 | prev_nmi_count[cpu] = get_nmi_count(cpu); | ||
152 | local_irq_enable(); | ||
153 | mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ | ||
154 | |||
155 | for_each_online_cpu(cpu) { | ||
156 | if (!per_cpu(wd_enabled, cpu)) | ||
157 | continue; | ||
158 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) | ||
159 | report_broken_nmi(cpu, prev_nmi_count); | ||
160 | } | ||
161 | endflag = 1; | ||
162 | if (!atomic_read(&nmi_active)) { | ||
163 | kfree(prev_nmi_count); | ||
164 | atomic_set(&nmi_active, -1); | ||
165 | goto error; | ||
166 | } | ||
167 | printk("OK.\n"); | ||
168 | |||
169 | /* | ||
170 | * now that we know it works we can reduce NMI frequency to | ||
171 | * something more reasonable; makes a difference in some configs | ||
172 | */ | ||
173 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
174 | nmi_hz = lapic_adjust_nmi_hz(1); | ||
175 | |||
176 | kfree(prev_nmi_count); | ||
177 | return 0; | ||
178 | error: | ||
179 | if (nmi_watchdog == NMI_IO_APIC) { | ||
180 | if (!timer_through_8259) | ||
181 | legacy_pic->mask(0); | ||
182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
183 | } | ||
184 | |||
185 | #ifdef CONFIG_X86_32 | ||
186 | timer_ack = 0; | ||
187 | #endif | ||
188 | return -1; | ||
189 | } | ||
190 | |||
191 | static int __init setup_nmi_watchdog(char *str) | ||
192 | { | ||
193 | unsigned int nmi; | ||
194 | |||
195 | if (!strncmp(str, "panic", 5)) { | ||
196 | panic_on_timeout = 1; | ||
197 | str = strchr(str, ','); | ||
198 | if (!str) | ||
199 | return 1; | ||
200 | ++str; | ||
201 | } | ||
202 | |||
203 | if (!strncmp(str, "lapic", 5)) | ||
204 | nmi_watchdog = NMI_LOCAL_APIC; | ||
205 | else if (!strncmp(str, "ioapic", 6)) | ||
206 | nmi_watchdog = NMI_IO_APIC; | ||
207 | else { | ||
208 | get_option(&str, &nmi); | ||
209 | if (nmi >= NMI_INVALID) | ||
210 | return 0; | ||
211 | nmi_watchdog = nmi; | ||
212 | } | ||
213 | |||
214 | return 1; | ||
215 | } | ||
216 | __setup("nmi_watchdog=", setup_nmi_watchdog); | ||
217 | |||
218 | /* | ||
219 | * Suspend/resume support | ||
220 | */ | ||
221 | #ifdef CONFIG_PM | ||
222 | |||
223 | static int nmi_pm_active; /* nmi_active before suspend */ | ||
224 | |||
225 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | ||
226 | { | ||
227 | /* only CPU0 goes here, other CPUs should be offline */ | ||
228 | nmi_pm_active = atomic_read(&nmi_active); | ||
229 | stop_apic_nmi_watchdog(NULL); | ||
230 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static int lapic_nmi_resume(struct sys_device *dev) | ||
235 | { | ||
236 | /* only CPU0 goes here, other CPUs should be offline */ | ||
237 | if (nmi_pm_active > 0) { | ||
238 | setup_apic_nmi_watchdog(NULL); | ||
239 | touch_nmi_watchdog(); | ||
240 | } | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | static struct sysdev_class nmi_sysclass = { | ||
245 | .name = "lapic_nmi", | ||
246 | .resume = lapic_nmi_resume, | ||
247 | .suspend = lapic_nmi_suspend, | ||
248 | }; | ||
249 | |||
250 | static struct sys_device device_lapic_nmi = { | ||
251 | .id = 0, | ||
252 | .cls = &nmi_sysclass, | ||
253 | }; | ||
254 | |||
255 | static int __init init_lapic_nmi_sysfs(void) | ||
256 | { | ||
257 | int error; | ||
258 | |||
259 | /* | ||
260 | * should really be a BUG_ON but b/c this is an | ||
261 | * init call, it just doesn't work. -dcz | ||
262 | */ | ||
263 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
264 | return 0; | ||
265 | |||
266 | if (atomic_read(&nmi_active) < 0) | ||
267 | return 0; | ||
268 | |||
269 | error = sysdev_class_register(&nmi_sysclass); | ||
270 | if (!error) | ||
271 | error = sysdev_register(&device_lapic_nmi); | ||
272 | return error; | ||
273 | } | ||
274 | |||
275 | /* must come after the local APIC's device_initcall() */ | ||
276 | late_initcall(init_lapic_nmi_sysfs); | ||
277 | |||
278 | #endif /* CONFIG_PM */ | ||
279 | |||
280 | static void __acpi_nmi_enable(void *__unused) | ||
281 | { | ||
282 | apic_write(APIC_LVT0, APIC_DM_NMI); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * Enable timer based NMIs on all CPUs: | ||
287 | */ | ||
288 | void acpi_nmi_enable(void) | ||
289 | { | ||
290 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
291 | on_each_cpu(__acpi_nmi_enable, NULL, 1); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Disable timer based NMIs on all CPUs: | ||
296 | */ | ||
297 | void acpi_nmi_disable(void) | ||
298 | { | ||
299 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
300 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * This function is called as soon the LAPIC NMI watchdog driver has everything | ||
305 | * in place and it's ready to check if the NMIs belong to the NMI watchdog | ||
306 | */ | ||
307 | void cpu_nmi_set_wd_enabled(void) | ||
308 | { | ||
309 | __get_cpu_var(wd_enabled) = 1; | ||
310 | } | ||
311 | |||
312 | void setup_apic_nmi_watchdog(void *unused) | ||
313 | { | ||
314 | if (__get_cpu_var(wd_enabled)) | ||
315 | return; | ||
316 | |||
317 | /* cheap hack to support suspend/resume */ | ||
318 | /* if cpu0 is not active neither should the other cpus */ | ||
319 | if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) | ||
320 | return; | ||
321 | |||
322 | switch (nmi_watchdog) { | ||
323 | case NMI_LOCAL_APIC: | ||
324 | if (lapic_watchdog_init(nmi_hz) < 0) { | ||
325 | __get_cpu_var(wd_enabled) = 0; | ||
326 | return; | ||
327 | } | ||
328 | /* FALL THROUGH */ | ||
329 | case NMI_IO_APIC: | ||
330 | __get_cpu_var(wd_enabled) = 1; | ||
331 | atomic_inc(&nmi_active); | ||
332 | } | ||
333 | } | ||
334 | |||
335 | void stop_apic_nmi_watchdog(void *unused) | ||
336 | { | ||
337 | /* only support LOCAL and IO APICs for now */ | ||
338 | if (!nmi_watchdog_active()) | ||
339 | return; | ||
340 | if (__get_cpu_var(wd_enabled) == 0) | ||
341 | return; | ||
342 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
343 | lapic_watchdog_stop(); | ||
344 | else | ||
345 | __acpi_nmi_disable(NULL); | ||
346 | __get_cpu_var(wd_enabled) = 0; | ||
347 | atomic_dec(&nmi_active); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * the best way to detect whether a CPU has a 'hard lockup' problem | ||
352 | * is to check it's local APIC timer IRQ counts. If they are not | ||
353 | * changing then that CPU has some problem. | ||
354 | * | ||
355 | * as these watchdog NMI IRQs are generated on every CPU, we only | ||
356 | * have to check the current processor. | ||
357 | * | ||
358 | * since NMIs don't listen to _any_ locks, we have to be extremely | ||
359 | * careful not to rely on unsafe variables. The printk might lock | ||
360 | * up though, so we have to break up any console locks first ... | ||
361 | * [when there will be more tty-related locks, break them up here too!] | ||
362 | */ | ||
363 | |||
364 | static DEFINE_PER_CPU(unsigned, last_irq_sum); | ||
365 | static DEFINE_PER_CPU(long, alert_counter); | ||
366 | static DEFINE_PER_CPU(int, nmi_touch); | ||
367 | |||
368 | void touch_nmi_watchdog(void) | ||
369 | { | ||
370 | if (nmi_watchdog_active()) { | ||
371 | unsigned cpu; | ||
372 | |||
373 | /* | ||
374 | * Tell other CPUs to reset their alert counters. We cannot | ||
375 | * do it ourselves because the alert count increase is not | ||
376 | * atomic. | ||
377 | */ | ||
378 | for_each_present_cpu(cpu) { | ||
379 | if (per_cpu(nmi_touch, cpu) != 1) | ||
380 | per_cpu(nmi_touch, cpu) = 1; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Tickle the softlockup detector too: | ||
386 | */ | ||
387 | touch_softlockup_watchdog(); | ||
388 | } | ||
389 | EXPORT_SYMBOL(touch_nmi_watchdog); | ||
390 | |||
391 | notrace __kprobes int | ||
392 | nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | ||
393 | { | ||
394 | /* | ||
395 | * Since current_thread_info()-> is always on the stack, and we | ||
396 | * always switch the stack NMI-atomically, it's safe to use | ||
397 | * smp_processor_id(). | ||
398 | */ | ||
399 | unsigned int sum; | ||
400 | int touched = 0; | ||
401 | int cpu = smp_processor_id(); | ||
402 | int rc = 0; | ||
403 | |||
404 | sum = get_timer_irqs(cpu); | ||
405 | |||
406 | if (__get_cpu_var(nmi_touch)) { | ||
407 | __get_cpu_var(nmi_touch) = 0; | ||
408 | touched = 1; | ||
409 | } | ||
410 | |||
411 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | ||
412 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | ||
413 | static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ | ||
414 | |||
415 | raw_spin_lock(&lock); | ||
416 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | ||
417 | show_regs(regs); | ||
418 | dump_stack(); | ||
419 | raw_spin_unlock(&lock); | ||
420 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | ||
421 | |||
422 | rc = 1; | ||
423 | } | ||
424 | |||
425 | /* Could check oops_in_progress here too, but it's safer not to */ | ||
426 | if (mce_in_progress()) | ||
427 | touched = 1; | ||
428 | |||
429 | /* if the none of the timers isn't firing, this cpu isn't doing much */ | ||
430 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | ||
431 | /* | ||
432 | * Ayiee, looks like this CPU is stuck ... | ||
433 | * wait a few IRQs (5 seconds) before doing the oops ... | ||
434 | */ | ||
435 | __this_cpu_inc(alert_counter); | ||
436 | if (__this_cpu_read(alert_counter) == 5 * nmi_hz) | ||
437 | /* | ||
438 | * die_nmi will return ONLY if NOTIFY_STOP happens.. | ||
439 | */ | ||
440 | die_nmi("BUG: NMI Watchdog detected LOCKUP", | ||
441 | regs, panic_on_timeout); | ||
442 | } else { | ||
443 | __get_cpu_var(last_irq_sum) = sum; | ||
444 | __this_cpu_write(alert_counter, 0); | ||
445 | } | ||
446 | |||
447 | /* see if the nmi watchdog went off */ | ||
448 | if (!__get_cpu_var(wd_enabled)) | ||
449 | return rc; | ||
450 | switch (nmi_watchdog) { | ||
451 | case NMI_LOCAL_APIC: | ||
452 | rc |= lapic_wd_event(nmi_hz); | ||
453 | break; | ||
454 | case NMI_IO_APIC: | ||
455 | /* | ||
456 | * don't know how to accurately check for this. | ||
457 | * just assume it was a watchdog timer interrupt | ||
458 | * This matches the old behaviour. | ||
459 | */ | ||
460 | rc = 1; | ||
461 | break; | ||
462 | } | ||
463 | return rc; | ||
464 | } | ||
465 | |||
466 | #ifdef CONFIG_SYSCTL | ||
467 | |||
468 | static void enable_ioapic_nmi_watchdog_single(void *unused) | ||
469 | { | ||
470 | __get_cpu_var(wd_enabled) = 1; | ||
471 | atomic_inc(&nmi_active); | ||
472 | __acpi_nmi_enable(NULL); | ||
473 | } | ||
474 | |||
475 | static void enable_ioapic_nmi_watchdog(void) | ||
476 | { | ||
477 | on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); | ||
478 | touch_nmi_watchdog(); | ||
479 | } | ||
480 | |||
481 | static void disable_ioapic_nmi_watchdog(void) | ||
482 | { | ||
483 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
484 | } | ||
485 | |||
486 | static int __init setup_unknown_nmi_panic(char *str) | ||
487 | { | ||
488 | unknown_nmi_panic = 1; | ||
489 | return 1; | ||
490 | } | ||
491 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
492 | |||
493 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | ||
494 | { | ||
495 | unsigned char reason = get_nmi_reason(); | ||
496 | char buf[64]; | ||
497 | |||
498 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | ||
499 | die_nmi(buf, regs, 1); /* Always panic here */ | ||
500 | return 0; | ||
501 | } | ||
502 | |||
503 | /* | ||
504 | * proc handler for /proc/sys/kernel/nmi | ||
505 | */ | ||
506 | int proc_nmi_enabled(struct ctl_table *table, int write, | ||
507 | void __user *buffer, size_t *length, loff_t *ppos) | ||
508 | { | ||
509 | int old_state; | ||
510 | |||
511 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; | ||
512 | old_state = nmi_watchdog_enabled; | ||
513 | proc_dointvec(table, write, buffer, length, ppos); | ||
514 | if (!!old_state == !!nmi_watchdog_enabled) | ||
515 | return 0; | ||
516 | |||
517 | if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { | ||
518 | printk(KERN_WARNING | ||
519 | "NMI watchdog is permanently disabled\n"); | ||
520 | return -EIO; | ||
521 | } | ||
522 | |||
523 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
524 | if (nmi_watchdog_enabled) | ||
525 | enable_lapic_nmi_watchdog(); | ||
526 | else | ||
527 | disable_lapic_nmi_watchdog(); | ||
528 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
529 | if (nmi_watchdog_enabled) | ||
530 | enable_ioapic_nmi_watchdog(); | ||
531 | else | ||
532 | disable_ioapic_nmi_watchdog(); | ||
533 | } else { | ||
534 | printk(KERN_WARNING | ||
535 | "NMI watchdog doesn't know what hardware to touch\n"); | ||
536 | return -EIO; | ||
537 | } | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | #endif /* CONFIG_SYSCTL */ | ||
542 | |||
543 | int do_nmi_callback(struct pt_regs *regs, int cpu) | ||
544 | { | ||
545 | #ifdef CONFIG_SYSCTL | ||
546 | if (unknown_nmi_panic) | ||
547 | return unknown_nmi_panic_callback(regs, cpu); | ||
548 | #endif | ||
549 | return 0; | ||
550 | } | ||
551 | |||
552 | void arch_trigger_all_cpu_backtrace(void) | ||
553 | { | ||
554 | int i; | ||
555 | |||
556 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); | ||
557 | |||
558 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
559 | apic->send_IPI_all(NMI_VECTOR); | ||
560 | |||
561 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
562 | for (i = 0; i < 10 * 1000; i++) { | ||
563 | if (cpumask_empty(to_cpumask(backtrace_mask))) | ||
564 | break; | ||
565 | mdelay(1); | ||
566 | } | ||
567 | } | ||
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index f9e4e6a54073..d8c4a6feb286 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void) | |||
79 | /* need to update phys_pkg_id */ | 79 | /* need to update phys_pkg_id */ |
80 | apic->phys_pkg_id = apicid_phys_pkg_id; | 80 | apic->phys_pkg_id = apicid_phys_pkg_id; |
81 | } | 81 | } |
82 | |||
83 | /* | ||
84 | * Now that apic routing model is selected, configure the | ||
85 | * fault handling for intr remapping. | ||
86 | */ | ||
87 | if (intr_remapping_enabled) | ||
88 | enable_drhd_fault_handling(); | ||
89 | } | 82 | } |
90 | 83 | ||
91 | /* Same for both flat and physical. */ | 84 | /* Same for both flat and physical. */ |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 194539aea175..2a3f2a7db243 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -44,8 +44,20 @@ static u64 gru_start_paddr, gru_end_paddr; | |||
44 | static union uvh_apicid uvh_apicid; | 44 | static union uvh_apicid uvh_apicid; |
45 | int uv_min_hub_revision_id; | 45 | int uv_min_hub_revision_id; |
46 | EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); | 46 | EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); |
47 | unsigned int uv_apicid_hibits; | ||
48 | EXPORT_SYMBOL_GPL(uv_apicid_hibits); | ||
47 | static DEFINE_SPINLOCK(uv_nmi_lock); | 49 | static DEFINE_SPINLOCK(uv_nmi_lock); |
48 | 50 | ||
51 | static unsigned long __init uv_early_read_mmr(unsigned long addr) | ||
52 | { | ||
53 | unsigned long val, *mmr; | ||
54 | |||
55 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr)); | ||
56 | val = *mmr; | ||
57 | early_iounmap(mmr, sizeof(*mmr)); | ||
58 | return val; | ||
59 | } | ||
60 | |||
49 | static inline bool is_GRU_range(u64 start, u64 end) | 61 | static inline bool is_GRU_range(u64 start, u64 end) |
50 | { | 62 | { |
51 | return start >= gru_start_paddr && end <= gru_end_paddr; | 63 | return start >= gru_start_paddr && end <= gru_end_paddr; |
@@ -56,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end) | |||
56 | return is_ISA_range(start, end) || is_GRU_range(start, end); | 68 | return is_ISA_range(start, end) || is_GRU_range(start, end); |
57 | } | 69 | } |
58 | 70 | ||
59 | static int early_get_nodeid(void) | 71 | static int __init early_get_pnodeid(void) |
60 | { | 72 | { |
61 | union uvh_node_id_u node_id; | 73 | union uvh_node_id_u node_id; |
62 | unsigned long *mmr; | 74 | union uvh_rh_gam_config_mmr_u m_n_config; |
63 | 75 | int pnode; | |
64 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); | ||
65 | node_id.v = *mmr; | ||
66 | early_iounmap(mmr, sizeof(*mmr)); | ||
67 | 76 | ||
68 | /* Currently, all blades have same revision number */ | 77 | /* Currently, all blades have same revision number */ |
78 | node_id.v = uv_early_read_mmr(UVH_NODE_ID); | ||
79 | m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); | ||
69 | uv_min_hub_revision_id = node_id.s.revision; | 80 | uv_min_hub_revision_id = node_id.s.revision; |
70 | 81 | ||
71 | return node_id.s.node_id; | 82 | pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); |
83 | return pnode; | ||
72 | } | 84 | } |
73 | 85 | ||
74 | static void __init early_get_apic_pnode_shift(void) | 86 | static void __init early_get_apic_pnode_shift(void) |
75 | { | 87 | { |
76 | unsigned long *mmr; | 88 | uvh_apicid.v = uv_early_read_mmr(UVH_APICID); |
77 | |||
78 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr)); | ||
79 | uvh_apicid.v = *mmr; | ||
80 | early_iounmap(mmr, sizeof(*mmr)); | ||
81 | if (!uvh_apicid.v) | 89 | if (!uvh_apicid.v) |
82 | /* | 90 | /* |
83 | * Old bios, use default value | 91 | * Old bios, use default value |
@@ -85,12 +93,25 @@ static void __init early_get_apic_pnode_shift(void) | |||
85 | uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; | 93 | uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; |
86 | } | 94 | } |
87 | 95 | ||
96 | /* | ||
97 | * Add an extra bit as dictated by bios to the destination apicid of | ||
98 | * interrupts potentially passing through the UV HUB. This prevents | ||
99 | * a deadlock between interrupts and IO port operations. | ||
100 | */ | ||
101 | static void __init uv_set_apicid_hibit(void) | ||
102 | { | ||
103 | union uvh_lb_target_physical_apic_id_mask_u apicid_mask; | ||
104 | |||
105 | apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK); | ||
106 | uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; | ||
107 | } | ||
108 | |||
88 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 109 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
89 | { | 110 | { |
90 | int nodeid; | 111 | int pnodeid; |
91 | 112 | ||
92 | if (!strcmp(oem_id, "SGI")) { | 113 | if (!strcmp(oem_id, "SGI")) { |
93 | nodeid = early_get_nodeid(); | 114 | pnodeid = early_get_pnodeid(); |
94 | early_get_apic_pnode_shift(); | 115 | early_get_apic_pnode_shift(); |
95 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | 116 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; |
96 | x86_platform.nmi_init = uv_nmi_init; | 117 | x86_platform.nmi_init = uv_nmi_init; |
@@ -100,8 +121,9 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
100 | uv_system_type = UV_X2APIC; | 121 | uv_system_type = UV_X2APIC; |
101 | else if (!strcmp(oem_table_id, "UVH")) { | 122 | else if (!strcmp(oem_table_id, "UVH")) { |
102 | __get_cpu_var(x2apic_extra_bits) = | 123 | __get_cpu_var(x2apic_extra_bits) = |
103 | nodeid << (uvh_apicid.s.pnode_shift - 1); | 124 | pnodeid << uvh_apicid.s.pnode_shift; |
104 | uv_system_type = UV_NON_UNIQUE_APIC; | 125 | uv_system_type = UV_NON_UNIQUE_APIC; |
126 | uv_set_apicid_hibit(); | ||
105 | return 1; | 127 | return 1; |
106 | } | 128 | } |
107 | } | 129 | } |
@@ -155,6 +177,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri | |||
155 | int pnode; | 177 | int pnode; |
156 | 178 | ||
157 | pnode = uv_apicid_to_pnode(phys_apicid); | 179 | pnode = uv_apicid_to_pnode(phys_apicid); |
180 | phys_apicid |= uv_apicid_hibits; | ||
158 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 181 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
159 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 182 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
160 | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | | 183 | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | |
@@ -236,7 +259,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) | |||
236 | int cpu = cpumask_first(cpumask); | 259 | int cpu = cpumask_first(cpumask); |
237 | 260 | ||
238 | if ((unsigned)cpu < nr_cpu_ids) | 261 | if ((unsigned)cpu < nr_cpu_ids) |
239 | return per_cpu(x86_cpu_to_apicid, cpu); | 262 | return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; |
240 | else | 263 | else |
241 | return BAD_APICID; | 264 | return BAD_APICID; |
242 | } | 265 | } |
@@ -255,7 +278,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
255 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 278 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
256 | break; | 279 | break; |
257 | } | 280 | } |
258 | return per_cpu(x86_cpu_to_apicid, cpu); | 281 | return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; |
259 | } | 282 | } |
260 | 283 | ||
261 | static unsigned int x2apic_get_apic_id(unsigned long x) | 284 | static unsigned int x2apic_get_apic_id(unsigned long x) |
@@ -661,27 +684,32 @@ void uv_nmi_init(void) | |||
661 | void __init uv_system_init(void) | 684 | void __init uv_system_init(void) |
662 | { | 685 | { |
663 | union uvh_rh_gam_config_mmr_u m_n_config; | 686 | union uvh_rh_gam_config_mmr_u m_n_config; |
687 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | ||
664 | union uvh_node_id_u node_id; | 688 | union uvh_node_id_u node_id; |
665 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; | 689 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; |
666 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; | 690 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io; |
667 | int gnode_extra, max_pnode = 0; | 691 | int gnode_extra, max_pnode = 0; |
668 | unsigned long mmr_base, present, paddr; | 692 | unsigned long mmr_base, present, paddr; |
669 | unsigned short pnode_mask; | 693 | unsigned short pnode_mask, pnode_io_mask; |
670 | 694 | ||
671 | map_low_mmrs(); | 695 | map_low_mmrs(); |
672 | 696 | ||
673 | m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); | 697 | m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); |
674 | m_val = m_n_config.s.m_skt; | 698 | m_val = m_n_config.s.m_skt; |
675 | n_val = m_n_config.s.n_skt; | 699 | n_val = m_n_config.s.n_skt; |
700 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | ||
701 | n_io = mmioh.s.n_io; | ||
676 | mmr_base = | 702 | mmr_base = |
677 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & | 703 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & |
678 | ~UV_MMR_ENABLE; | 704 | ~UV_MMR_ENABLE; |
679 | pnode_mask = (1 << n_val) - 1; | 705 | pnode_mask = (1 << n_val) - 1; |
706 | pnode_io_mask = (1 << n_io) - 1; | ||
707 | |||
680 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); | 708 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); |
681 | gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; | 709 | gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; |
682 | gnode_upper = ((unsigned long)gnode_extra << m_val); | 710 | gnode_upper = ((unsigned long)gnode_extra << m_val); |
683 | printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", | 711 | printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", |
684 | n_val, m_val, gnode_upper, gnode_extra); | 712 | n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); |
685 | 713 | ||
686 | printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); | 714 | printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); |
687 | 715 | ||
@@ -714,7 +742,7 @@ void __init uv_system_init(void) | |||
714 | for (j = 0; j < 64; j++) { | 742 | for (j = 0; j < 64; j++) { |
715 | if (!test_bit(j, &present)) | 743 | if (!test_bit(j, &present)) |
716 | continue; | 744 | continue; |
717 | pnode = (i * 64 + j); | 745 | pnode = (i * 64 + j) & pnode_mask; |
718 | uv_blade_info[blade].pnode = pnode; | 746 | uv_blade_info[blade].pnode = pnode; |
719 | uv_blade_info[blade].nr_possible_cpus = 0; | 747 | uv_blade_info[blade].nr_possible_cpus = 0; |
720 | uv_blade_info[blade].nr_online_cpus = 0; | 748 | uv_blade_info[blade].nr_online_cpus = 0; |
@@ -735,6 +763,7 @@ void __init uv_system_init(void) | |||
735 | /* | 763 | /* |
736 | * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); | 764 | * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); |
737 | */ | 765 | */ |
766 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; | ||
738 | uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; | 767 | uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; |
739 | pnode = uv_apicid_to_pnode(apicid); | 768 | pnode = uv_apicid_to_pnode(apicid); |
740 | blade = boot_pnode_to_blade(pnode); | 769 | blade = boot_pnode_to_blade(pnode); |
@@ -751,7 +780,6 @@ void __init uv_system_init(void) | |||
751 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; | 780 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; |
752 | uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; | 781 | uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; |
753 | uv_cpu_hub_info(cpu)->pnode = pnode; | 782 | uv_cpu_hub_info(cpu)->pnode = pnode; |
754 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; | ||
755 | uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; | 783 | uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; |
756 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 784 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
757 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; | 785 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; |
@@ -775,7 +803,7 @@ void __init uv_system_init(void) | |||
775 | 803 | ||
776 | map_gru_high(max_pnode); | 804 | map_gru_high(max_pnode); |
777 | map_mmr_high(max_pnode); | 805 | map_mmr_high(max_pnode); |
778 | map_mmioh_high(max_pnode); | 806 | map_mmioh_high(max_pnode & pnode_io_mask); |
779 | 807 | ||
780 | uv_cpu_init(); | 808 | uv_cpu_init(); |
781 | uv_scir_register_cpu_notifier(); | 809 | uv_scir_register_cpu_notifier(); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b68bda30938..1d59834396bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void) | |||
894 | #else | 894 | #else |
895 | vgetcpu_set_mode(); | 895 | vgetcpu_set_mode(); |
896 | #endif | 896 | #endif |
897 | init_hw_perf_events(); | ||
898 | } | 897 | } |
899 | 898 | ||
900 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 899 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 80c482382d5c..5bf2fac52aca 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -31,8 +31,6 @@ | |||
31 | #include <asm/mce.h> | 31 | #include <asm/mce.h> |
32 | #include <asm/msr.h> | 32 | #include <asm/msr.h> |
33 | 33 | ||
34 | #define PFX "mce_threshold: " | ||
35 | #define VERSION "version 1.1.1" | ||
36 | #define NR_BANKS 6 | 34 | #define NR_BANKS 6 |
37 | #define NR_BLOCKS 9 | 35 | #define NR_BLOCKS 9 |
38 | #define THRESHOLD_MAX 0xFFF | 36 | #define THRESHOLD_MAX 0xFFF |
@@ -59,12 +57,6 @@ struct threshold_block { | |||
59 | struct list_head miscj; | 57 | struct list_head miscj; |
60 | }; | 58 | }; |
61 | 59 | ||
62 | /* defaults used early on boot */ | ||
63 | static struct threshold_block threshold_defaults = { | ||
64 | .interrupt_enable = 0, | ||
65 | .threshold_limit = THRESHOLD_MAX, | ||
66 | }; | ||
67 | |||
68 | struct threshold_bank { | 60 | struct threshold_bank { |
69 | struct kobject *kobj; | 61 | struct kobject *kobj; |
70 | struct threshold_block *blocks; | 62 | struct threshold_block *blocks; |
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void); | |||
89 | struct thresh_restart { | 81 | struct thresh_restart { |
90 | struct threshold_block *b; | 82 | struct threshold_block *b; |
91 | int reset; | 83 | int reset; |
84 | int set_lvt_off; | ||
85 | int lvt_off; | ||
92 | u16 old_limit; | 86 | u16 old_limit; |
93 | }; | 87 | }; |
94 | 88 | ||
89 | static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) | ||
90 | { | ||
91 | int msr = (hi & MASK_LVTOFF_HI) >> 20; | ||
92 | |||
93 | if (apic < 0) { | ||
94 | pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " | ||
95 | "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, | ||
96 | b->bank, b->block, b->address, hi, lo); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | if (apic != msr) { | ||
101 | pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " | ||
102 | "for bank %d, block %d (MSR%08X=0x%x%08x)\n", | ||
103 | b->cpu, apic, b->bank, b->block, b->address, hi, lo); | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | return 1; | ||
108 | }; | ||
109 | |||
95 | /* must be called with correct cpu affinity */ | 110 | /* must be called with correct cpu affinity */ |
96 | /* Called via smp_call_function_single() */ | 111 | /* Called via smp_call_function_single() */ |
97 | static void threshold_restart_bank(void *_tr) | 112 | static void threshold_restart_bank(void *_tr) |
98 | { | 113 | { |
99 | struct thresh_restart *tr = _tr; | 114 | struct thresh_restart *tr = _tr; |
100 | u32 mci_misc_hi, mci_misc_lo; | 115 | u32 hi, lo; |
101 | 116 | ||
102 | rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); | 117 | rdmsr(tr->b->address, lo, hi); |
103 | 118 | ||
104 | if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | 119 | if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) |
105 | tr->reset = 1; /* limit cannot be lower than err count */ | 120 | tr->reset = 1; /* limit cannot be lower than err count */ |
106 | 121 | ||
107 | if (tr->reset) { /* reset err count and overflow bit */ | 122 | if (tr->reset) { /* reset err count and overflow bit */ |
108 | mci_misc_hi = | 123 | hi = |
109 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | | 124 | (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | |
110 | (THRESHOLD_MAX - tr->b->threshold_limit); | 125 | (THRESHOLD_MAX - tr->b->threshold_limit); |
111 | } else if (tr->old_limit) { /* change limit w/o reset */ | 126 | } else if (tr->old_limit) { /* change limit w/o reset */ |
112 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 127 | int new_count = (hi & THRESHOLD_MAX) + |
113 | (tr->old_limit - tr->b->threshold_limit); | 128 | (tr->old_limit - tr->b->threshold_limit); |
114 | 129 | ||
115 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 130 | hi = (hi & ~MASK_ERR_COUNT_HI) | |
116 | (new_count & THRESHOLD_MAX); | 131 | (new_count & THRESHOLD_MAX); |
117 | } | 132 | } |
118 | 133 | ||
134 | if (tr->set_lvt_off) { | ||
135 | if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { | ||
136 | /* set new lvt offset */ | ||
137 | hi &= ~MASK_LVTOFF_HI; | ||
138 | hi |= tr->lvt_off << 20; | ||
139 | } | ||
140 | } | ||
141 | |||
119 | tr->b->interrupt_enable ? | 142 | tr->b->interrupt_enable ? |
120 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | 143 | (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : |
121 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | 144 | (hi &= ~MASK_INT_TYPE_HI); |
122 | 145 | ||
123 | mci_misc_hi |= MASK_COUNT_EN_HI; | 146 | hi |= MASK_COUNT_EN_HI; |
124 | wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); | 147 | wrmsr(tr->b->address, lo, hi); |
148 | } | ||
149 | |||
150 | static void mce_threshold_block_init(struct threshold_block *b, int offset) | ||
151 | { | ||
152 | struct thresh_restart tr = { | ||
153 | .b = b, | ||
154 | .set_lvt_off = 1, | ||
155 | .lvt_off = offset, | ||
156 | }; | ||
157 | |||
158 | b->threshold_limit = THRESHOLD_MAX; | ||
159 | threshold_restart_bank(&tr); | ||
160 | }; | ||
161 | |||
162 | static int setup_APIC_mce(int reserved, int new) | ||
163 | { | ||
164 | if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, | ||
165 | APIC_EILVT_MSG_FIX, 0)) | ||
166 | return new; | ||
167 | |||
168 | return reserved; | ||
125 | } | 169 | } |
126 | 170 | ||
127 | /* cpu init entry point, called from mce.c with preempt off */ | 171 | /* cpu init entry point, called from mce.c with preempt off */ |
128 | void mce_amd_feature_init(struct cpuinfo_x86 *c) | 172 | void mce_amd_feature_init(struct cpuinfo_x86 *c) |
129 | { | 173 | { |
174 | struct threshold_block b; | ||
130 | unsigned int cpu = smp_processor_id(); | 175 | unsigned int cpu = smp_processor_id(); |
131 | u32 low = 0, high = 0, address = 0; | 176 | u32 low = 0, high = 0, address = 0; |
132 | unsigned int bank, block; | 177 | unsigned int bank, block; |
133 | struct thresh_restart tr; | 178 | int offset = -1; |
134 | int lvt_off = -1; | ||
135 | u8 offset; | ||
136 | 179 | ||
137 | for (bank = 0; bank < NR_BANKS; ++bank) { | 180 | for (bank = 0; bank < NR_BANKS; ++bank) { |
138 | for (block = 0; block < NR_BLOCKS; ++block) { | 181 | for (block = 0; block < NR_BLOCKS; ++block) { |
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
163 | if (shared_bank[bank] && c->cpu_core_id) | 206 | if (shared_bank[bank] && c->cpu_core_id) |
164 | break; | 207 | break; |
165 | #endif | 208 | #endif |
166 | offset = (high & MASK_LVTOFF_HI) >> 20; | 209 | offset = setup_APIC_mce(offset, |
167 | if (lvt_off < 0) { | 210 | (high & MASK_LVTOFF_HI) >> 20); |
168 | if (setup_APIC_eilvt(offset, | ||
169 | THRESHOLD_APIC_VECTOR, | ||
170 | APIC_EILVT_MSG_FIX, 0)) { | ||
171 | pr_err(FW_BUG "cpu %d, failed to " | ||
172 | "setup threshold interrupt " | ||
173 | "for bank %d, block %d " | ||
174 | "(MSR%08X=0x%x%08x)", | ||
175 | smp_processor_id(), bank, block, | ||
176 | address, high, low); | ||
177 | continue; | ||
178 | } | ||
179 | lvt_off = offset; | ||
180 | } else if (lvt_off != offset) { | ||
181 | pr_err(FW_BUG "cpu %d, invalid threshold " | ||
182 | "interrupt offset %d for bank %d," | ||
183 | "block %d (MSR%08X=0x%x%08x)", | ||
184 | smp_processor_id(), lvt_off, bank, | ||
185 | block, address, high, low); | ||
186 | continue; | ||
187 | } | ||
188 | |||
189 | high &= ~MASK_LVTOFF_HI; | ||
190 | high |= lvt_off << 20; | ||
191 | wrmsr(address, low, high); | ||
192 | 211 | ||
193 | threshold_defaults.address = address; | 212 | memset(&b, 0, sizeof(b)); |
194 | tr.b = &threshold_defaults; | 213 | b.cpu = cpu; |
195 | tr.reset = 0; | 214 | b.bank = bank; |
196 | tr.old_limit = 0; | 215 | b.block = block; |
197 | threshold_restart_bank(&tr); | 216 | b.address = address; |
198 | 217 | ||
218 | mce_threshold_block_init(&b, offset); | ||
199 | mce_threshold_vector = amd_threshold_interrupt; | 219 | mce_threshold_vector = amd_threshold_interrupt; |
200 | } | 220 | } |
201 | } | 221 | } |
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) | |||
298 | 318 | ||
299 | b->interrupt_enable = !!new; | 319 | b->interrupt_enable = !!new; |
300 | 320 | ||
321 | memset(&tr, 0, sizeof(tr)); | ||
301 | tr.b = b; | 322 | tr.b = b; |
302 | tr.reset = 0; | ||
303 | tr.old_limit = 0; | ||
304 | 323 | ||
305 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 324 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
306 | 325 | ||
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) | |||
321 | if (new < 1) | 340 | if (new < 1) |
322 | new = 1; | 341 | new = 1; |
323 | 342 | ||
343 | memset(&tr, 0, sizeof(tr)); | ||
324 | tr.old_limit = b->threshold_limit; | 344 | tr.old_limit = b->threshold_limit; |
325 | b->threshold_limit = new; | 345 | b->threshold_limit = new; |
326 | tr.b = b; | 346 | tr.b = b; |
327 | tr.reset = 0; | ||
328 | 347 | ||
329 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 348 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
330 | 349 | ||
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
603 | continue; | 622 | continue; |
604 | err = threshold_create_bank(cpu, bank); | 623 | err = threshold_create_bank(cpu, bank); |
605 | if (err) | 624 | if (err) |
606 | goto out; | 625 | return err; |
607 | } | 626 | } |
608 | out: | 627 | |
609 | return err; | 628 | return err; |
610 | } | 629 | } |
611 | 630 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 4b683267eca5..e12246ff5aa6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -53,8 +53,13 @@ struct thermal_state { | |||
53 | struct _thermal_state core_power_limit; | 53 | struct _thermal_state core_power_limit; |
54 | struct _thermal_state package_throttle; | 54 | struct _thermal_state package_throttle; |
55 | struct _thermal_state package_power_limit; | 55 | struct _thermal_state package_power_limit; |
56 | struct _thermal_state core_thresh0; | ||
57 | struct _thermal_state core_thresh1; | ||
56 | }; | 58 | }; |
57 | 59 | ||
60 | /* Callback to handle core threshold interrupts */ | ||
61 | int (*platform_thermal_notify)(__u64 msr_val); | ||
62 | |||
58 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 63 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
59 | 64 | ||
60 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 65 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level) | |||
200 | return 0; | 205 | return 0; |
201 | } | 206 | } |
202 | 207 | ||
208 | static int thresh_event_valid(int event) | ||
209 | { | ||
210 | struct _thermal_state *state; | ||
211 | unsigned int this_cpu = smp_processor_id(); | ||
212 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | ||
213 | u64 now = get_jiffies_64(); | ||
214 | |||
215 | state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1; | ||
216 | |||
217 | if (time_before64(now, state->next_check)) | ||
218 | return 0; | ||
219 | |||
220 | state->next_check = now + CHECK_INTERVAL; | ||
221 | return 1; | ||
222 | } | ||
223 | |||
203 | #ifdef CONFIG_SYSFS | 224 | #ifdef CONFIG_SYSFS |
204 | /* Add/Remove thermal_throttle interface for CPU device: */ | 225 | /* Add/Remove thermal_throttle interface for CPU device: */ |
205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, | 226 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, |
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device); | |||
313 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | 334 | #define PACKAGE_THROTTLED ((__u64)2 << 62) |
314 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | 335 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) |
315 | 336 | ||
337 | static void notify_thresholds(__u64 msr_val) | ||
338 | { | ||
339 | /* check whether the interrupt handler is defined; | ||
340 | * otherwise simply return | ||
341 | */ | ||
342 | if (!platform_thermal_notify) | ||
343 | return; | ||
344 | |||
345 | /* lower threshold reached */ | ||
346 | if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) | ||
347 | platform_thermal_notify(msr_val); | ||
348 | /* higher threshold reached */ | ||
349 | if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) | ||
350 | platform_thermal_notify(msr_val); | ||
351 | } | ||
352 | |||
316 | /* Thermal transition interrupt handler */ | 353 | /* Thermal transition interrupt handler */ |
317 | static void intel_thermal_interrupt(void) | 354 | static void intel_thermal_interrupt(void) |
318 | { | 355 | { |
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void) | |||
321 | 358 | ||
322 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 359 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
323 | 360 | ||
361 | /* Check for violation of core thermal thresholds*/ | ||
362 | notify_thresholds(msr_val); | ||
363 | |||
324 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, | 364 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
325 | THERMAL_THROTTLING_EVENT, | 365 | THERMAL_THROTTLING_EVENT, |
326 | CORE_LEVEL) != 0) | 366 | CORE_LEVEL) != 0) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed6310183efb..0a360d146596 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void) | |||
330 | { | 330 | { |
331 | int i; | 331 | int i; |
332 | 332 | ||
333 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
334 | disable_lapic_nmi_watchdog(); | ||
335 | |||
336 | for (i = 0; i < x86_pmu.num_counters; i++) { | 333 | for (i = 0; i < x86_pmu.num_counters; i++) { |
337 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 334 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
338 | goto perfctr_fail; | 335 | goto perfctr_fail; |
@@ -355,9 +352,6 @@ perfctr_fail: | |||
355 | for (i--; i >= 0; i--) | 352 | for (i--; i >= 0; i--) |
356 | release_perfctr_nmi(x86_pmu.perfctr + i); | 353 | release_perfctr_nmi(x86_pmu.perfctr + i); |
357 | 354 | ||
358 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
359 | enable_lapic_nmi_watchdog(); | ||
360 | |||
361 | return false; | 355 | return false; |
362 | } | 356 | } |
363 | 357 | ||
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void) | |||
369 | release_perfctr_nmi(x86_pmu.perfctr + i); | 363 | release_perfctr_nmi(x86_pmu.perfctr + i); |
370 | release_evntsel_nmi(x86_pmu.eventsel + i); | 364 | release_evntsel_nmi(x86_pmu.eventsel + i); |
371 | } | 365 | } |
372 | |||
373 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
374 | enable_lapic_nmi_watchdog(); | ||
375 | } | 366 | } |
376 | 367 | ||
377 | #else | 368 | #else |
@@ -381,6 +372,58 @@ static void release_pmc_hardware(void) {} | |||
381 | 372 | ||
382 | #endif | 373 | #endif |
383 | 374 | ||
375 | static bool check_hw_exists(void) | ||
376 | { | ||
377 | u64 val, val_new = 0; | ||
378 | int i, reg, ret = 0; | ||
379 | |||
380 | /* | ||
381 | * Check to see if the BIOS enabled any of the counters, if so | ||
382 | * complain and bail. | ||
383 | */ | ||
384 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
385 | reg = x86_pmu.eventsel + i; | ||
386 | ret = rdmsrl_safe(reg, &val); | ||
387 | if (ret) | ||
388 | goto msr_fail; | ||
389 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
390 | goto bios_fail; | ||
391 | } | ||
392 | |||
393 | if (x86_pmu.num_counters_fixed) { | ||
394 | reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
395 | ret = rdmsrl_safe(reg, &val); | ||
396 | if (ret) | ||
397 | goto msr_fail; | ||
398 | for (i = 0; i < x86_pmu.num_counters_fixed; i++) { | ||
399 | if (val & (0x03 << i*4)) | ||
400 | goto bios_fail; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * Now write a value and read it back to see if it matches, | ||
406 | * this is needed to detect certain hardware emulators (qemu/kvm) | ||
407 | * that don't trap on the MSR access and always return 0s. | ||
408 | */ | ||
409 | val = 0xabcdUL; | ||
410 | ret = checking_wrmsrl(x86_pmu.perfctr, val); | ||
411 | ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); | ||
412 | if (ret || val != val_new) | ||
413 | goto msr_fail; | ||
414 | |||
415 | return true; | ||
416 | |||
417 | bios_fail: | ||
418 | printk(KERN_CONT "Broken BIOS detected, using software events only.\n"); | ||
419 | printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); | ||
420 | return false; | ||
421 | |||
422 | msr_fail: | ||
423 | printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); | ||
424 | return false; | ||
425 | } | ||
426 | |||
384 | static void reserve_ds_buffers(void); | 427 | static void reserve_ds_buffers(void); |
385 | static void release_ds_buffers(void); | 428 | static void release_ds_buffers(void); |
386 | 429 | ||
@@ -437,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
437 | struct hw_perf_event *hwc = &event->hw; | 480 | struct hw_perf_event *hwc = &event->hw; |
438 | u64 config; | 481 | u64 config; |
439 | 482 | ||
440 | if (!hwc->sample_period) { | 483 | if (!is_sampling_event(event)) { |
441 | hwc->sample_period = x86_pmu.max_period; | 484 | hwc->sample_period = x86_pmu.max_period; |
442 | hwc->last_period = hwc->sample_period; | 485 | hwc->last_period = hwc->sample_period; |
443 | local64_set(&hwc->period_left, hwc->sample_period); | 486 | local64_set(&hwc->period_left, hwc->sample_period); |
@@ -1348,7 +1391,7 @@ static void __init pmu_check_apic(void) | |||
1348 | pr_info("no hardware sampling interrupt available.\n"); | 1391 | pr_info("no hardware sampling interrupt available.\n"); |
1349 | } | 1392 | } |
1350 | 1393 | ||
1351 | void __init init_hw_perf_events(void) | 1394 | int __init init_hw_perf_events(void) |
1352 | { | 1395 | { |
1353 | struct event_constraint *c; | 1396 | struct event_constraint *c; |
1354 | int err; | 1397 | int err; |
@@ -1363,15 +1406,19 @@ void __init init_hw_perf_events(void) | |||
1363 | err = amd_pmu_init(); | 1406 | err = amd_pmu_init(); |
1364 | break; | 1407 | break; |
1365 | default: | 1408 | default: |
1366 | return; | 1409 | return 0; |
1367 | } | 1410 | } |
1368 | if (err != 0) { | 1411 | if (err != 0) { |
1369 | pr_cont("no PMU driver, software events only.\n"); | 1412 | pr_cont("no PMU driver, software events only.\n"); |
1370 | return; | 1413 | return 0; |
1371 | } | 1414 | } |
1372 | 1415 | ||
1373 | pmu_check_apic(); | 1416 | pmu_check_apic(); |
1374 | 1417 | ||
1418 | /* sanity check that the hardware exists or is emulated */ | ||
1419 | if (!check_hw_exists()) | ||
1420 | return 0; | ||
1421 | |||
1375 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1422 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1376 | 1423 | ||
1377 | if (x86_pmu.quirks) | 1424 | if (x86_pmu.quirks) |
@@ -1418,9 +1465,12 @@ void __init init_hw_perf_events(void) | |||
1418 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); | 1465 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1419 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); | 1466 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1420 | 1467 | ||
1421 | perf_pmu_register(&pmu); | 1468 | perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); |
1422 | perf_cpu_notifier(x86_pmu_notifier); | 1469 | perf_cpu_notifier(x86_pmu_notifier); |
1470 | |||
1471 | return 0; | ||
1423 | } | 1472 | } |
1473 | early_initcall(init_hw_perf_events); | ||
1424 | 1474 | ||
1425 | static inline void x86_pmu_read(struct perf_event *event) | 1475 | static inline void x86_pmu_read(struct perf_event *event) |
1426 | { | 1476 | { |
@@ -1666,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1666 | 1716 | ||
1667 | perf_callchain_store(entry, regs->ip); | 1717 | perf_callchain_store(entry, regs->ip); |
1668 | 1718 | ||
1669 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1719 | dump_trace(NULL, regs, NULL, &backtrace_ops, entry); |
1670 | } | 1720 | } |
1671 | 1721 | ||
1672 | #ifdef CONFIG_COMPAT | 1722 | #ifdef CONFIG_COMPAT |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index e421b8cd6944..67e2202a6039 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -1,7 +1,5 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_AMD | 1 | #ifdef CONFIG_CPU_SUP_AMD |
2 | 2 | ||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | ||
4 | |||
5 | static __initconst const u64 amd_hw_cache_event_ids | 3 | static __initconst const u64 amd_hw_cache_event_ids |
6 | [PERF_COUNT_HW_CACHE_MAX] | 4 | [PERF_COUNT_HW_CACHE_MAX] |
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 5 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -275,7 +273,7 @@ done: | |||
275 | return &emptyconstraint; | 273 | return &emptyconstraint; |
276 | } | 274 | } |
277 | 275 | ||
278 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | 276 | static struct amd_nb *amd_alloc_nb(int cpu) |
279 | { | 277 | { |
280 | struct amd_nb *nb; | 278 | struct amd_nb *nb; |
281 | int i; | 279 | int i; |
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
285 | if (!nb) | 283 | if (!nb) |
286 | return NULL; | 284 | return NULL; |
287 | 285 | ||
288 | nb->nb_id = nb_id; | 286 | nb->nb_id = -1; |
289 | 287 | ||
290 | /* | 288 | /* |
291 | * initialize all possible NB constraints | 289 | * initialize all possible NB constraints |
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu) | |||
306 | if (boot_cpu_data.x86_max_cores < 2) | 304 | if (boot_cpu_data.x86_max_cores < 2) |
307 | return NOTIFY_OK; | 305 | return NOTIFY_OK; |
308 | 306 | ||
309 | cpuc->amd_nb = amd_alloc_nb(cpu, -1); | 307 | cpuc->amd_nb = amd_alloc_nb(cpu); |
310 | if (!cpuc->amd_nb) | 308 | if (!cpuc->amd_nb) |
311 | return NOTIFY_BAD; | 309 | return NOTIFY_BAD; |
312 | 310 | ||
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu) | |||
325 | nb_id = amd_get_nb_id(cpu); | 323 | nb_id = amd_get_nb_id(cpu); |
326 | WARN_ON_ONCE(nb_id == BAD_APICID); | 324 | WARN_ON_ONCE(nb_id == BAD_APICID); |
327 | 325 | ||
328 | raw_spin_lock(&amd_nb_lock); | ||
329 | |||
330 | for_each_online_cpu(i) { | 326 | for_each_online_cpu(i) { |
331 | nb = per_cpu(cpu_hw_events, i).amd_nb; | 327 | nb = per_cpu(cpu_hw_events, i).amd_nb; |
332 | if (WARN_ON_ONCE(!nb)) | 328 | if (WARN_ON_ONCE(!nb)) |
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu) | |||
341 | 337 | ||
342 | cpuc->amd_nb->nb_id = nb_id; | 338 | cpuc->amd_nb->nb_id = nb_id; |
343 | cpuc->amd_nb->refcnt++; | 339 | cpuc->amd_nb->refcnt++; |
344 | |||
345 | raw_spin_unlock(&amd_nb_lock); | ||
346 | } | 340 | } |
347 | 341 | ||
348 | static void amd_pmu_cpu_dead(int cpu) | 342 | static void amd_pmu_cpu_dead(int cpu) |
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu) | |||
354 | 348 | ||
355 | cpuhw = &per_cpu(cpu_hw_events, cpu); | 349 | cpuhw = &per_cpu(cpu_hw_events, cpu); |
356 | 350 | ||
357 | raw_spin_lock(&amd_nb_lock); | ||
358 | |||
359 | if (cpuhw->amd_nb) { | 351 | if (cpuhw->amd_nb) { |
360 | struct amd_nb *nb = cpuhw->amd_nb; | 352 | struct amd_nb *nb = cpuhw->amd_nb; |
361 | 353 | ||
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu) | |||
364 | 356 | ||
365 | cpuhw->amd_nb = NULL; | 357 | cpuhw->amd_nb = NULL; |
366 | } | 358 | } |
367 | |||
368 | raw_spin_unlock(&amd_nb_lock); | ||
369 | } | 359 | } |
370 | 360 | ||
371 | static __initconst const struct x86_pmu amd_pmu = { | 361 | static __initconst const struct x86_pmu amd_pmu = { |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c8f5c088cad1..24e390e40f2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
816 | if (ret) | 816 | if (ret) |
817 | return ret; | 817 | return ret; |
818 | 818 | ||
819 | if (event->attr.precise_ip && | ||
820 | (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { | ||
821 | /* | ||
822 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P | ||
823 | * (0x003c) so that we can use it with PEBS. | ||
824 | * | ||
825 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't | ||
826 | * PEBS capable. However we can use INST_RETIRED.ANY_P | ||
827 | * (0x00c0), which is a PEBS capable event, to get the same | ||
828 | * count. | ||
829 | * | ||
830 | * INST_RETIRED.ANY_P counts the number of cycles that retires | ||
831 | * CNTMASK instructions. By setting CNTMASK to a value (16) | ||
832 | * larger than the maximum number of instructions that can be | ||
833 | * retired per cycle (4) and then inverting the condition, we | ||
834 | * count all cycles that retire 16 or less instructions, which | ||
835 | * is every cycle. | ||
836 | * | ||
837 | * Thereby we gain a PEBS capable cycle counter. | ||
838 | */ | ||
839 | u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ | ||
840 | |||
841 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | ||
842 | event->hw.config = alt_config; | ||
843 | } | ||
844 | |||
819 | if (event->attr.type != PERF_TYPE_RAW) | 845 | if (event->attr.type != PERF_TYPE_RAW) |
820 | return 0; | 846 | return 0; |
821 | 847 | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d9f4ff8fcd69..d5a236615501 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -16,32 +16,12 @@ | |||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/nmi.h> | 19 | #include <asm/nmi.h> |
20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
21 | 21 | ||
22 | #include <asm/apic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/perf_event.h> | 23 | #include <asm/perf_event.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | ||
26 | unsigned int cccr_msr; | ||
27 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
28 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
29 | }; | ||
30 | |||
31 | /* Interface defining a CPU specific perfctr watchdog */ | ||
32 | struct wd_ops { | ||
33 | int (*reserve)(void); | ||
34 | void (*unreserve)(void); | ||
35 | int (*setup)(unsigned nmi_hz); | ||
36 | void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); | ||
37 | void (*stop)(void); | ||
38 | unsigned perfctr; | ||
39 | unsigned evntsel; | ||
40 | u64 checkbit; | ||
41 | }; | ||
42 | |||
43 | static const struct wd_ops *wd_ops; | ||
44 | |||
45 | /* | 25 | /* |
46 | * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's | 26 | * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's |
47 | * offset from MSR_P4_BSU_ESCR0. | 27 | * offset from MSR_P4_BSU_ESCR0. |
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops; | |||
60 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); | 40 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); |
61 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); | 41 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); |
62 | 42 | ||
63 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
64 | |||
65 | /* converts an msr to an appropriate reservation bit */ | 43 | /* converts an msr to an appropriate reservation bit */ |
66 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | 44 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) |
67 | { | 45 | { |
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr) | |||
172 | clear_bit(counter, evntsel_nmi_owner); | 150 | clear_bit(counter, evntsel_nmi_owner); |
173 | } | 151 | } |
174 | EXPORT_SYMBOL(release_evntsel_nmi); | 152 | EXPORT_SYMBOL(release_evntsel_nmi); |
175 | |||
176 | void disable_lapic_nmi_watchdog(void) | ||
177 | { | ||
178 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
179 | |||
180 | if (atomic_read(&nmi_active) <= 0) | ||
181 | return; | ||
182 | |||
183 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
184 | |||
185 | if (wd_ops) | ||
186 | wd_ops->unreserve(); | ||
187 | |||
188 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
189 | } | ||
190 | |||
191 | void enable_lapic_nmi_watchdog(void) | ||
192 | { | ||
193 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
194 | |||
195 | /* are we already enabled */ | ||
196 | if (atomic_read(&nmi_active) != 0) | ||
197 | return; | ||
198 | |||
199 | /* are we lapic aware */ | ||
200 | if (!wd_ops) | ||
201 | return; | ||
202 | if (!wd_ops->reserve()) { | ||
203 | printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); | ||
204 | return; | ||
205 | } | ||
206 | |||
207 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); | ||
208 | touch_nmi_watchdog(); | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Activate the NMI watchdog via the local APIC. | ||
213 | */ | ||
214 | |||
215 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
216 | { | ||
217 | u64 counter_val; | ||
218 | unsigned int retval = hz; | ||
219 | |||
220 | /* | ||
221 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
222 | * are writable, with higher bits sign extending from bit 31. | ||
223 | * So, we can only program the counter with 31 bit values and | ||
224 | * 32nd bit should be 1, for 33.. to be 1. | ||
225 | * Find the appropriate nmi_hz | ||
226 | */ | ||
227 | counter_val = (u64)cpu_khz * 1000; | ||
228 | do_div(counter_val, retval); | ||
229 | if (counter_val > 0x7fffffffULL) { | ||
230 | u64 count = (u64)cpu_khz * 1000; | ||
231 | do_div(count, 0x7fffffffUL); | ||
232 | retval = count + 1; | ||
233 | } | ||
234 | return retval; | ||
235 | } | ||
236 | |||
237 | static void write_watchdog_counter(unsigned int perfctr_msr, | ||
238 | const char *descr, unsigned nmi_hz) | ||
239 | { | ||
240 | u64 count = (u64)cpu_khz * 1000; | ||
241 | |||
242 | do_div(count, nmi_hz); | ||
243 | if (descr) | ||
244 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | ||
245 | wrmsrl(perfctr_msr, 0 - count); | ||
246 | } | ||
247 | |||
248 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
249 | const char *descr, unsigned nmi_hz) | ||
250 | { | ||
251 | u64 count = (u64)cpu_khz * 1000; | ||
252 | |||
253 | do_div(count, nmi_hz); | ||
254 | if (descr) | ||
255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | ||
256 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
257 | } | ||
258 | |||
259 | /* | ||
260 | * AMD K7/K8/Family10h/Family11h support. | ||
261 | * AMD keeps this interface nicely stable so there is not much variety | ||
262 | */ | ||
263 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
264 | #define K7_EVNTSEL_INT (1 << 20) | ||
265 | #define K7_EVNTSEL_OS (1 << 17) | ||
266 | #define K7_EVNTSEL_USR (1 << 16) | ||
267 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
268 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
269 | |||
270 | static int setup_k7_watchdog(unsigned nmi_hz) | ||
271 | { | ||
272 | unsigned int perfctr_msr, evntsel_msr; | ||
273 | unsigned int evntsel; | ||
274 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
275 | |||
276 | perfctr_msr = wd_ops->perfctr; | ||
277 | evntsel_msr = wd_ops->evntsel; | ||
278 | |||
279 | wrmsrl(perfctr_msr, 0UL); | ||
280 | |||
281 | evntsel = K7_EVNTSEL_INT | ||
282 | | K7_EVNTSEL_OS | ||
283 | | K7_EVNTSEL_USR | ||
284 | | K7_NMI_EVENT; | ||
285 | |||
286 | /* setup the timer */ | ||
287 | wrmsr(evntsel_msr, evntsel, 0); | ||
288 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); | ||
289 | |||
290 | /* initialize the wd struct before enabling */ | ||
291 | wd->perfctr_msr = perfctr_msr; | ||
292 | wd->evntsel_msr = evntsel_msr; | ||
293 | wd->cccr_msr = 0; /* unused */ | ||
294 | |||
295 | /* ok, everything is initialized, announce that we're set */ | ||
296 | cpu_nmi_set_wd_enabled(); | ||
297 | |||
298 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
299 | evntsel |= K7_EVNTSEL_ENABLE; | ||
300 | wrmsr(evntsel_msr, evntsel, 0); | ||
301 | |||
302 | return 1; | ||
303 | } | ||
304 | |||
305 | static void single_msr_stop_watchdog(void) | ||
306 | { | ||
307 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
308 | |||
309 | wrmsr(wd->evntsel_msr, 0, 0); | ||
310 | } | ||
311 | |||
312 | static int single_msr_reserve(void) | ||
313 | { | ||
314 | if (!reserve_perfctr_nmi(wd_ops->perfctr)) | ||
315 | return 0; | ||
316 | |||
317 | if (!reserve_evntsel_nmi(wd_ops->evntsel)) { | ||
318 | release_perfctr_nmi(wd_ops->perfctr); | ||
319 | return 0; | ||
320 | } | ||
321 | return 1; | ||
322 | } | ||
323 | |||
324 | static void single_msr_unreserve(void) | ||
325 | { | ||
326 | release_evntsel_nmi(wd_ops->evntsel); | ||
327 | release_perfctr_nmi(wd_ops->perfctr); | ||
328 | } | ||
329 | |||
330 | static void __kprobes | ||
331 | single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
332 | { | ||
333 | /* start the cycle over again */ | ||
334 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
335 | } | ||
336 | |||
337 | static const struct wd_ops k7_wd_ops = { | ||
338 | .reserve = single_msr_reserve, | ||
339 | .unreserve = single_msr_unreserve, | ||
340 | .setup = setup_k7_watchdog, | ||
341 | .rearm = single_msr_rearm, | ||
342 | .stop = single_msr_stop_watchdog, | ||
343 | .perfctr = MSR_K7_PERFCTR0, | ||
344 | .evntsel = MSR_K7_EVNTSEL0, | ||
345 | .checkbit = 1ULL << 47, | ||
346 | }; | ||
347 | |||
348 | /* | ||
349 | * Intel Model 6 (PPro+,P2,P3,P-M,Core1) | ||
350 | */ | ||
351 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
352 | #define P6_EVNTSEL_INT (1 << 20) | ||
353 | #define P6_EVNTSEL_OS (1 << 17) | ||
354 | #define P6_EVNTSEL_USR (1 << 16) | ||
355 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
356 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
357 | |||
358 | static int setup_p6_watchdog(unsigned nmi_hz) | ||
359 | { | ||
360 | unsigned int perfctr_msr, evntsel_msr; | ||
361 | unsigned int evntsel; | ||
362 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
363 | |||
364 | perfctr_msr = wd_ops->perfctr; | ||
365 | evntsel_msr = wd_ops->evntsel; | ||
366 | |||
367 | /* KVM doesn't implement this MSR */ | ||
368 | if (wrmsr_safe(perfctr_msr, 0, 0) < 0) | ||
369 | return 0; | ||
370 | |||
371 | evntsel = P6_EVNTSEL_INT | ||
372 | | P6_EVNTSEL_OS | ||
373 | | P6_EVNTSEL_USR | ||
374 | | P6_NMI_EVENT; | ||
375 | |||
376 | /* setup the timer */ | ||
377 | wrmsr(evntsel_msr, evntsel, 0); | ||
378 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
379 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); | ||
380 | |||
381 | /* initialize the wd struct before enabling */ | ||
382 | wd->perfctr_msr = perfctr_msr; | ||
383 | wd->evntsel_msr = evntsel_msr; | ||
384 | wd->cccr_msr = 0; /* unused */ | ||
385 | |||
386 | /* ok, everything is initialized, announce that we're set */ | ||
387 | cpu_nmi_set_wd_enabled(); | ||
388 | |||
389 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
390 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
391 | wrmsr(evntsel_msr, evntsel, 0); | ||
392 | |||
393 | return 1; | ||
394 | } | ||
395 | |||
396 | static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
397 | { | ||
398 | /* | ||
399 | * P6 based Pentium M need to re-unmask | ||
400 | * the apic vector but it doesn't hurt | ||
401 | * other P6 variant. | ||
402 | * ArchPerfom/Core Duo also needs this | ||
403 | */ | ||
404 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
405 | |||
406 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
407 | write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); | ||
408 | } | ||
409 | |||
410 | static const struct wd_ops p6_wd_ops = { | ||
411 | .reserve = single_msr_reserve, | ||
412 | .unreserve = single_msr_unreserve, | ||
413 | .setup = setup_p6_watchdog, | ||
414 | .rearm = p6_rearm, | ||
415 | .stop = single_msr_stop_watchdog, | ||
416 | .perfctr = MSR_P6_PERFCTR0, | ||
417 | .evntsel = MSR_P6_EVNTSEL0, | ||
418 | .checkbit = 1ULL << 39, | ||
419 | }; | ||
420 | |||
421 | /* | ||
422 | * Intel P4 performance counters. | ||
423 | * By far the most complicated of all. | ||
424 | */ | ||
425 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) | ||
426 | #define P4_ESCR_EVENT_SELECT(N) ((N) << 25) | ||
427 | #define P4_ESCR_OS (1 << 3) | ||
428 | #define P4_ESCR_USR (1 << 2) | ||
429 | #define P4_CCCR_OVF_PMI0 (1 << 26) | ||
430 | #define P4_CCCR_OVF_PMI1 (1 << 27) | ||
431 | #define P4_CCCR_THRESHOLD(N) ((N) << 20) | ||
432 | #define P4_CCCR_COMPLEMENT (1 << 19) | ||
433 | #define P4_CCCR_COMPARE (1 << 18) | ||
434 | #define P4_CCCR_REQUIRED (3 << 16) | ||
435 | #define P4_CCCR_ESCR_SELECT(N) ((N) << 13) | ||
436 | #define P4_CCCR_ENABLE (1 << 12) | ||
437 | #define P4_CCCR_OVF (1 << 31) | ||
438 | |||
439 | #define P4_CONTROLS 18 | ||
440 | static unsigned int p4_controls[18] = { | ||
441 | MSR_P4_BPU_CCCR0, | ||
442 | MSR_P4_BPU_CCCR1, | ||
443 | MSR_P4_BPU_CCCR2, | ||
444 | MSR_P4_BPU_CCCR3, | ||
445 | MSR_P4_MS_CCCR0, | ||
446 | MSR_P4_MS_CCCR1, | ||
447 | MSR_P4_MS_CCCR2, | ||
448 | MSR_P4_MS_CCCR3, | ||
449 | MSR_P4_FLAME_CCCR0, | ||
450 | MSR_P4_FLAME_CCCR1, | ||
451 | MSR_P4_FLAME_CCCR2, | ||
452 | MSR_P4_FLAME_CCCR3, | ||
453 | MSR_P4_IQ_CCCR0, | ||
454 | MSR_P4_IQ_CCCR1, | ||
455 | MSR_P4_IQ_CCCR2, | ||
456 | MSR_P4_IQ_CCCR3, | ||
457 | MSR_P4_IQ_CCCR4, | ||
458 | MSR_P4_IQ_CCCR5, | ||
459 | }; | ||
460 | /* | ||
461 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
462 | * CRU_ESCR0 (with any non-null event selector) through a complemented | ||
463 | * max threshold. [IA32-Vol3, Section 14.9.9] | ||
464 | */ | ||
465 | static int setup_p4_watchdog(unsigned nmi_hz) | ||
466 | { | ||
467 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
468 | unsigned int evntsel, cccr_val; | ||
469 | unsigned int misc_enable, dummy; | ||
470 | unsigned int ht_num; | ||
471 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
472 | |||
473 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
474 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
475 | return 0; | ||
476 | |||
477 | #ifdef CONFIG_SMP | ||
478 | /* detect which hyperthread we are on */ | ||
479 | if (smp_num_siblings == 2) { | ||
480 | unsigned int ebx, apicid; | ||
481 | |||
482 | ebx = cpuid_ebx(1); | ||
483 | apicid = (ebx >> 24) & 0xff; | ||
484 | ht_num = apicid & 1; | ||
485 | } else | ||
486 | #endif | ||
487 | ht_num = 0; | ||
488 | |||
489 | /* | ||
490 | * performance counters are shared resources | ||
491 | * assign each hyperthread its own set | ||
492 | * (re-use the ESCR0 register, seems safe | ||
493 | * and keeps the cccr_val the same) | ||
494 | */ | ||
495 | if (!ht_num) { | ||
496 | /* logical cpu 0 */ | ||
497 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
498 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
499 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
500 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
501 | |||
502 | /* | ||
503 | * If we're on the kdump kernel or other situation, we may | ||
504 | * still have other performance counter registers set to | ||
505 | * interrupt and they'll keep interrupting forever because | ||
506 | * of the P4_CCCR_OVF quirk. So we need to ACK all the | ||
507 | * pending interrupts and disable all the registers here, | ||
508 | * before reenabling the NMI delivery. Refer to p4_rearm() | ||
509 | * about the P4_CCCR_OVF quirk. | ||
510 | */ | ||
511 | if (reset_devices) { | ||
512 | unsigned int low, high; | ||
513 | int i; | ||
514 | |||
515 | for (i = 0; i < P4_CONTROLS; i++) { | ||
516 | rdmsr(p4_controls[i], low, high); | ||
517 | low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); | ||
518 | wrmsr(p4_controls[i], low, high); | ||
519 | } | ||
520 | } | ||
521 | } else { | ||
522 | /* logical cpu 1 */ | ||
523 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
524 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
525 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
526 | |||
527 | /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ | ||
528 | if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) | ||
529 | cccr_val = P4_CCCR_OVF_PMI0; | ||
530 | else | ||
531 | cccr_val = P4_CCCR_OVF_PMI1; | ||
532 | cccr_val |= P4_CCCR_ESCR_SELECT(4); | ||
533 | } | ||
534 | |||
535 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
536 | | P4_ESCR_OS | ||
537 | | P4_ESCR_USR; | ||
538 | |||
539 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
540 | | P4_CCCR_COMPLEMENT | ||
541 | | P4_CCCR_COMPARE | ||
542 | | P4_CCCR_REQUIRED; | ||
543 | |||
544 | wrmsr(evntsel_msr, evntsel, 0); | ||
545 | wrmsr(cccr_msr, cccr_val, 0); | ||
546 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | ||
547 | |||
548 | wd->perfctr_msr = perfctr_msr; | ||
549 | wd->evntsel_msr = evntsel_msr; | ||
550 | wd->cccr_msr = cccr_msr; | ||
551 | |||
552 | /* ok, everything is initialized, announce that we're set */ | ||
553 | cpu_nmi_set_wd_enabled(); | ||
554 | |||
555 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
556 | cccr_val |= P4_CCCR_ENABLE; | ||
557 | wrmsr(cccr_msr, cccr_val, 0); | ||
558 | return 1; | ||
559 | } | ||
560 | |||
561 | static void stop_p4_watchdog(void) | ||
562 | { | ||
563 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
564 | wrmsr(wd->cccr_msr, 0, 0); | ||
565 | wrmsr(wd->evntsel_msr, 0, 0); | ||
566 | } | ||
567 | |||
568 | static int p4_reserve(void) | ||
569 | { | ||
570 | if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) | ||
571 | return 0; | ||
572 | #ifdef CONFIG_SMP | ||
573 | if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) | ||
574 | goto fail1; | ||
575 | #endif | ||
576 | if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) | ||
577 | goto fail2; | ||
578 | /* RED-PEN why is ESCR1 not reserved here? */ | ||
579 | return 1; | ||
580 | fail2: | ||
581 | #ifdef CONFIG_SMP | ||
582 | if (smp_num_siblings > 1) | ||
583 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
584 | fail1: | ||
585 | #endif | ||
586 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | static void p4_unreserve(void) | ||
591 | { | ||
592 | #ifdef CONFIG_SMP | ||
593 | if (smp_num_siblings > 1) | ||
594 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
595 | #endif | ||
596 | release_evntsel_nmi(MSR_P4_CRU_ESCR0); | ||
597 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
598 | } | ||
599 | |||
600 | static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
601 | { | ||
602 | unsigned dummy; | ||
603 | /* | ||
604 | * P4 quirks: | ||
605 | * - An overflown perfctr will assert its interrupt | ||
606 | * until the OVF flag in its CCCR is cleared. | ||
607 | * - LVTPC is masked on interrupt and must be | ||
608 | * unmasked by the LVTPC handler. | ||
609 | */ | ||
610 | rdmsrl(wd->cccr_msr, dummy); | ||
611 | dummy &= ~P4_CCCR_OVF; | ||
612 | wrmsrl(wd->cccr_msr, dummy); | ||
613 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
614 | /* start the cycle over again */ | ||
615 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
616 | } | ||
617 | |||
618 | static const struct wd_ops p4_wd_ops = { | ||
619 | .reserve = p4_reserve, | ||
620 | .unreserve = p4_unreserve, | ||
621 | .setup = setup_p4_watchdog, | ||
622 | .rearm = p4_rearm, | ||
623 | .stop = stop_p4_watchdog, | ||
624 | /* RED-PEN this is wrong for the other sibling */ | ||
625 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
626 | .evntsel = MSR_P4_BSU_ESCR0, | ||
627 | .checkbit = 1ULL << 39, | ||
628 | }; | ||
629 | |||
630 | /* | ||
631 | * Watchdog using the Intel architected PerfMon. | ||
632 | * Used for Core2 and hopefully all future Intel CPUs. | ||
633 | */ | ||
634 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
635 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
636 | |||
637 | static struct wd_ops intel_arch_wd_ops; | ||
638 | |||
639 | static int setup_intel_arch_watchdog(unsigned nmi_hz) | ||
640 | { | ||
641 | unsigned int ebx; | ||
642 | union cpuid10_eax eax; | ||
643 | unsigned int unused; | ||
644 | unsigned int perfctr_msr, evntsel_msr; | ||
645 | unsigned int evntsel; | ||
646 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
647 | |||
648 | /* | ||
649 | * Check whether the Architectural PerfMon supports | ||
650 | * Unhalted Core Cycles Event or not. | ||
651 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
652 | */ | ||
653 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
654 | if ((eax.split.mask_length < | ||
655 | (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
656 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
657 | return 0; | ||
658 | |||
659 | perfctr_msr = wd_ops->perfctr; | ||
660 | evntsel_msr = wd_ops->evntsel; | ||
661 | |||
662 | wrmsrl(perfctr_msr, 0UL); | ||
663 | |||
664 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
665 | | ARCH_PERFMON_EVENTSEL_OS | ||
666 | | ARCH_PERFMON_EVENTSEL_USR | ||
667 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
668 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
669 | |||
670 | /* setup the timer */ | ||
671 | wrmsr(evntsel_msr, evntsel, 0); | ||
672 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
673 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | ||
674 | |||
675 | wd->perfctr_msr = perfctr_msr; | ||
676 | wd->evntsel_msr = evntsel_msr; | ||
677 | wd->cccr_msr = 0; /* unused */ | ||
678 | |||
679 | /* ok, everything is initialized, announce that we're set */ | ||
680 | cpu_nmi_set_wd_enabled(); | ||
681 | |||
682 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
683 | evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
684 | wrmsr(evntsel_msr, evntsel, 0); | ||
685 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | ||
686 | return 1; | ||
687 | } | ||
688 | |||
689 | static struct wd_ops intel_arch_wd_ops __read_mostly = { | ||
690 | .reserve = single_msr_reserve, | ||
691 | .unreserve = single_msr_unreserve, | ||
692 | .setup = setup_intel_arch_watchdog, | ||
693 | .rearm = p6_rearm, | ||
694 | .stop = single_msr_stop_watchdog, | ||
695 | .perfctr = MSR_ARCH_PERFMON_PERFCTR1, | ||
696 | .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, | ||
697 | }; | ||
698 | |||
699 | static void probe_nmi_watchdog(void) | ||
700 | { | ||
701 | switch (boot_cpu_data.x86_vendor) { | ||
702 | case X86_VENDOR_AMD: | ||
703 | if (boot_cpu_data.x86 == 6 || | ||
704 | (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) | ||
705 | wd_ops = &k7_wd_ops; | ||
706 | return; | ||
707 | case X86_VENDOR_INTEL: | ||
708 | /* Work around where perfctr1 doesn't have a working enable | ||
709 | * bit as described in the following errata: | ||
710 | * AE49 Core Duo and Intel Core Solo 65 nm | ||
711 | * AN49 Intel Pentium Dual-Core | ||
712 | * AF49 Dual-Core Intel Xeon Processor LV | ||
713 | */ | ||
714 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || | ||
715 | ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && | ||
716 | boot_cpu_data.x86_mask == 4))) { | ||
717 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; | ||
718 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; | ||
719 | } | ||
720 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
721 | wd_ops = &intel_arch_wd_ops; | ||
722 | break; | ||
723 | } | ||
724 | switch (boot_cpu_data.x86) { | ||
725 | case 6: | ||
726 | if (boot_cpu_data.x86_model > 13) | ||
727 | return; | ||
728 | |||
729 | wd_ops = &p6_wd_ops; | ||
730 | break; | ||
731 | case 15: | ||
732 | wd_ops = &p4_wd_ops; | ||
733 | break; | ||
734 | default: | ||
735 | return; | ||
736 | } | ||
737 | break; | ||
738 | } | ||
739 | } | ||
740 | |||
741 | /* Interface to nmi.c */ | ||
742 | |||
743 | int lapic_watchdog_init(unsigned nmi_hz) | ||
744 | { | ||
745 | if (!wd_ops) { | ||
746 | probe_nmi_watchdog(); | ||
747 | if (!wd_ops) { | ||
748 | printk(KERN_INFO "NMI watchdog: CPU not supported\n"); | ||
749 | return -1; | ||
750 | } | ||
751 | |||
752 | if (!wd_ops->reserve()) { | ||
753 | printk(KERN_ERR | ||
754 | "NMI watchdog: cannot reserve perfctrs\n"); | ||
755 | return -1; | ||
756 | } | ||
757 | } | ||
758 | |||
759 | if (!(wd_ops->setup(nmi_hz))) { | ||
760 | printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", | ||
761 | raw_smp_processor_id()); | ||
762 | return -1; | ||
763 | } | ||
764 | |||
765 | return 0; | ||
766 | } | ||
767 | |||
768 | void lapic_watchdog_stop(void) | ||
769 | { | ||
770 | if (wd_ops) | ||
771 | wd_ops->stop(); | ||
772 | } | ||
773 | |||
774 | unsigned lapic_adjust_nmi_hz(unsigned hz) | ||
775 | { | ||
776 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
777 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
778 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) | ||
779 | hz = adjust_for_32bit_ctr(hz); | ||
780 | return hz; | ||
781 | } | ||
782 | |||
783 | int __kprobes lapic_wd_event(unsigned nmi_hz) | ||
784 | { | ||
785 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
786 | u64 ctr; | ||
787 | |||
788 | rdmsrl(wd->perfctr_msr, ctr); | ||
789 | if (ctr & wd_ops->checkbit) /* perfctr still running? */ | ||
790 | return 0; | ||
791 | |||
792 | wd_ops->rearm(wd, nmi_hz); | ||
793 | return 1; | ||
794 | } | ||
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 1b7b31ab7d86..212a6a42527c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/init.h> | 33 | #include <linux/init.h> |
34 | #include <linux/poll.h> | 34 | #include <linux/poll.h> |
35 | #include <linux/smp.h> | 35 | #include <linux/smp.h> |
36 | #include <linux/smp_lock.h> | ||
37 | #include <linux/major.h> | 36 | #include <linux/major.h> |
38 | #include <linux/fs.h> | 37 | #include <linux/fs.h> |
39 | #include <linux/device.h> | 38 | #include <linux/device.h> |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6e8752c1bd52..8474c998cbd4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { | |||
175 | 175 | ||
176 | void | 176 | void |
177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
178 | unsigned long *stack, unsigned long bp, char *log_lvl) | 178 | unsigned long *stack, char *log_lvl) |
179 | { | 179 | { |
180 | printk("%sCall Trace:\n", log_lvl); | 180 | printk("%sCall Trace:\n", log_lvl); |
181 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 181 | dump_trace(task, regs, stack, &print_trace_ops, log_lvl); |
182 | } | 182 | } |
183 | 183 | ||
184 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 184 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
185 | unsigned long *stack, unsigned long bp) | 185 | unsigned long *stack) |
186 | { | 186 | { |
187 | show_trace_log_lvl(task, regs, stack, bp, ""); | 187 | show_trace_log_lvl(task, regs, stack, ""); |
188 | } | 188 | } |
189 | 189 | ||
190 | void show_stack(struct task_struct *task, unsigned long *sp) | 190 | void show_stack(struct task_struct *task, unsigned long *sp) |
191 | { | 191 | { |
192 | show_stack_log_lvl(task, NULL, sp, 0, ""); | 192 | show_stack_log_lvl(task, NULL, sp, ""); |
193 | } | 193 | } |
194 | 194 | ||
195 | /* | 195 | /* |
@@ -210,7 +210,7 @@ void dump_stack(void) | |||
210 | init_utsname()->release, | 210 | init_utsname()->release, |
211 | (int)strcspn(init_utsname()->version, " "), | 211 | (int)strcspn(init_utsname()->version, " "), |
212 | init_utsname()->version); | 212 | init_utsname()->version); |
213 | show_trace(NULL, NULL, &stack, bp); | 213 | show_trace(NULL, NULL, &stack); |
214 | } | 214 | } |
215 | EXPORT_SYMBOL(dump_stack); | 215 | EXPORT_SYMBOL(dump_stack); |
216 | 216 | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1bc7f75a5bda..74cc1eda384b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,11 +17,12 @@ | |||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | 19 | ||
20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 20 | void dump_trace(struct task_struct *task, |
21 | unsigned long *stack, unsigned long bp, | 21 | struct pt_regs *regs, unsigned long *stack, |
22 | const struct stacktrace_ops *ops, void *data) | 22 | const struct stacktrace_ops *ops, void *data) |
23 | { | 23 | { |
24 | int graph = 0; | 24 | int graph = 0; |
25 | unsigned long bp; | ||
25 | 26 | ||
26 | if (!task) | 27 | if (!task) |
27 | task = current; | 28 | task = current; |
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
34 | stack = (unsigned long *)task->thread.sp; | 35 | stack = (unsigned long *)task->thread.sp; |
35 | } | 36 | } |
36 | 37 | ||
37 | #ifdef CONFIG_FRAME_POINTER | 38 | bp = stack_frame(task, regs); |
38 | if (!bp) { | ||
39 | if (task == current) { | ||
40 | /* Grab bp right from our regs */ | ||
41 | get_bp(bp); | ||
42 | } else { | ||
43 | /* bp is the last reg pushed by switch_to */ | ||
44 | bp = *(unsigned long *) task->thread.sp; | ||
45 | } | ||
46 | } | ||
47 | #endif | ||
48 | |||
49 | for (;;) { | 39 | for (;;) { |
50 | struct thread_info *context; | 40 | struct thread_info *context; |
51 | 41 | ||
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace); | |||
65 | 55 | ||
66 | void | 56 | void |
67 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 57 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
68 | unsigned long *sp, unsigned long bp, char *log_lvl) | 58 | unsigned long *sp, char *log_lvl) |
69 | { | 59 | { |
70 | unsigned long *stack; | 60 | unsigned long *stack; |
71 | int i; | 61 | int i; |
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
87 | touch_nmi_watchdog(); | 77 | touch_nmi_watchdog(); |
88 | } | 78 | } |
89 | printk(KERN_CONT "\n"); | 79 | printk(KERN_CONT "\n"); |
90 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 80 | show_trace_log_lvl(task, regs, sp, log_lvl); |
91 | } | 81 | } |
92 | 82 | ||
93 | 83 | ||
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs) | |||
112 | u8 *ip; | 102 | u8 *ip; |
113 | 103 | ||
114 | printk(KERN_EMERG "Stack:\n"); | 104 | printk(KERN_EMERG "Stack:\n"); |
115 | show_stack_log_lvl(NULL, regs, ®s->sp, | 105 | show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); |
116 | 0, KERN_EMERG); | ||
117 | 106 | ||
118 | printk(KERN_EMERG "Code: "); | 107 | printk(KERN_EMERG "Code: "); |
119 | 108 | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6a340485249a..64101335de19 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | |||
139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
140 | */ | 140 | */ |
141 | 141 | ||
142 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 142 | void dump_trace(struct task_struct *task, |
143 | unsigned long *stack, unsigned long bp, | 143 | struct pt_regs *regs, unsigned long *stack, |
144 | const struct stacktrace_ops *ops, void *data) | 144 | const struct stacktrace_ops *ops, void *data) |
145 | { | 145 | { |
146 | const unsigned cpu = get_cpu(); | 146 | const unsigned cpu = get_cpu(); |
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
149 | unsigned used = 0; | 149 | unsigned used = 0; |
150 | struct thread_info *tinfo; | 150 | struct thread_info *tinfo; |
151 | int graph = 0; | 151 | int graph = 0; |
152 | unsigned long bp; | ||
152 | 153 | ||
153 | if (!task) | 154 | if (!task) |
154 | task = current; | 155 | task = current; |
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
160 | stack = (unsigned long *)task->thread.sp; | 161 | stack = (unsigned long *)task->thread.sp; |
161 | } | 162 | } |
162 | 163 | ||
163 | #ifdef CONFIG_FRAME_POINTER | 164 | bp = stack_frame(task, regs); |
164 | if (!bp) { | ||
165 | if (task == current) { | ||
166 | /* Grab bp right from our regs */ | ||
167 | get_bp(bp); | ||
168 | } else { | ||
169 | /* bp is the last reg pushed by switch_to */ | ||
170 | bp = *(unsigned long *) task->thread.sp; | ||
171 | } | ||
172 | } | ||
173 | #endif | ||
174 | |||
175 | /* | 165 | /* |
176 | * Print function call entries in all stacks, starting at the | 166 | * Print function call entries in all stacks, starting at the |
177 | * current stack address. If the stacks consist of nested | 167 | * current stack address. If the stacks consist of nested |
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); | |||
235 | 225 | ||
236 | void | 226 | void |
237 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 227 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
238 | unsigned long *sp, unsigned long bp, char *log_lvl) | 228 | unsigned long *sp, char *log_lvl) |
239 | { | 229 | { |
240 | unsigned long *irq_stack_end; | 230 | unsigned long *irq_stack_end; |
241 | unsigned long *irq_stack; | 231 | unsigned long *irq_stack; |
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
279 | preempt_enable(); | 269 | preempt_enable(); |
280 | 270 | ||
281 | printk(KERN_CONT "\n"); | 271 | printk(KERN_CONT "\n"); |
282 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 272 | show_trace_log_lvl(task, regs, sp, log_lvl); |
283 | } | 273 | } |
284 | 274 | ||
285 | void show_registers(struct pt_regs *regs) | 275 | void show_registers(struct pt_regs *regs) |
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs) | |||
308 | 298 | ||
309 | printk(KERN_EMERG "Stack:\n"); | 299 | printk(KERN_EMERG "Stack:\n"); |
310 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 300 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
311 | regs->bp, KERN_EMERG); | 301 | KERN_EMERG); |
312 | 302 | ||
313 | printk(KERN_EMERG "Code: "); | 303 | printk(KERN_EMERG "Code: "); |
314 | 304 | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 59e175e89599..591e60104278 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -395,7 +395,7 @@ sysenter_past_esp: | |||
395 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 395 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
396 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 396 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
397 | */ | 397 | */ |
398 | pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) | 398 | pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) |
399 | CFI_REL_OFFSET eip, 0 | 399 | CFI_REL_OFFSET eip, 0 |
400 | 400 | ||
401 | pushl_cfi %eax | 401 | pushl_cfi %eax |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fe2690d71c0c..e3ba417e8697 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64) | |||
295 | .endm | 295 | .endm |
296 | 296 | ||
297 | /* save partial stack frame */ | 297 | /* save partial stack frame */ |
298 | .pushsection .kprobes.text, "ax" | ||
298 | ENTRY(save_args) | 299 | ENTRY(save_args) |
299 | XCPT_FRAME | 300 | XCPT_FRAME |
300 | cld | 301 | cld |
@@ -334,6 +335,7 @@ ENTRY(save_args) | |||
334 | ret | 335 | ret |
335 | CFI_ENDPROC | 336 | CFI_ENDPROC |
336 | END(save_args) | 337 | END(save_args) |
338 | .popsection | ||
337 | 339 | ||
338 | ENTRY(save_rest) | 340 | ENTRY(save_rest) |
339 | PARTIAL_FRAME 1 REST_SKIP+8 | 341 | PARTIAL_FRAME 1 REST_SKIP+8 |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2d..298448656b60 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/list.h> | 21 | #include <linux/list.h> |
22 | #include <linux/module.h> | ||
22 | 23 | ||
23 | #include <trace/syscall.h> | 24 | #include <trace/syscall.h> |
24 | 25 | ||
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); | |||
49 | int ftrace_arch_code_modify_prepare(void) | 50 | int ftrace_arch_code_modify_prepare(void) |
50 | { | 51 | { |
51 | set_kernel_text_rw(); | 52 | set_kernel_text_rw(); |
53 | set_all_modules_text_rw(); | ||
52 | modifying_code = 1; | 54 | modifying_code = 1; |
53 | return 0; | 55 | return 0; |
54 | } | 56 | } |
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) | |||
56 | int ftrace_arch_code_modify_post_process(void) | 58 | int ftrace_arch_code_modify_post_process(void) |
57 | { | 59 | { |
58 | modifying_code = 0; | 60 | modifying_code = 0; |
61 | set_all_modules_text_ro(); | ||
59 | set_kernel_text_ro(); | 62 | set_kernel_text_ro(); |
60 | return 0; | 63 | return 0; |
61 | } | 64 | } |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index bcece91dd311..9f54b209c378 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -60,16 +60,18 @@ | |||
60 | #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) | 60 | #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) |
61 | #endif | 61 | #endif |
62 | 62 | ||
63 | /* Number of possible pages in the lowmem region */ | ||
64 | LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) | ||
65 | |||
63 | /* Enough space to fit pagetables for the low memory linear map */ | 66 | /* Enough space to fit pagetables for the low memory linear map */ |
64 | MAPPING_BEYOND_END = \ | 67 | MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT |
65 | PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT | ||
66 | 68 | ||
67 | /* | 69 | /* |
68 | * Worst-case size of the kernel mapping we need to make: | 70 | * Worst-case size of the kernel mapping we need to make: |
69 | * the worst-case size of the kernel itself, plus the extra we need | 71 | * a relocatable kernel can live anywhere in lowmem, so we need to be able |
70 | * to map for the linear map. | 72 | * to map all of lowmem. |
71 | */ | 73 | */ |
72 | KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT | 74 | KERNEL_PAGES = LOWMEM_PAGES |
73 | 75 | ||
74 | INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm | 76 | INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm |
75 | RESERVE_BRK(pagetables, INIT_MAP_SIZE) | 77 | RESERVE_BRK(pagetables, INIT_MAP_SIZE) |
@@ -137,39 +139,6 @@ ENTRY(startup_32) | |||
137 | movl %eax, pa(olpc_ofw_pgd) | 139 | movl %eax, pa(olpc_ofw_pgd) |
138 | #endif | 140 | #endif |
139 | 141 | ||
140 | #ifdef CONFIG_PARAVIRT | ||
141 | /* This is can only trip for a broken bootloader... */ | ||
142 | cmpw $0x207, pa(boot_params + BP_version) | ||
143 | jb default_entry | ||
144 | |||
145 | /* Paravirt-compatible boot parameters. Look to see what architecture | ||
146 | we're booting under. */ | ||
147 | movl pa(boot_params + BP_hardware_subarch), %eax | ||
148 | cmpl $num_subarch_entries, %eax | ||
149 | jae bad_subarch | ||
150 | |||
151 | movl pa(subarch_entries)(,%eax,4), %eax | ||
152 | subl $__PAGE_OFFSET, %eax | ||
153 | jmp *%eax | ||
154 | |||
155 | bad_subarch: | ||
156 | WEAK(lguest_entry) | ||
157 | WEAK(xen_entry) | ||
158 | /* Unknown implementation; there's really | ||
159 | nothing we can do at this point. */ | ||
160 | ud2a | ||
161 | |||
162 | __INITDATA | ||
163 | |||
164 | subarch_entries: | ||
165 | .long default_entry /* normal x86/PC */ | ||
166 | .long lguest_entry /* lguest hypervisor */ | ||
167 | .long xen_entry /* Xen hypervisor */ | ||
168 | .long default_entry /* Moorestown MID */ | ||
169 | num_subarch_entries = (. - subarch_entries) / 4 | ||
170 | .previous | ||
171 | #endif /* CONFIG_PARAVIRT */ | ||
172 | |||
173 | /* | 142 | /* |
174 | * Initialize page tables. This creates a PDE and a set of page | 143 | * Initialize page tables. This creates a PDE and a set of page |
175 | * tables, which are located immediately beyond __brk_base. The variable | 144 | * tables, which are located immediately beyond __brk_base. The variable |
@@ -179,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
179 | * | 148 | * |
180 | * Note that the stack is not yet set up! | 149 | * Note that the stack is not yet set up! |
181 | */ | 150 | */ |
182 | default_entry: | ||
183 | #ifdef CONFIG_X86_PAE | 151 | #ifdef CONFIG_X86_PAE |
184 | 152 | ||
185 | /* | 153 | /* |
@@ -259,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
259 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax | 227 | movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax |
260 | movl %eax,pa(initial_page_table+0xffc) | 228 | movl %eax,pa(initial_page_table+0xffc) |
261 | #endif | 229 | #endif |
262 | jmp 3f | 230 | |
231 | #ifdef CONFIG_PARAVIRT | ||
232 | /* This is can only trip for a broken bootloader... */ | ||
233 | cmpw $0x207, pa(boot_params + BP_version) | ||
234 | jb default_entry | ||
235 | |||
236 | /* Paravirt-compatible boot parameters. Look to see what architecture | ||
237 | we're booting under. */ | ||
238 | movl pa(boot_params + BP_hardware_subarch), %eax | ||
239 | cmpl $num_subarch_entries, %eax | ||
240 | jae bad_subarch | ||
241 | |||
242 | movl pa(subarch_entries)(,%eax,4), %eax | ||
243 | subl $__PAGE_OFFSET, %eax | ||
244 | jmp *%eax | ||
245 | |||
246 | bad_subarch: | ||
247 | WEAK(lguest_entry) | ||
248 | WEAK(xen_entry) | ||
249 | /* Unknown implementation; there's really | ||
250 | nothing we can do at this point. */ | ||
251 | ud2a | ||
252 | |||
253 | __INITDATA | ||
254 | |||
255 | subarch_entries: | ||
256 | .long default_entry /* normal x86/PC */ | ||
257 | .long lguest_entry /* lguest hypervisor */ | ||
258 | .long xen_entry /* Xen hypervisor */ | ||
259 | .long default_entry /* Moorestown MID */ | ||
260 | num_subarch_entries = (. - subarch_entries) / 4 | ||
261 | .previous | ||
262 | #else | ||
263 | jmp default_entry | ||
264 | #endif /* CONFIG_PARAVIRT */ | ||
265 | |||
263 | /* | 266 | /* |
264 | * Non-boot CPU entry point; entered from trampoline.S | 267 | * Non-boot CPU entry point; entered from trampoline.S |
265 | * We can't lgdt here, because lgdt itself uses a data segment, but | 268 | * We can't lgdt here, because lgdt itself uses a data segment, but |
@@ -280,7 +283,7 @@ ENTRY(startup_32_smp) | |||
280 | movl %eax,%fs | 283 | movl %eax,%fs |
281 | movl %eax,%gs | 284 | movl %eax,%gs |
282 | #endif /* CONFIG_SMP */ | 285 | #endif /* CONFIG_SMP */ |
283 | 3: | 286 | default_entry: |
284 | 287 | ||
285 | /* | 288 | /* |
286 | * New page tables may be in 4Mbyte page mode and may | 289 | * New page tables may be in 4Mbyte page mode and may |
@@ -314,6 +317,10 @@ ENTRY(startup_32_smp) | |||
314 | subl $0x80000001, %eax | 317 | subl $0x80000001, %eax |
315 | cmpl $(0x8000ffff-0x80000001), %eax | 318 | cmpl $(0x8000ffff-0x80000001), %eax |
316 | ja 6f | 319 | ja 6f |
320 | |||
321 | /* Clear bogus XD_DISABLE bits */ | ||
322 | call verify_cpu | ||
323 | |||
317 | mov $0x80000001, %eax | 324 | mov $0x80000001, %eax |
318 | cpuid | 325 | cpuid |
319 | /* Execute Disable bit supported? */ | 326 | /* Execute Disable bit supported? */ |
@@ -609,6 +616,8 @@ ignore_int: | |||
609 | #endif | 616 | #endif |
610 | iret | 617 | iret |
611 | 618 | ||
619 | #include "verify_cpu.S" | ||
620 | |||
612 | __REFDATA | 621 | __REFDATA |
613 | .align 4 | 622 | .align 4 |
614 | ENTRY(initial_code) | 623 | ENTRY(initial_code) |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ae03cab4352e..4ff5968f12d2 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -27,6 +27,9 @@ | |||
27 | #define HPET_DEV_FSB_CAP 0x1000 | 27 | #define HPET_DEV_FSB_CAP 0x1000 |
28 | #define HPET_DEV_PERI_CAP 0x2000 | 28 | #define HPET_DEV_PERI_CAP 0x2000 |
29 | 29 | ||
30 | #define HPET_MIN_CYCLES 128 | ||
31 | #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) | ||
32 | |||
30 | #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) | 33 | #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) |
31 | 34 | ||
32 | /* | 35 | /* |
@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void) | |||
299 | /* Calculate the min / max delta */ | 302 | /* Calculate the min / max delta */ |
300 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | 303 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, |
301 | &hpet_clockevent); | 304 | &hpet_clockevent); |
302 | /* 5 usec minimum reprogramming delta. */ | 305 | /* Setup minimum reprogramming delta. */ |
303 | hpet_clockevent.min_delta_ns = 5000; | 306 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, |
307 | &hpet_clockevent); | ||
304 | 308 | ||
305 | /* | 309 | /* |
306 | * Start hpet with the boot cpu mask and make it | 310 | * Start hpet with the boot cpu mask and make it |
@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta, | |||
393 | * the wraparound into account) nor a simple count down event | 397 | * the wraparound into account) nor a simple count down event |
394 | * mode. Further the write to the comparator register is | 398 | * mode. Further the write to the comparator register is |
395 | * delayed internally up to two HPET clock cycles in certain | 399 | * delayed internally up to two HPET clock cycles in certain |
396 | * chipsets (ATI, ICH9,10). We worked around that by reading | 400 | * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even |
397 | * back the compare register, but that required another | 401 | * longer delays. We worked around that by reading back the |
398 | * workaround for ICH9,10 chips where the first readout after | 402 | * compare register, but that required another workaround for |
399 | * write can return the old stale value. We already have a | 403 | * ICH9,10 chips where the first readout after write can |
400 | * minimum delta of 5us enforced, but a NMI or SMI hitting | 404 | * return the old stale value. We already had a minimum |
405 | * programming delta of 5us enforced, but a NMI or SMI hitting | ||
401 | * between the counter readout and the comparator write can | 406 | * between the counter readout and the comparator write can |
402 | * move us behind that point easily. Now instead of reading | 407 | * move us behind that point easily. Now instead of reading |
403 | * the compare register back several times, we make the ETIME | 408 | * the compare register back several times, we make the ETIME |
404 | * decision based on the following: Return ETIME if the | 409 | * decision based on the following: Return ETIME if the |
405 | * counter value after the write is less than 8 HPET cycles | 410 | * counter value after the write is less than HPET_MIN_CYCLES |
406 | * away from the event or if the counter is already ahead of | 411 | * away from the event or if the counter is already ahead of |
407 | * the event. | 412 | * the event. The minimum programming delta for the generic |
413 | * clockevents code is set to 1.5 * HPET_MIN_CYCLES. | ||
408 | */ | 414 | */ |
409 | res = (s32)(cnt - hpet_readl(HPET_COUNTER)); | 415 | res = (s32)(cnt - hpet_readl(HPET_COUNTER)); |
410 | 416 | ||
411 | return res < 8 ? -ETIME : 0; | 417 | return res < HPET_MIN_CYCLES ? -ETIME : 0; |
412 | } | 418 | } |
413 | 419 | ||
414 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 420 | static void hpet_legacy_set_mode(enum clock_event_mode mode, |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff15c9dcc25d..42c594254507 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
433 | dr6_p = (unsigned long *)ERR_PTR(args->err); | 433 | dr6_p = (unsigned long *)ERR_PTR(args->err); |
434 | dr6 = *dr6_p; | 434 | dr6 = *dr6_p; |
435 | 435 | ||
436 | /* If it's a single step, TRAP bits are random */ | ||
437 | if (dr6 & DR_STEP) | ||
438 | return NOTIFY_DONE; | ||
439 | |||
436 | /* Do an early return if no trap bits are set in DR6 */ | 440 | /* Do an early return if no trap bits are set in DR6 */ |
437 | if ((dr6 & DR_TRAP_BITS) == 0) | 441 | if ((dr6 & DR_TRAP_BITS) == 0) |
438 | return NOTIFY_DONE; | 442 | return NOTIFY_DONE; |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index ec592caac4b4..cd21b654dec6 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -315,14 +315,18 @@ static void kgdb_remove_all_hw_break(void) | |||
315 | if (!breakinfo[i].enabled) | 315 | if (!breakinfo[i].enabled) |
316 | continue; | 316 | continue; |
317 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 317 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
318 | if (bp->attr.disabled == 1) | 318 | if (!bp->attr.disabled) { |
319 | arch_uninstall_hw_breakpoint(bp); | ||
320 | bp->attr.disabled = 1; | ||
319 | continue; | 321 | continue; |
322 | } | ||
320 | if (dbg_is_early) | 323 | if (dbg_is_early) |
321 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, | 324 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, |
322 | breakinfo[i].type); | 325 | breakinfo[i].type); |
323 | else | 326 | else if (hw_break_release_slot(i)) |
324 | arch_uninstall_hw_breakpoint(bp); | 327 | printk(KERN_ERR "KGDB: hw bpt remove failed %lx\n", |
325 | bp->attr.disabled = 1; | 328 | breakinfo[i].addr); |
329 | breakinfo[i].enabled = 0; | ||
326 | } | 330 | } |
327 | } | 331 | } |
328 | 332 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1cbd54c0df99..5940282bd2f9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1184 | { | 1184 | { |
1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
1186 | 1186 | ||
1187 | /* This is possible if op is under delayed unoptimizing */ | ||
1188 | if (kprobe_disabled(&op->kp)) | ||
1189 | return; | ||
1190 | |||
1187 | preempt_disable(); | 1191 | preempt_disable(); |
1188 | if (kprobe_running()) { | 1192 | if (kprobe_running()) { |
1189 | kprobes_inc_nmissed_count(&op->kp); | 1193 | kprobes_inc_nmissed_count(&op->kp); |
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | |||
1401 | return 0; | 1405 | return 0; |
1402 | } | 1406 | } |
1403 | 1407 | ||
1404 | /* Replace a breakpoint (int3) with a relative jump. */ | 1408 | #define MAX_OPTIMIZE_PROBES 256 |
1405 | int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | 1409 | static struct text_poke_param *jump_poke_params; |
1410 | static struct jump_poke_buffer { | ||
1411 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1412 | } *jump_poke_bufs; | ||
1413 | |||
1414 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
1415 | u8 *insn_buf, | ||
1416 | struct optimized_kprobe *op) | ||
1406 | { | 1417 | { |
1407 | unsigned char jmp_code[RELATIVEJUMP_SIZE]; | ||
1408 | s32 rel = (s32)((long)op->optinsn.insn - | 1418 | s32 rel = (s32)((long)op->optinsn.insn - |
1409 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | 1419 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); |
1410 | 1420 | ||
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | |||
1412 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | 1422 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, |
1413 | RELATIVE_ADDR_SIZE); | 1423 | RELATIVE_ADDR_SIZE); |
1414 | 1424 | ||
1415 | jmp_code[0] = RELATIVEJUMP_OPCODE; | 1425 | insn_buf[0] = RELATIVEJUMP_OPCODE; |
1416 | *(s32 *)(&jmp_code[1]) = rel; | 1426 | *(s32 *)(&insn_buf[1]) = rel; |
1427 | |||
1428 | tprm->addr = op->kp.addr; | ||
1429 | tprm->opcode = insn_buf; | ||
1430 | tprm->len = RELATIVEJUMP_SIZE; | ||
1431 | } | ||
1432 | |||
1433 | /* | ||
1434 | * Replace breakpoints (int3) with relative jumps. | ||
1435 | * Caller must call with locking kprobe_mutex and text_mutex. | ||
1436 | */ | ||
1437 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
1438 | { | ||
1439 | struct optimized_kprobe *op, *tmp; | ||
1440 | int c = 0; | ||
1441 | |||
1442 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1443 | WARN_ON(kprobe_disabled(&op->kp)); | ||
1444 | /* Setup param */ | ||
1445 | setup_optimize_kprobe(&jump_poke_params[c], | ||
1446 | jump_poke_bufs[c].buf, op); | ||
1447 | list_del_init(&op->list); | ||
1448 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1449 | break; | ||
1450 | } | ||
1417 | 1451 | ||
1418 | /* | 1452 | /* |
1419 | * text_poke_smp doesn't support NMI/MCE code modifying. | 1453 | * text_poke_smp doesn't support NMI/MCE code modifying. |
1420 | * However, since kprobes itself also doesn't support NMI/MCE | 1454 | * However, since kprobes itself also doesn't support NMI/MCE |
1421 | * code probing, it's not a problem. | 1455 | * code probing, it's not a problem. |
1422 | */ | 1456 | */ |
1423 | text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); | 1457 | text_poke_smp_batch(jump_poke_params, c); |
1424 | return 0; | 1458 | } |
1459 | |||
1460 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
1461 | u8 *insn_buf, | ||
1462 | struct optimized_kprobe *op) | ||
1463 | { | ||
1464 | /* Set int3 to first byte for kprobes */ | ||
1465 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
1466 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1467 | |||
1468 | tprm->addr = op->kp.addr; | ||
1469 | tprm->opcode = insn_buf; | ||
1470 | tprm->len = RELATIVEJUMP_SIZE; | ||
1471 | } | ||
1472 | |||
1473 | /* | ||
1474 | * Recover original instructions and breakpoints from relative jumps. | ||
1475 | * Caller must call with locking kprobe_mutex. | ||
1476 | */ | ||
1477 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
1478 | struct list_head *done_list) | ||
1479 | { | ||
1480 | struct optimized_kprobe *op, *tmp; | ||
1481 | int c = 0; | ||
1482 | |||
1483 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1484 | /* Setup param */ | ||
1485 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
1486 | jump_poke_bufs[c].buf, op); | ||
1487 | list_move(&op->list, done_list); | ||
1488 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1489 | break; | ||
1490 | } | ||
1491 | |||
1492 | /* | ||
1493 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1494 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1495 | * code probing, it's not a problem. | ||
1496 | */ | ||
1497 | text_poke_smp_batch(jump_poke_params, c); | ||
1425 | } | 1498 | } |
1426 | 1499 | ||
1427 | /* Replace a relative jump with a breakpoint (int3). */ | 1500 | /* Replace a relative jump with a breakpoint (int3). */ |
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p, | |||
1453 | } | 1526 | } |
1454 | return 0; | 1527 | return 0; |
1455 | } | 1528 | } |
1529 | |||
1530 | static int __kprobes init_poke_params(void) | ||
1531 | { | ||
1532 | /* Allocate code buffer and parameter array */ | ||
1533 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
1534 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1535 | if (!jump_poke_bufs) | ||
1536 | return -ENOMEM; | ||
1537 | |||
1538 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
1539 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1540 | if (!jump_poke_params) { | ||
1541 | kfree(jump_poke_bufs); | ||
1542 | jump_poke_bufs = NULL; | ||
1543 | return -ENOMEM; | ||
1544 | } | ||
1545 | |||
1546 | return 0; | ||
1547 | } | ||
1548 | #else /* !CONFIG_OPTPROBES */ | ||
1549 | static int __kprobes init_poke_params(void) | ||
1550 | { | ||
1551 | return 0; | ||
1552 | } | ||
1456 | #endif | 1553 | #endif |
1457 | 1554 | ||
1458 | int __init arch_init_kprobes(void) | 1555 | int __init arch_init_kprobes(void) |
1459 | { | 1556 | { |
1460 | return 0; | 1557 | return init_poke_params(); |
1461 | } | 1558 | } |
1462 | 1559 | ||
1463 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1560 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index ce0cb4721c9a..0fe6d1a66c38 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu) | |||
155 | return 0; | 155 | return 0; |
156 | } | 156 | } |
157 | 157 | ||
158 | static int get_ucode_data(void *to, const u8 *from, size_t n) | ||
159 | { | ||
160 | memcpy(to, from, n); | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | static void * | 158 | static void * |
165 | get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | 159 | get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) |
166 | { | 160 | { |
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
168 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 162 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; |
169 | void *mc; | 163 | void *mc; |
170 | 164 | ||
171 | if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) | 165 | get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); |
172 | return NULL; | ||
173 | 166 | ||
174 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 167 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
175 | pr_err("error: invalid type field in container file section header\n"); | 168 | pr_err("error: invalid type field in container file section header\n"); |
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
183 | return NULL; | 176 | return NULL; |
184 | } | 177 | } |
185 | 178 | ||
186 | mc = vmalloc(UCODE_MAX_SIZE); | 179 | mc = vzalloc(UCODE_MAX_SIZE); |
187 | if (mc) { | 180 | if (!mc) |
188 | memset(mc, 0, UCODE_MAX_SIZE); | 181 | return NULL; |
189 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, | 182 | |
190 | total_size)) { | 183 | get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); |
191 | vfree(mc); | 184 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; |
192 | mc = NULL; | 185 | |
193 | } else | ||
194 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | ||
195 | } | ||
196 | return mc; | 186 | return mc; |
197 | } | 187 | } |
198 | 188 | ||
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
202 | unsigned int *buf_pos = (unsigned int *)container_hdr; | 192 | unsigned int *buf_pos = (unsigned int *)container_hdr; |
203 | unsigned long size; | 193 | unsigned long size; |
204 | 194 | ||
205 | if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) | 195 | get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); |
206 | return 0; | ||
207 | 196 | ||
208 | size = buf_pos[2]; | 197 | size = buf_pos[2]; |
209 | 198 | ||
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
219 | } | 208 | } |
220 | 209 | ||
221 | buf += UCODE_CONTAINER_HEADER_SIZE; | 210 | buf += UCODE_CONTAINER_HEADER_SIZE; |
222 | if (get_ucode_data(equiv_cpu_table, buf, size)) { | 211 | get_ucode_data(equiv_cpu_table, buf, size); |
223 | vfree(equiv_cpu_table); | ||
224 | return 0; | ||
225 | } | ||
226 | 212 | ||
227 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 213 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ |
228 | } | 214 | } |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index dcb65cc0a053..1a1b606d3e92 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
364 | 364 | ||
365 | /* For performance reasons, reuse mc area when possible */ | 365 | /* For performance reasons, reuse mc area when possible */ |
366 | if (!mc || mc_size > curr_mc_size) { | 366 | if (!mc || mc_size > curr_mc_size) { |
367 | if (mc) | 367 | vfree(mc); |
368 | vfree(mc); | ||
369 | mc = vmalloc(mc_size); | 368 | mc = vmalloc(mc_size); |
370 | if (!mc) | 369 | if (!mc) |
371 | break; | 370 | break; |
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
374 | 373 | ||
375 | if (get_ucode_data(mc, ucode_ptr, mc_size) || | 374 | if (get_ucode_data(mc, ucode_ptr, mc_size) || |
376 | microcode_sanity_check(mc) < 0) { | 375 | microcode_sanity_check(mc) < 0) { |
377 | vfree(mc); | ||
378 | break; | 376 | break; |
379 | } | 377 | } |
380 | 378 | ||
381 | if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { | 379 | if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { |
382 | if (new_mc) | 380 | vfree(new_mc); |
383 | vfree(new_mc); | ||
384 | new_rev = mc_header.rev; | 381 | new_rev = mc_header.rev; |
385 | new_mc = mc; | 382 | new_mc = mc; |
386 | mc = NULL; /* trigger new vmalloc */ | 383 | mc = NULL; /* trigger new vmalloc */ |
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
390 | leftover -= mc_size; | 387 | leftover -= mc_size; |
391 | } | 388 | } |
392 | 389 | ||
393 | if (mc) | 390 | vfree(mc); |
394 | vfree(mc); | ||
395 | 391 | ||
396 | if (leftover) { | 392 | if (leftover) { |
397 | if (new_mc) | 393 | vfree(new_mc); |
398 | vfree(new_mc); | ||
399 | state = UCODE_ERROR; | 394 | state = UCODE_ERROR; |
400 | goto out; | 395 | goto out; |
401 | } | 396 | } |
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
405 | goto out; | 400 | goto out; |
406 | } | 401 | } |
407 | 402 | ||
408 | if (uci->mc) | 403 | vfree(uci->mc); |
409 | vfree(uci->mc); | ||
410 | uci->mc = (struct microcode_intel *)new_mc; | 404 | uci->mc = (struct microcode_intel *)new_mc; |
411 | 405 | ||
412 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", | 406 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", |
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 6da143c2a6b8..ac861b8348e2 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -25,7 +25,6 @@ struct pci_hostbridge_probe { | |||
25 | }; | 25 | }; |
26 | 26 | ||
27 | static u64 __cpuinitdata fam10h_pci_mmconf_base; | 27 | static u64 __cpuinitdata fam10h_pci_mmconf_base; |
28 | static int __cpuinitdata fam10h_pci_mmconf_base_status; | ||
29 | 28 | ||
30 | static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { | 29 | static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { |
31 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, | 30 | { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, |
@@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2) | |||
44 | return start1 - start2; | 43 | return start1 - start2; |
45 | } | 44 | } |
46 | 45 | ||
47 | /*[47:0] */ | 46 | #define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT) |
48 | /* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ | 47 | #define MMCONF_MASK (~(MMCONF_UNIT - 1)) |
48 | #define MMCONF_SIZE (MMCONF_UNIT << 8) | ||
49 | /* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */ | ||
49 | #define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) | 50 | #define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) |
50 | #define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) | 51 | #define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40)) |
51 | static void __cpuinit get_fam10h_pci_mmconf_base(void) | 52 | static void __cpuinit get_fam10h_pci_mmconf_base(void) |
52 | { | 53 | { |
53 | int i; | 54 | int i; |
@@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) | |||
64 | struct range range[8]; | 65 | struct range range[8]; |
65 | 66 | ||
66 | /* only try to get setting from BSP */ | 67 | /* only try to get setting from BSP */ |
67 | /* -1 or 1 */ | 68 | if (fam10h_pci_mmconf_base) |
68 | if (fam10h_pci_mmconf_base_status) | ||
69 | return; | 69 | return; |
70 | 70 | ||
71 | if (!early_pci_allowed()) | 71 | if (!early_pci_allowed()) |
72 | goto fail; | 72 | return; |
73 | 73 | ||
74 | found = 0; | 74 | found = 0; |
75 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { | 75 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { |
@@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) | |||
91 | } | 91 | } |
92 | 92 | ||
93 | if (!found) | 93 | if (!found) |
94 | goto fail; | 94 | return; |
95 | 95 | ||
96 | /* SYS_CFG */ | 96 | /* SYS_CFG */ |
97 | address = MSR_K8_SYSCFG; | 97 | address = MSR_K8_SYSCFG; |
@@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) | |||
99 | 99 | ||
100 | /* TOP_MEM2 is not enabled? */ | 100 | /* TOP_MEM2 is not enabled? */ |
101 | if (!(val & (1<<21))) { | 101 | if (!(val & (1<<21))) { |
102 | tom2 = 0; | 102 | tom2 = 1ULL << 32; |
103 | } else { | 103 | } else { |
104 | /* TOP_MEM2 */ | 104 | /* TOP_MEM2 */ |
105 | address = MSR_K8_TOP_MEM2; | 105 | address = MSR_K8_TOP_MEM2; |
106 | rdmsrl(address, val); | 106 | rdmsrl(address, val); |
107 | tom2 = val & (0xffffULL<<32); | 107 | tom2 = max(val & 0xffffff800000ULL, 1ULL << 32); |
108 | } | 108 | } |
109 | 109 | ||
110 | if (base <= tom2) | 110 | if (base <= tom2) |
111 | base = tom2 + (1ULL<<32); | 111 | base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK; |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * need to check if the range is in the high mmio range that is | 114 | * need to check if the range is in the high mmio range that is |
@@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) | |||
123 | if (!(reg & 3)) | 123 | if (!(reg & 3)) |
124 | continue; | 124 | continue; |
125 | 125 | ||
126 | start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ | 126 | start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/ |
127 | reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); | 127 | reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); |
128 | end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ | 128 | end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/ |
129 | 129 | ||
130 | if (!end) | 130 | if (end < tom2) |
131 | continue; | 131 | continue; |
132 | 132 | ||
133 | range[hi_mmio_num].start = start; | 133 | range[hi_mmio_num].start = start; |
@@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) | |||
143 | 143 | ||
144 | if (range[hi_mmio_num - 1].end < base) | 144 | if (range[hi_mmio_num - 1].end < base) |
145 | goto out; | 145 | goto out; |
146 | if (range[0].start > base) | 146 | if (range[0].start > base + MMCONF_SIZE) |
147 | goto out; | 147 | goto out; |
148 | 148 | ||
149 | /* need to find one window */ | 149 | /* need to find one window */ |
150 | base = range[0].start - (1ULL << 32); | 150 | base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT; |
151 | if ((base > tom2) && BASE_VALID(base)) | 151 | if ((base > tom2) && BASE_VALID(base)) |
152 | goto out; | 152 | goto out; |
153 | base = range[hi_mmio_num - 1].end + (1ULL << 32); | 153 | base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK; |
154 | if ((base > tom2) && BASE_VALID(base)) | 154 | if (BASE_VALID(base)) |
155 | goto out; | 155 | goto out; |
156 | /* need to find window between ranges */ | 156 | /* need to find window between ranges */ |
157 | if (hi_mmio_num > 1) | 157 | for (i = 1; i < hi_mmio_num; i++) { |
158 | for (i = 0; i < hi_mmio_num - 1; i++) { | 158 | base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK; |
159 | if (range[i + 1].start > (range[i].end + (1ULL << 32))) { | 159 | val = range[i].start & MMCONF_MASK; |
160 | base = range[i].end + (1ULL << 32); | 160 | if (val >= base + MMCONF_SIZE && BASE_VALID(base)) |
161 | if ((base > tom2) && BASE_VALID(base)) | 161 | goto out; |
162 | goto out; | ||
163 | } | ||
164 | } | 162 | } |
165 | |||
166 | fail: | ||
167 | fam10h_pci_mmconf_base_status = -1; | ||
168 | return; | 163 | return; |
164 | |||
169 | out: | 165 | out: |
170 | fam10h_pci_mmconf_base = base; | 166 | fam10h_pci_mmconf_base = base; |
171 | fam10h_pci_mmconf_base_status = 1; | ||
172 | } | 167 | } |
173 | 168 | ||
174 | void __cpuinit fam10h_check_enable_mmcfg(void) | 169 | void __cpuinit fam10h_check_enable_mmcfg(void) |
@@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) | |||
190 | 185 | ||
191 | /* only trust the one handle 256 buses, if acpi=off */ | 186 | /* only trust the one handle 256 buses, if acpi=off */ |
192 | if (!acpi_pci_disabled || busnbits >= 8) { | 187 | if (!acpi_pci_disabled || busnbits >= 8) { |
193 | u64 base; | 188 | u64 base = val & MMCONF_MASK; |
194 | base = val & (0xffffULL << 32); | 189 | |
195 | if (fam10h_pci_mmconf_base_status <= 0) { | 190 | if (!fam10h_pci_mmconf_base) { |
196 | fam10h_pci_mmconf_base = base; | 191 | fam10h_pci_mmconf_base = base; |
197 | fam10h_pci_mmconf_base_status = 1; | ||
198 | return; | 192 | return; |
199 | } else if (fam10h_pci_mmconf_base == base) | 193 | } else if (fam10h_pci_mmconf_base == base) |
200 | return; | 194 | return; |
@@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) | |||
206 | * with 256 buses | 200 | * with 256 buses |
207 | */ | 201 | */ |
208 | get_fam10h_pci_mmconf_base(); | 202 | get_fam10h_pci_mmconf_base(); |
209 | if (fam10h_pci_mmconf_base_status <= 0) | 203 | if (!fam10h_pci_mmconf_base) { |
204 | pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF; | ||
210 | return; | 205 | return; |
206 | } | ||
211 | 207 | ||
212 | printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); | 208 | printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); |
213 | val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | | 209 | val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7bf2dc4c8f70..12fcbe2c143e 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <linux/poll.h> | 31 | #include <linux/poll.h> |
32 | #include <linux/smp.h> | 32 | #include <linux/smp.h> |
33 | #include <linux/smp_lock.h> | ||
34 | #include <linux/major.h> | 33 | #include <linux/major.h> |
35 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
36 | #include <linux/device.h> | 35 | #include <linux/device.h> |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d1868a86aa..c852041bfc3d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -91,8 +91,7 @@ void exit_thread(void) | |||
91 | void show_regs(struct pt_regs *regs) | 91 | void show_regs(struct pt_regs *regs) |
92 | { | 92 | { |
93 | show_registers(regs); | 93 | show_registers(regs); |
94 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), | 94 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); |
95 | regs->bp); | ||
96 | } | 95 | } |
97 | 96 | ||
98 | void show_regs_common(void) | 97 | void show_regs_common(void) |
@@ -374,6 +373,7 @@ void default_idle(void) | |||
374 | { | 373 | { |
375 | if (hlt_use_halt()) { | 374 | if (hlt_use_halt()) { |
376 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 375 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
376 | trace_cpu_idle(1, smp_processor_id()); | ||
377 | current_thread_info()->status &= ~TS_POLLING; | 377 | current_thread_info()->status &= ~TS_POLLING; |
378 | /* | 378 | /* |
379 | * TS_POLLING-cleared state must be visible before we | 379 | * TS_POLLING-cleared state must be visible before we |
@@ -444,6 +444,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
444 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 444 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
445 | { | 445 | { |
446 | trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); | 446 | trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); |
447 | trace_cpu_idle((ax>>4)+1, smp_processor_id()); | ||
447 | if (!need_resched()) { | 448 | if (!need_resched()) { |
448 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 449 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
449 | clflush((void *)¤t_thread_info()->flags); | 450 | clflush((void *)¤t_thread_info()->flags); |
@@ -460,6 +461,7 @@ static void mwait_idle(void) | |||
460 | { | 461 | { |
461 | if (!need_resched()) { | 462 | if (!need_resched()) { |
462 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 463 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
464 | trace_cpu_idle(1, smp_processor_id()); | ||
463 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 465 | if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
464 | clflush((void *)¤t_thread_info()->flags); | 466 | clflush((void *)¤t_thread_info()->flags); |
465 | 467 | ||
@@ -481,10 +483,12 @@ static void mwait_idle(void) | |||
481 | static void poll_idle(void) | 483 | static void poll_idle(void) |
482 | { | 484 | { |
483 | trace_power_start(POWER_CSTATE, 0, smp_processor_id()); | 485 | trace_power_start(POWER_CSTATE, 0, smp_processor_id()); |
486 | trace_cpu_idle(0, smp_processor_id()); | ||
484 | local_irq_enable(); | 487 | local_irq_enable(); |
485 | while (!need_resched()) | 488 | while (!need_resched()) |
486 | cpu_relax(); | 489 | cpu_relax(); |
487 | trace_power_end(0); | 490 | trace_power_end(smp_processor_id()); |
491 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
488 | } | 492 | } |
489 | 493 | ||
490 | /* | 494 | /* |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 96586c3cbbbf..4b9befa0e347 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -113,8 +113,8 @@ void cpu_idle(void) | |||
113 | stop_critical_timings(); | 113 | stop_critical_timings(); |
114 | pm_idle(); | 114 | pm_idle(); |
115 | start_critical_timings(); | 115 | start_critical_timings(); |
116 | |||
117 | trace_power_end(smp_processor_id()); | 116 | trace_power_end(smp_processor_id()); |
117 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
118 | } | 118 | } |
119 | tick_nohz_restart_sched_tick(); | 119 | tick_nohz_restart_sched_tick(); |
120 | preempt_enable_no_resched(); | 120 | preempt_enable_no_resched(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b3d7a3a04f38..4c818a738396 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -142,6 +142,8 @@ void cpu_idle(void) | |||
142 | start_critical_timings(); | 142 | start_critical_timings(); |
143 | 143 | ||
144 | trace_power_end(smp_processor_id()); | 144 | trace_power_end(smp_processor_id()); |
145 | trace_cpu_idle(PWR_EVENT_EXIT, | ||
146 | smp_processor_id()); | ||
145 | 147 | ||
146 | /* In many cases the interrupt that ended idle | 148 | /* In many cases the interrupt that ended idle |
147 | has already called exit_idle. But some idle | 149 | has already called exit_idle. But some idle |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 008b91eefa18..42eb3300dfc6 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -83,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | |||
83 | 83 | ||
84 | static atomic64_t last_value = ATOMIC64_INIT(0); | 84 | static atomic64_t last_value = ATOMIC64_INIT(0); |
85 | 85 | ||
86 | void pvclock_resume(void) | ||
87 | { | ||
88 | atomic64_set(&last_value, 0); | ||
89 | } | ||
90 | |||
86 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 91 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
87 | { | 92 | { |
88 | struct pvclock_shadow_time shadow; | 93 | struct pvclock_shadow_time shadow; |
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c new file mode 100644 index 000000000000..2a26819bb6a8 --- /dev/null +++ b/arch/x86/kernel/resource.c | |||
@@ -0,0 +1,48 @@ | |||
1 | #include <linux/ioport.h> | ||
2 | #include <asm/e820.h> | ||
3 | |||
4 | static void resource_clip(struct resource *res, resource_size_t start, | ||
5 | resource_size_t end) | ||
6 | { | ||
7 | resource_size_t low = 0, high = 0; | ||
8 | |||
9 | if (res->end < start || res->start > end) | ||
10 | return; /* no conflict */ | ||
11 | |||
12 | if (res->start < start) | ||
13 | low = start - res->start; | ||
14 | |||
15 | if (res->end > end) | ||
16 | high = res->end - end; | ||
17 | |||
18 | /* Keep the area above or below the conflict, whichever is larger */ | ||
19 | if (low > high) | ||
20 | res->end = start - 1; | ||
21 | else | ||
22 | res->start = end + 1; | ||
23 | } | ||
24 | |||
25 | static void remove_e820_regions(struct resource *avail) | ||
26 | { | ||
27 | int i; | ||
28 | struct e820entry *entry; | ||
29 | |||
30 | for (i = 0; i < e820.nr_map; i++) { | ||
31 | entry = &e820.map[i]; | ||
32 | |||
33 | resource_clip(avail, entry->addr, | ||
34 | entry->addr + entry->size - 1); | ||
35 | } | ||
36 | } | ||
37 | |||
38 | void arch_remove_reservations(struct resource *avail) | ||
39 | { | ||
40 | /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ | ||
41 | if (avail->flags & IORESOURCE_MEM) { | ||
42 | if (avail->start < BIOS_END) | ||
43 | avail->start = BIOS_END; | ||
44 | resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); | ||
45 | |||
46 | remove_e820_regions(avail); | ||
47 | } | ||
48 | } | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0afb8c7e3803..d3cfe26c0252 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -501,7 +501,18 @@ static inline unsigned long long get_total_mem(void) | |||
501 | return total << PAGE_SHIFT; | 501 | return total << PAGE_SHIFT; |
502 | } | 502 | } |
503 | 503 | ||
504 | #define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF | 504 | /* |
505 | * Keep the crash kernel below this limit. On 32 bits earlier kernels | ||
506 | * would limit the kernel to the low 512 MiB due to mapping restrictions. | ||
507 | * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this | ||
508 | * limit once kexec-tools are fixed. | ||
509 | */ | ||
510 | #ifdef CONFIG_X86_32 | ||
511 | # define CRASH_KERNEL_ADDR_MAX (512 << 20) | ||
512 | #else | ||
513 | # define CRASH_KERNEL_ADDR_MAX (896 << 20) | ||
514 | #endif | ||
515 | |||
505 | static void __init reserve_crashkernel(void) | 516 | static void __init reserve_crashkernel(void) |
506 | { | 517 | { |
507 | unsigned long long total_mem; | 518 | unsigned long long total_mem; |
@@ -520,10 +531,10 @@ static void __init reserve_crashkernel(void) | |||
520 | const unsigned long long alignment = 16<<20; /* 16M */ | 531 | const unsigned long long alignment = 16<<20; /* 16M */ |
521 | 532 | ||
522 | /* | 533 | /* |
523 | * kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX | 534 | * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX |
524 | */ | 535 | */ |
525 | crash_base = memblock_find_in_range(alignment, | 536 | crash_base = memblock_find_in_range(alignment, |
526 | DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment); | 537 | CRASH_KERNEL_ADDR_MAX, crash_size, alignment); |
527 | 538 | ||
528 | if (crash_base == MEMBLOCK_ERROR) { | 539 | if (crash_base == MEMBLOCK_ERROR) { |
529 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 540 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
@@ -769,7 +780,6 @@ void __init setup_arch(char **cmdline_p) | |||
769 | 780 | ||
770 | x86_init.oem.arch_setup(); | 781 | x86_init.oem.arch_setup(); |
771 | 782 | ||
772 | resource_alloc_from_bottom = 0; | ||
773 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; | 783 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; |
774 | setup_memory_map(); | 784 | setup_memory_map(); |
775 | parse_setup_data(); | 785 | parse_setup_data(); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 083e99d1b7df..ee886fe10ef4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void) | |||
281 | */ | 281 | */ |
282 | smp_store_cpu_info(cpuid); | 282 | smp_store_cpu_info(cpuid); |
283 | 283 | ||
284 | /* | ||
285 | * This must be done before setting cpu_online_mask | ||
286 | * or calling notify_cpu_starting. | ||
287 | */ | ||
288 | set_cpu_sibling_map(raw_smp_processor_id()); | ||
289 | wmb(); | ||
290 | |||
284 | notify_cpu_starting(cpuid); | 291 | notify_cpu_starting(cpuid); |
285 | 292 | ||
286 | /* | 293 | /* |
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
316 | */ | 323 | */ |
317 | check_tsc_sync_target(); | 324 | check_tsc_sync_target(); |
318 | 325 | ||
319 | if (nmi_watchdog == NMI_IO_APIC) { | ||
320 | legacy_pic->mask(0); | ||
321 | enable_NMI_through_LVT0(); | ||
322 | legacy_pic->unmask(0); | ||
323 | } | ||
324 | |||
325 | /* This must be done before setting cpu_online_mask */ | ||
326 | set_cpu_sibling_map(raw_smp_processor_id()); | ||
327 | wmb(); | ||
328 | |||
329 | /* | 326 | /* |
330 | * We need to hold call_lock, so there is no inconsistency | 327 | * We need to hold call_lock, so there is no inconsistency |
331 | * between the time smp_call_function() determines number of | 328 | * between the time smp_call_function() determines number of |
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1061 | printk(KERN_INFO "SMP mode deactivated.\n"); | 1058 | printk(KERN_INFO "SMP mode deactivated.\n"); |
1062 | smpboot_clear_io_apic(); | 1059 | smpboot_clear_io_apic(); |
1063 | 1060 | ||
1064 | localise_nmi_watchdog(); | ||
1065 | |||
1066 | connect_bsp_APIC(); | 1061 | connect_bsp_APIC(); |
1067 | setup_local_APIC(); | 1062 | setup_local_APIC(); |
1068 | end_local_APIC_setup(); | 1063 | end_local_APIC_setup(); |
@@ -1166,6 +1161,20 @@ out: | |||
1166 | preempt_enable(); | 1161 | preempt_enable(); |
1167 | } | 1162 | } |
1168 | 1163 | ||
1164 | void arch_disable_nonboot_cpus_begin(void) | ||
1165 | { | ||
1166 | /* | ||
1167 | * Avoid the smp alternatives switch during the disable_nonboot_cpus(). | ||
1168 | * In the suspend path, we will be back in the SMP mode shortly anyways. | ||
1169 | */ | ||
1170 | skip_smp_alternatives = true; | ||
1171 | } | ||
1172 | |||
1173 | void arch_disable_nonboot_cpus_end(void) | ||
1174 | { | ||
1175 | skip_smp_alternatives = false; | ||
1176 | } | ||
1177 | |||
1169 | void arch_enable_nonboot_cpus_begin(void) | 1178 | void arch_enable_nonboot_cpus_begin(void) |
1170 | { | 1179 | { |
1171 | set_mtrr_aps_delayed_init(); | 1180 | set_mtrr_aps_delayed_init(); |
@@ -1196,7 +1205,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1196 | #ifdef CONFIG_X86_IO_APIC | 1205 | #ifdef CONFIG_X86_IO_APIC |
1197 | setup_ioapic_dest(); | 1206 | setup_ioapic_dest(); |
1198 | #endif | 1207 | #endif |
1199 | check_nmi_watchdog(); | ||
1200 | mtrr_aps_init(); | 1208 | mtrr_aps_init(); |
1201 | } | 1209 | } |
1202 | 1210 | ||
@@ -1341,8 +1349,6 @@ int native_cpu_disable(void) | |||
1341 | if (cpu == 0) | 1349 | if (cpu == 0) |
1342 | return -EBUSY; | 1350 | return -EBUSY; |
1343 | 1351 | ||
1344 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1345 | stop_apic_nmi_watchdog(NULL); | ||
1346 | clear_local_APIC(); | 1352 | clear_local_APIC(); |
1347 | 1353 | ||
1348 | cpu_disable_common(); | 1354 | cpu_disable_common(); |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b53c525368a7..938c8e10a19a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = { | |||
73 | */ | 73 | */ |
74 | void save_stack_trace(struct stack_trace *trace) | 74 | void save_stack_trace(struct stack_trace *trace) |
75 | { | 75 | { |
76 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); | 76 | dump_trace(current, NULL, NULL, &save_stack_ops, trace); |
77 | if (trace->nr_entries < trace->max_entries) | 77 | if (trace->nr_entries < trace->max_entries) |
78 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 78 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
79 | } | 79 | } |
80 | EXPORT_SYMBOL_GPL(save_stack_trace); | 80 | EXPORT_SYMBOL_GPL(save_stack_trace); |
81 | 81 | ||
82 | void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) | 82 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) |
83 | { | 83 | { |
84 | dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); | 84 | dump_trace(current, regs, NULL, &save_stack_ops, trace); |
85 | if (trace->nr_entries < trace->max_entries) | 85 | if (trace->nr_entries < trace->max_entries) |
86 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 86 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
87 | } | 87 | } |
88 | 88 | ||
89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
90 | { | 90 | { |
91 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 91 | dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); |
92 | if (trace->nr_entries < trace->max_entries) | 92 | if (trace->nr_entries < trace->max_entries) |
93 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 93 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
94 | } | 94 | } |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fb5cc5e14cfa..25a28a245937 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -22,10 +22,6 @@ | |||
22 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
23 | #include <asm/time.h> | 23 | #include <asm/time.h> |
24 | 24 | ||
25 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) | ||
26 | int timer_ack; | ||
27 | #endif | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | 25 | #ifdef CONFIG_X86_64 |
30 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | 26 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; |
31 | #endif | 27 | #endif |
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
63 | /* Keep nmi watchdog up to date */ | 59 | /* Keep nmi watchdog up to date */ |
64 | inc_irq_stat(irq0_irqs); | 60 | inc_irq_stat(irq0_irqs); |
65 | 61 | ||
66 | /* Optimized out for !IO_APIC and x86_64 */ | ||
67 | if (timer_ack) { | ||
68 | /* | ||
69 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
70 | * manually to deassert NMI lines for the watchdog if run | ||
71 | * on an 82489DX-based system. | ||
72 | */ | ||
73 | raw_spin_lock(&i8259A_lock); | ||
74 | outb(0x0c, PIC_MASTER_OCW3); | ||
75 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
76 | inb(PIC_MASTER_POLL); | ||
77 | raw_spin_unlock(&i8259A_lock); | ||
78 | } | ||
79 | |||
80 | global_clock_event->event_handler(global_clock_event); | 62 | global_clock_event->event_handler(global_clock_event); |
81 | 63 | ||
82 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ | 64 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ |
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 3af2dff58b21..075d130efcf9 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -127,7 +127,7 @@ startup_64: | |||
127 | no_longmode: | 127 | no_longmode: |
128 | hlt | 128 | hlt |
129 | jmp no_longmode | 129 | jmp no_longmode |
130 | #include "verify_cpu_64.S" | 130 | #include "verify_cpu.S" |
131 | 131 | ||
132 | # Careful these need to be in the same 64K segment as the above; | 132 | # Careful these need to be in the same 64K segment as the above; |
133 | tidt: | 133 | tidt: |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb838ca42c96..c76aaca5694d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors); | |||
83 | 83 | ||
84 | static int ignore_nmis; | 84 | static int ignore_nmis; |
85 | 85 | ||
86 | int unknown_nmi_panic; | ||
87 | |||
86 | static inline void conditional_sti(struct pt_regs *regs) | 88 | static inline void conditional_sti(struct pt_regs *regs) |
87 | { | 89 | { |
88 | if (regs->flags & X86_EFLAGS_IF) | 90 | if (regs->flags & X86_EFLAGS_IF) |
@@ -300,6 +302,13 @@ gp_in_kernel: | |||
300 | die("general protection fault", regs, error_code); | 302 | die("general protection fault", regs, error_code); |
301 | } | 303 | } |
302 | 304 | ||
305 | static int __init setup_unknown_nmi_panic(char *str) | ||
306 | { | ||
307 | unknown_nmi_panic = 1; | ||
308 | return 1; | ||
309 | } | ||
310 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
311 | |||
303 | static notrace __kprobes void | 312 | static notrace __kprobes void |
304 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | 313 | mem_parity_error(unsigned char reason, struct pt_regs *regs) |
305 | { | 314 | { |
@@ -342,9 +351,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
342 | reason = (reason & 0xf) | 8; | 351 | reason = (reason & 0xf) | 8; |
343 | outb(reason, 0x61); | 352 | outb(reason, 0x61); |
344 | 353 | ||
345 | i = 2000; | 354 | i = 20000; |
346 | while (--i) | 355 | while (--i) { |
347 | udelay(1000); | 356 | touch_nmi_watchdog(); |
357 | udelay(100); | ||
358 | } | ||
348 | 359 | ||
349 | reason &= ~8; | 360 | reason &= ~8; |
350 | outb(reason, 0x61); | 361 | outb(reason, 0x61); |
@@ -371,7 +382,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
371 | reason, smp_processor_id()); | 382 | reason, smp_processor_id()); |
372 | 383 | ||
373 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | 384 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); |
374 | if (panic_on_unrecovered_nmi) | 385 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) |
375 | panic("NMI: Not continuing"); | 386 | panic("NMI: Not continuing"); |
376 | 387 | ||
377 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 388 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); |
@@ -397,20 +408,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
397 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 408 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
398 | == NOTIFY_STOP) | 409 | == NOTIFY_STOP) |
399 | return; | 410 | return; |
400 | |||
401 | #ifndef CONFIG_LOCKUP_DETECTOR | ||
402 | /* | ||
403 | * Ok, so this is none of the documented NMI sources, | ||
404 | * so it must be the NMI watchdog. | ||
405 | */ | ||
406 | if (nmi_watchdog_tick(regs, reason)) | ||
407 | return; | ||
408 | if (!do_nmi_callback(regs, cpu)) | ||
409 | #endif /* !CONFIG_LOCKUP_DETECTOR */ | ||
410 | unknown_nmi_error(reason, regs); | ||
411 | #else | ||
412 | unknown_nmi_error(reason, regs); | ||
413 | #endif | 411 | #endif |
412 | unknown_nmi_error(reason, regs); | ||
414 | 413 | ||
415 | return; | 414 | return; |
416 | } | 415 | } |
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
446 | 445 | ||
447 | void stop_nmi(void) | 446 | void stop_nmi(void) |
448 | { | 447 | { |
449 | acpi_nmi_disable(); | ||
450 | ignore_nmis++; | 448 | ignore_nmis++; |
451 | } | 449 | } |
452 | 450 | ||
453 | void restart_nmi(void) | 451 | void restart_nmi(void) |
454 | { | 452 | { |
455 | ignore_nmis--; | 453 | ignore_nmis--; |
456 | acpi_nmi_enable(); | ||
457 | } | 454 | } |
458 | 455 | ||
459 | /* May run on IST stack. */ | 456 | /* May run on IST stack. */ |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0c40d8b72416..356a0d455cf9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void) | |||
872 | 872 | ||
873 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 873 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
874 | return 0; | 874 | return 0; |
875 | |||
876 | if (tsc_clocksource_reliable) | ||
877 | return 0; | ||
875 | /* | 878 | /* |
876 | * Intel systems are normally all synchronized. | 879 | * Intel systems are normally all synchronized. |
877 | * Exceptions must mark TSC as unstable: | 880 | * Exceptions must mark TSC as unstable: |
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void) | |||
879 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | 882 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { |
880 | /* assume multi socket systems are not synchronized: */ | 883 | /* assume multi socket systems are not synchronized: */ |
881 | if (num_possible_cpus() > 1) | 884 | if (num_possible_cpus() > 1) |
882 | tsc_unstable = 1; | 885 | return 1; |
883 | } | 886 | } |
884 | 887 | ||
885 | return tsc_unstable; | 888 | return 0; |
889 | } | ||
890 | |||
891 | |||
892 | static void tsc_refine_calibration_work(struct work_struct *work); | ||
893 | static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); | ||
894 | /** | ||
895 | * tsc_refine_calibration_work - Further refine tsc freq calibration | ||
896 | * @work - ignored. | ||
897 | * | ||
898 | * This functions uses delayed work over a period of a | ||
899 | * second to further refine the TSC freq value. Since this is | ||
900 | * timer based, instead of loop based, we don't block the boot | ||
901 | * process while this longer calibration is done. | ||
902 | * | ||
903 | * If there are any calibration anomolies (too many SMIs, etc), | ||
904 | * or the refined calibration is off by 1% of the fast early | ||
905 | * calibration, we throw out the new calibration and use the | ||
906 | * early calibration. | ||
907 | */ | ||
908 | static void tsc_refine_calibration_work(struct work_struct *work) | ||
909 | { | ||
910 | static u64 tsc_start = -1, ref_start; | ||
911 | static int hpet; | ||
912 | u64 tsc_stop, ref_stop, delta; | ||
913 | unsigned long freq; | ||
914 | |||
915 | /* Don't bother refining TSC on unstable systems */ | ||
916 | if (check_tsc_unstable()) | ||
917 | goto out; | ||
918 | |||
919 | /* | ||
920 | * Since the work is started early in boot, we may be | ||
921 | * delayed the first time we expire. So set the workqueue | ||
922 | * again once we know timers are working. | ||
923 | */ | ||
924 | if (tsc_start == -1) { | ||
925 | /* | ||
926 | * Only set hpet once, to avoid mixing hardware | ||
927 | * if the hpet becomes enabled later. | ||
928 | */ | ||
929 | hpet = is_hpet_enabled(); | ||
930 | schedule_delayed_work(&tsc_irqwork, HZ); | ||
931 | tsc_start = tsc_read_refs(&ref_start, hpet); | ||
932 | return; | ||
933 | } | ||
934 | |||
935 | tsc_stop = tsc_read_refs(&ref_stop, hpet); | ||
936 | |||
937 | /* hpet or pmtimer available ? */ | ||
938 | if (!hpet && !ref_start && !ref_stop) | ||
939 | goto out; | ||
940 | |||
941 | /* Check, whether the sampling was disturbed by an SMI */ | ||
942 | if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) | ||
943 | goto out; | ||
944 | |||
945 | delta = tsc_stop - tsc_start; | ||
946 | delta *= 1000000LL; | ||
947 | if (hpet) | ||
948 | freq = calc_hpet_ref(delta, ref_start, ref_stop); | ||
949 | else | ||
950 | freq = calc_pmtimer_ref(delta, ref_start, ref_stop); | ||
951 | |||
952 | /* Make sure we're within 1% */ | ||
953 | if (abs(tsc_khz - freq) > tsc_khz/100) | ||
954 | goto out; | ||
955 | |||
956 | tsc_khz = freq; | ||
957 | printk(KERN_INFO "Refined TSC clocksource calibration: " | ||
958 | "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, | ||
959 | (unsigned long)tsc_khz % 1000); | ||
960 | |||
961 | out: | ||
962 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | ||
886 | } | 963 | } |
887 | 964 | ||
888 | static void __init init_tsc_clocksource(void) | 965 | |
966 | static int __init init_tsc_clocksource(void) | ||
889 | { | 967 | { |
968 | if (!cpu_has_tsc || tsc_disabled > 0) | ||
969 | return 0; | ||
970 | |||
890 | if (tsc_clocksource_reliable) | 971 | if (tsc_clocksource_reliable) |
891 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 972 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; |
892 | /* lower the rating if we already know its unstable: */ | 973 | /* lower the rating if we already know its unstable: */ |
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void) | |||
894 | clocksource_tsc.rating = 0; | 975 | clocksource_tsc.rating = 0; |
895 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | 976 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; |
896 | } | 977 | } |
897 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | 978 | schedule_delayed_work(&tsc_irqwork, 0); |
979 | return 0; | ||
898 | } | 980 | } |
981 | /* | ||
982 | * We use device_initcall here, to ensure we run after the hpet | ||
983 | * is fully initialized, which may occur at fs_initcall time. | ||
984 | */ | ||
985 | device_initcall(init_tsc_clocksource); | ||
899 | 986 | ||
900 | void __init tsc_init(void) | 987 | void __init tsc_init(void) |
901 | { | 988 | { |
@@ -949,6 +1036,5 @@ void __init tsc_init(void) | |||
949 | mark_tsc_unstable("TSCs unsynchronized"); | 1036 | mark_tsc_unstable("TSCs unsynchronized"); |
950 | 1037 | ||
951 | check_system_tsc_reliable(); | 1038 | check_system_tsc_reliable(); |
952 | init_tsc_clocksource(); | ||
953 | } | 1039 | } |
954 | 1040 | ||
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu.S index 56a8c2a867d9..0edefc19a113 100644 --- a/arch/x86/kernel/verify_cpu_64.S +++ b/arch/x86/kernel/verify_cpu.S | |||
@@ -7,6 +7,7 @@ | |||
7 | * Copyright (c) 2007 Andi Kleen (ak@suse.de) | 7 | * Copyright (c) 2007 Andi Kleen (ak@suse.de) |
8 | * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) | 8 | * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) |
9 | * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) | 9 | * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) |
10 | * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) | ||
10 | * | 11 | * |
11 | * This source code is licensed under the GNU General Public License, | 12 | * This source code is licensed under the GNU General Public License, |
12 | * Version 2. See the file COPYING for more details. | 13 | * Version 2. See the file COPYING for more details. |
@@ -14,18 +15,17 @@ | |||
14 | * This is a common code for verification whether CPU supports | 15 | * This is a common code for verification whether CPU supports |
15 | * long mode and SSE or not. It is not called directly instead this | 16 | * long mode and SSE or not. It is not called directly instead this |
16 | * file is included at various places and compiled in that context. | 17 | * file is included at various places and compiled in that context. |
17 | * Following are the current usage. | 18 | * This file is expected to run in 32bit code. Currently: |
18 | * | 19 | * |
19 | * This file is included by both 16bit and 32bit code. | 20 | * arch/x86/boot/compressed/head_64.S: Boot cpu verification |
21 | * arch/x86/kernel/trampoline_64.S: secondary processor verfication | ||
22 | * arch/x86/kernel/head_32.S: processor startup | ||
20 | * | 23 | * |
21 | * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) | 24 | * verify_cpu, returns the status of longmode and SSE in register %eax. |
22 | * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) | ||
23 | * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) | ||
24 | * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) | ||
25 | * | ||
26 | * verify_cpu, returns the status of cpu check in register %eax. | ||
27 | * 0: Success 1: Failure | 25 | * 0: Success 1: Failure |
28 | * | 26 | * |
27 | * On Intel, the XD_DISABLE flag will be cleared as a side-effect. | ||
28 | * | ||
29 | * The caller needs to check for the error code and take the action | 29 | * The caller needs to check for the error code and take the action |
30 | * appropriately. Either display a message or halt. | 30 | * appropriately. Either display a message or halt. |
31 | */ | 31 | */ |
@@ -62,8 +62,41 @@ verify_cpu: | |||
62 | cmpl $0x444d4163,%ecx | 62 | cmpl $0x444d4163,%ecx |
63 | jnz verify_cpu_noamd | 63 | jnz verify_cpu_noamd |
64 | mov $1,%di # cpu is from AMD | 64 | mov $1,%di # cpu is from AMD |
65 | jmp verify_cpu_check | ||
65 | 66 | ||
66 | verify_cpu_noamd: | 67 | verify_cpu_noamd: |
68 | cmpl $0x756e6547,%ebx # GenuineIntel? | ||
69 | jnz verify_cpu_check | ||
70 | cmpl $0x49656e69,%edx | ||
71 | jnz verify_cpu_check | ||
72 | cmpl $0x6c65746e,%ecx | ||
73 | jnz verify_cpu_check | ||
74 | |||
75 | # only call IA32_MISC_ENABLE when: | ||
76 | # family > 6 || (family == 6 && model >= 0xd) | ||
77 | movl $0x1, %eax # check CPU family and model | ||
78 | cpuid | ||
79 | movl %eax, %ecx | ||
80 | |||
81 | andl $0x0ff00f00, %eax # mask family and extended family | ||
82 | shrl $8, %eax | ||
83 | cmpl $6, %eax | ||
84 | ja verify_cpu_clear_xd # family > 6, ok | ||
85 | jb verify_cpu_check # family < 6, skip | ||
86 | |||
87 | andl $0x000f00f0, %ecx # mask model and extended model | ||
88 | shrl $4, %ecx | ||
89 | cmpl $0xd, %ecx | ||
90 | jb verify_cpu_check # family == 6, model < 0xd, skip | ||
91 | |||
92 | verify_cpu_clear_xd: | ||
93 | movl $MSR_IA32_MISC_ENABLE, %ecx | ||
94 | rdmsr | ||
95 | btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE | ||
96 | jnc verify_cpu_check # only write MSR if bit was changed | ||
97 | wrmsr | ||
98 | |||
99 | verify_cpu_check: | ||
67 | movl $0x1,%eax # Does the cpu have what it takes | 100 | movl $0x1,%eax # Does the cpu have what it takes |
68 | cpuid | 101 | cpuid |
69 | andl $REQUIRED_MASK0,%edx | 102 | andl $REQUIRED_MASK0,%edx |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e03530aebfd0..bf4700755184 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -69,7 +69,7 @@ jiffies_64 = jiffies; | |||
69 | 69 | ||
70 | PHDRS { | 70 | PHDRS { |
71 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
72 | data PT_LOAD FLAGS(7); /* RWE */ | 72 | data PT_LOAD FLAGS(6); /* RW_ */ |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | user PT_LOAD FLAGS(5); /* R_E */ | 74 | user PT_LOAD FLAGS(5); /* R_E */ |
75 | #ifdef CONFIG_SMP | 75 | #ifdef CONFIG_SMP |
@@ -116,6 +116,10 @@ SECTIONS | |||
116 | 116 | ||
117 | EXCEPTION_TABLE(16) :text = 0x9090 | 117 | EXCEPTION_TABLE(16) :text = 0x9090 |
118 | 118 | ||
119 | #if defined(CONFIG_DEBUG_RODATA) | ||
120 | /* .text should occupy whole number of pages */ | ||
121 | . = ALIGN(PAGE_SIZE); | ||
122 | #endif | ||
119 | X64_ALIGN_DEBUG_RODATA_BEGIN | 123 | X64_ALIGN_DEBUG_RODATA_BEGIN |
120 | RO_DATA(PAGE_SIZE) | 124 | RO_DATA(PAGE_SIZE) |
121 | X64_ALIGN_DEBUG_RODATA_END | 125 | X64_ALIGN_DEBUG_RODATA_END |
@@ -335,7 +339,7 @@ SECTIONS | |||
335 | __bss_start = .; | 339 | __bss_start = .; |
336 | *(.bss..page_aligned) | 340 | *(.bss..page_aligned) |
337 | *(.bss) | 341 | *(.bss) |
338 | . = ALIGN(4); | 342 | . = ALIGN(PAGE_SIZE); |
339 | __bss_stop = .; | 343 | __bss_stop = .; |
340 | } | 344 | } |
341 | 345 | ||
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 9c253bd65e24..547128546cc3 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -394,7 +394,8 @@ static void __init setup_xstate_init(void) | |||
394 | * Setup init_xstate_buf to represent the init state of | 394 | * Setup init_xstate_buf to represent the init state of |
395 | * all the features managed by the xsave | 395 | * all the features managed by the xsave |
396 | */ | 396 | */ |
397 | init_xstate_buf = alloc_bootmem(xstate_size); | 397 | init_xstate_buf = alloc_bootmem_align(xstate_size, |
398 | __alignof__(struct xsave_struct)); | ||
398 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | 399 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; |
399 | 400 | ||
400 | clts(); | 401 | clts(); |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index f628234fbeca..3cece05e4ac4 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -575,6 +575,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
575 | s->pics[1].elcr_mask = 0xde; | 575 | s->pics[1].elcr_mask = 0xde; |
576 | s->pics[0].pics_state = s; | 576 | s->pics[0].pics_state = s; |
577 | s->pics[1].pics_state = s; | 577 | s->pics[1].pics_state = s; |
578 | s->pics[0].isr_ack = 0xff; | ||
579 | s->pics[1].isr_ack = 0xff; | ||
578 | 580 | ||
579 | /* | 581 | /* |
580 | * Initialize PIO device | 582 | * Initialize PIO device |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fb8b376bf28c..fbb04aee8301 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2394,7 +2394,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
2394 | ASSERT(!VALID_PAGE(root)); | 2394 | ASSERT(!VALID_PAGE(root)); |
2395 | spin_lock(&vcpu->kvm->mmu_lock); | 2395 | spin_lock(&vcpu->kvm->mmu_lock); |
2396 | kvm_mmu_free_some_pages(vcpu); | 2396 | kvm_mmu_free_some_pages(vcpu); |
2397 | sp = kvm_mmu_get_page(vcpu, i << 30, i << 30, | 2397 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
2398 | i << 30, | ||
2398 | PT32_ROOT_LEVEL, 1, ACC_ALL, | 2399 | PT32_ROOT_LEVEL, 1, ACC_ALL, |
2399 | NULL); | 2400 | NULL); |
2400 | root = __pa(sp->spt); | 2401 | root = __pa(sp->spt); |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 82e144a4e514..b81a9b7c2ca4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -3395,6 +3395,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3395 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3395 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
3396 | 3396 | ||
3397 | load_host_msrs(vcpu); | 3397 | load_host_msrs(vcpu); |
3398 | kvm_load_ldt(ldt_selector); | ||
3398 | loadsegment(fs, fs_selector); | 3399 | loadsegment(fs, fs_selector); |
3399 | #ifdef CONFIG_X86_64 | 3400 | #ifdef CONFIG_X86_64 |
3400 | load_gs_index(gs_selector); | 3401 | load_gs_index(gs_selector); |
@@ -3402,7 +3403,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3402 | #else | 3403 | #else |
3403 | loadsegment(gs, gs_selector); | 3404 | loadsegment(gs, gs_selector); |
3404 | #endif | 3405 | #endif |
3405 | kvm_load_ldt(ldt_selector); | ||
3406 | 3406 | ||
3407 | reload_tss(vcpu); | 3407 | reload_tss(vcpu); |
3408 | 3408 | ||
@@ -3494,6 +3494,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
3494 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 3494 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
3495 | { | 3495 | { |
3496 | switch (func) { | 3496 | switch (func) { |
3497 | case 0x00000001: | ||
3498 | /* Mask out xsave bit as long as it is not supported by SVM */ | ||
3499 | entry->ecx &= ~(bit(X86_FEATURE_XSAVE)); | ||
3500 | break; | ||
3497 | case 0x80000001: | 3501 | case 0x80000001: |
3498 | if (nested) | 3502 | if (nested) |
3499 | entry->ecx |= (1 << 2); /* Set SVM bit */ | 3503 | entry->ecx |= (1 << 2); /* Set SVM bit */ |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8da0e45ff7c9..81fcbe9515c5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -821,10 +821,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
821 | #endif | 821 | #endif |
822 | 822 | ||
823 | #ifdef CONFIG_X86_64 | 823 | #ifdef CONFIG_X86_64 |
824 | if (is_long_mode(&vmx->vcpu)) { | 824 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
825 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 825 | if (is_long_mode(&vmx->vcpu)) |
826 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | 826 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
827 | } | ||
828 | #endif | 827 | #endif |
829 | for (i = 0; i < vmx->save_nmsrs; ++i) | 828 | for (i = 0; i < vmx->save_nmsrs; ++i) |
830 | kvm_set_shared_msr(vmx->guest_msrs[i].index, | 829 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
@@ -839,23 +838,23 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
839 | 838 | ||
840 | ++vmx->vcpu.stat.host_state_reload; | 839 | ++vmx->vcpu.stat.host_state_reload; |
841 | vmx->host_state.loaded = 0; | 840 | vmx->host_state.loaded = 0; |
842 | if (vmx->host_state.fs_reload_needed) | 841 | #ifdef CONFIG_X86_64 |
843 | loadsegment(fs, vmx->host_state.fs_sel); | 842 | if (is_long_mode(&vmx->vcpu)) |
843 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | ||
844 | #endif | ||
844 | if (vmx->host_state.gs_ldt_reload_needed) { | 845 | if (vmx->host_state.gs_ldt_reload_needed) { |
845 | kvm_load_ldt(vmx->host_state.ldt_sel); | 846 | kvm_load_ldt(vmx->host_state.ldt_sel); |
846 | #ifdef CONFIG_X86_64 | 847 | #ifdef CONFIG_X86_64 |
847 | load_gs_index(vmx->host_state.gs_sel); | 848 | load_gs_index(vmx->host_state.gs_sel); |
848 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
849 | #else | 849 | #else |
850 | loadsegment(gs, vmx->host_state.gs_sel); | 850 | loadsegment(gs, vmx->host_state.gs_sel); |
851 | #endif | 851 | #endif |
852 | } | 852 | } |
853 | if (vmx->host_state.fs_reload_needed) | ||
854 | loadsegment(fs, vmx->host_state.fs_sel); | ||
853 | reload_tss(); | 855 | reload_tss(); |
854 | #ifdef CONFIG_X86_64 | 856 | #ifdef CONFIG_X86_64 |
855 | if (is_long_mode(&vmx->vcpu)) { | 857 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
856 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | ||
857 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
858 | } | ||
859 | #endif | 858 | #endif |
860 | if (current_thread_info()->status & TS_USEDFPU) | 859 | if (current_thread_info()->status & TS_USEDFPU) |
861 | clts(); | 860 | clts(); |
@@ -4228,11 +4227,6 @@ static int vmx_get_lpage_level(void) | |||
4228 | return PT_PDPE_LEVEL; | 4227 | return PT_PDPE_LEVEL; |
4229 | } | 4228 | } |
4230 | 4229 | ||
4231 | static inline u32 bit(int bitno) | ||
4232 | { | ||
4233 | return 1 << (bitno & 31); | ||
4234 | } | ||
4235 | |||
4236 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | 4230 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) |
4237 | { | 4231 | { |
4238 | struct kvm_cpuid_entry2 *best; | 4232 | struct kvm_cpuid_entry2 *best; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cdac9e592aa5..b989e1f1e5d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -155,11 +155,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
155 | 155 | ||
156 | u64 __read_mostly host_xcr0; | 156 | u64 __read_mostly host_xcr0; |
157 | 157 | ||
158 | static inline u32 bit(int bitno) | ||
159 | { | ||
160 | return 1 << (bitno & 31); | ||
161 | } | ||
162 | |||
163 | static void kvm_on_user_return(struct user_return_notifier *urn) | 158 | static void kvm_on_user_return(struct user_return_notifier *urn) |
164 | { | 159 | { |
165 | unsigned slot; | 160 | unsigned slot; |
@@ -4569,9 +4564,11 @@ static void kvm_timer_init(void) | |||
4569 | #ifdef CONFIG_CPU_FREQ | 4564 | #ifdef CONFIG_CPU_FREQ |
4570 | struct cpufreq_policy policy; | 4565 | struct cpufreq_policy policy; |
4571 | memset(&policy, 0, sizeof(policy)); | 4566 | memset(&policy, 0, sizeof(policy)); |
4572 | cpufreq_get_policy(&policy, get_cpu()); | 4567 | cpu = get_cpu(); |
4568 | cpufreq_get_policy(&policy, cpu); | ||
4573 | if (policy.cpuinfo.max_freq) | 4569 | if (policy.cpuinfo.max_freq) |
4574 | max_tsc_khz = policy.cpuinfo.max_freq; | 4570 | max_tsc_khz = policy.cpuinfo.max_freq; |
4571 | put_cpu(); | ||
4575 | #endif | 4572 | #endif |
4576 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | 4573 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, |
4577 | CPUFREQ_TRANSITION_NOTIFIER); | 4574 | CPUFREQ_TRANSITION_NOTIFIER); |
@@ -5522,6 +5519,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5522 | 5519 | ||
5523 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; | 5520 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
5524 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5521 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
5522 | if (sregs->cr4 & X86_CR4_OSXSAVE) | ||
5523 | update_cpuid(vcpu); | ||
5525 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5524 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
5526 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); | 5525 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); |
5527 | mmu_reset_needed = 1; | 5526 | mmu_reset_needed = 1; |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2cea414489f3..c600da830ce0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
70 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 70 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
71 | } | 71 | } |
72 | 72 | ||
73 | static inline u32 bit(int bitno) | ||
74 | { | ||
75 | return 1 << (bitno & 31); | ||
76 | } | ||
77 | |||
73 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 78 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
74 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 79 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
75 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); | 80 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 73b1e1a1f489..4996cf5f73a0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3) | |||
531 | { | 531 | { |
532 | lguest_data.pgdir = cr3; | 532 | lguest_data.pgdir = cr3; |
533 | lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); | 533 | lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); |
534 | cr3_changed = true; | 534 | |
535 | /* These two page tables are simple, linear, and used during boot */ | ||
536 | if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) | ||
537 | cr3_changed = true; | ||
535 | } | 538 | } |
536 | 539 | ||
537 | static unsigned long lguest_read_cr3(void) | 540 | static unsigned long lguest_read_cr3(void) |
@@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
703 | * to forget all of them. Fortunately, this is very rare. | 706 | * to forget all of them. Fortunately, this is very rare. |
704 | * | 707 | * |
705 | * ... except in early boot when the kernel sets up the initial pagetables, | 708 | * ... except in early boot when the kernel sets up the initial pagetables, |
706 | * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell | 709 | * which makes booting astonishingly slow: 48 seconds! So we don't even tell |
707 | * the Host anything changed until we've done the first page table switch, | 710 | * the Host anything changed until we've done the first real page table switch, |
708 | * which brings boot back to 0.25 seconds. | 711 | * which brings boot back to 4.3 seconds. |
709 | */ | 712 | */ |
710 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) | 713 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) |
711 | { | 714 | { |
@@ -1002,7 +1005,7 @@ static void lguest_time_init(void) | |||
1002 | clockevents_register_device(&lguest_clockevent); | 1005 | clockevents_register_device(&lguest_clockevent); |
1003 | 1006 | ||
1004 | /* Finally, we unblock the timer interrupt. */ | 1007 | /* Finally, we unblock the timer interrupt. */ |
1005 | enable_lguest_irq(0); | 1008 | clear_bit(0, lguest_data.blocked_interrupts); |
1006 | } | 1009 | } |
1007 | 1010 | ||
1008 | /* | 1011 | /* |
@@ -1349,9 +1352,6 @@ __init void lguest_init(void) | |||
1349 | */ | 1352 | */ |
1350 | switch_to_new_gdt(0); | 1353 | switch_to_new_gdt(0); |
1351 | 1354 | ||
1352 | /* We actually boot with all memory mapped, but let's say 128MB. */ | ||
1353 | max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; | ||
1354 | |||
1355 | /* | 1355 | /* |
1356 | * The Host<->Guest Switcher lives at the top of our address space, and | 1356 | * The Host<->Guest Switcher lives at the top of our address space, and |
1357 | * the Host told us how big it is when we made LGUEST_INIT hypercall: | 1357 | * the Host told us how big it is when we made LGUEST_INIT hypercall: |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c0e28a13de7d..947f42abe820 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
364 | /* | 364 | /* |
365 | * We just marked the kernel text read only above, now that | 365 | * We just marked the kernel text read only above, now that |
366 | * we are going to free part of that, we need to make that | 366 | * we are going to free part of that, we need to make that |
367 | * writeable first. | 367 | * writeable and non-executable first. |
368 | */ | 368 | */ |
369 | set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); | ||
369 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | 370 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); |
370 | 371 | ||
371 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | 372 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0e969f9f401b..f89b5bb4e93f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |||
226 | 226 | ||
227 | static inline int is_kernel_text(unsigned long addr) | 227 | static inline int is_kernel_text(unsigned long addr) |
228 | { | 228 | { |
229 | if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) | 229 | if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) |
230 | return 1; | 230 | return 1; |
231 | return 0; | 231 | return 0; |
232 | } | 232 | } |
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void) | |||
912 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 912 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
913 | } | 913 | } |
914 | 914 | ||
915 | static void mark_nxdata_nx(void) | ||
916 | { | ||
917 | /* | ||
918 | * When this called, init has already been executed and released, | ||
919 | * so everything past _etext sould be NX. | ||
920 | */ | ||
921 | unsigned long start = PFN_ALIGN(_etext); | ||
922 | /* | ||
923 | * This comes from is_kernel_text upper limit. Also HPAGE where used: | ||
924 | */ | ||
925 | unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; | ||
926 | |||
927 | if (__supported_pte_mask & _PAGE_NX) | ||
928 | printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); | ||
929 | set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); | ||
930 | } | ||
931 | |||
915 | void mark_rodata_ro(void) | 932 | void mark_rodata_ro(void) |
916 | { | 933 | { |
917 | unsigned long start = PFN_ALIGN(_text); | 934 | unsigned long start = PFN_ALIGN(_text); |
@@ -946,6 +963,7 @@ void mark_rodata_ro(void) | |||
946 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | 963 | printk(KERN_INFO "Testing CPA: write protecting again\n"); |
947 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 964 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
948 | #endif | 965 | #endif |
966 | mark_nxdata_nx(); | ||
949 | } | 967 | } |
950 | #endif | 968 | #endif |
951 | 969 | ||
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index af3b6c8a436f..704a37cedddb 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, | |||
185 | e->trace.entries = e->trace_entries; | 185 | e->trace.entries = e->trace_entries; |
186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | 186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); |
187 | e->trace.skip = 0; | 187 | e->trace.skip = 0; |
188 | save_stack_trace_bp(&e->trace, regs->bp); | 188 | save_stack_trace_regs(&e->trace, regs); |
189 | 189 | ||
190 | /* Round address down to nearest 16 bytes */ | 190 | /* Round address down to nearest 16 bytes */ |
191 | shadow_copy = kmemcheck_shadow_lookup(address | 191 | shadow_copy = kmemcheck_shadow_lookup(address |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 532e7933d606..8b830ca14ac4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/pfn.h> | 13 | #include <linux/pfn.h> |
14 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/pci.h> | ||
16 | 17 | ||
17 | #include <asm/e820.h> | 18 | #include <asm/e820.h> |
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
255 | unsigned long pfn) | 256 | unsigned long pfn) |
256 | { | 257 | { |
257 | pgprot_t forbidden = __pgprot(0); | 258 | pgprot_t forbidden = __pgprot(0); |
259 | pgprot_t required = __pgprot(0); | ||
258 | 260 | ||
259 | /* | 261 | /* |
260 | * The BIOS area between 640k and 1Mb needs to be executable for | 262 | * The BIOS area between 640k and 1Mb needs to be executable for |
261 | * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. | 263 | * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. |
262 | */ | 264 | */ |
263 | if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) | 265 | #ifdef CONFIG_PCI_BIOS |
266 | if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) | ||
264 | pgprot_val(forbidden) |= _PAGE_NX; | 267 | pgprot_val(forbidden) |= _PAGE_NX; |
268 | #endif | ||
265 | 269 | ||
266 | /* | 270 | /* |
267 | * The kernel text needs to be executable for obvious reasons | 271 | * The kernel text needs to be executable for obvious reasons |
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
278 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, | 282 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, |
279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 283 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
280 | pgprot_val(forbidden) |= _PAGE_RW; | 284 | pgprot_val(forbidden) |= _PAGE_RW; |
285 | /* | ||
286 | * .data and .bss should always be writable. | ||
287 | */ | ||
288 | if (within(address, (unsigned long)_sdata, (unsigned long)_edata) || | ||
289 | within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop)) | ||
290 | pgprot_val(required) |= _PAGE_RW; | ||
281 | 291 | ||
282 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | 292 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) |
283 | /* | 293 | /* |
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
317 | #endif | 327 | #endif |
318 | 328 | ||
319 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 329 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
330 | prot = __pgprot(pgprot_val(prot) | pgprot_val(required)); | ||
320 | 331 | ||
321 | return prot; | 332 | return prot; |
322 | } | 333 | } |
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
393 | { | 404 | { |
394 | unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; | 405 | unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; |
395 | pte_t new_pte, old_pte, *tmp; | 406 | pte_t new_pte, old_pte, *tmp; |
396 | pgprot_t old_prot, new_prot; | 407 | pgprot_t old_prot, new_prot, req_prot; |
397 | int i, do_split = 1; | 408 | int i, do_split = 1; |
398 | unsigned int level; | 409 | unsigned int level; |
399 | 410 | ||
@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
438 | * We are safe now. Check whether the new pgprot is the same: | 449 | * We are safe now. Check whether the new pgprot is the same: |
439 | */ | 450 | */ |
440 | old_pte = *kpte; | 451 | old_pte = *kpte; |
441 | old_prot = new_prot = pte_pgprot(old_pte); | 452 | old_prot = new_prot = req_prot = pte_pgprot(old_pte); |
442 | 453 | ||
443 | pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); | 454 | pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); |
444 | pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); | 455 | pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); |
445 | 456 | ||
446 | /* | 457 | /* |
447 | * old_pte points to the large page base address. So we need | 458 | * old_pte points to the large page base address. So we need |
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
450 | pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); | 461 | pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); |
451 | cpa->pfn = pfn; | 462 | cpa->pfn = pfn; |
452 | 463 | ||
453 | new_prot = static_protections(new_prot, address, pfn); | 464 | new_prot = static_protections(req_prot, address, pfn); |
454 | 465 | ||
455 | /* | 466 | /* |
456 | * We need to check the full range, whether | 467 | * We need to check the full range, whether |
457 | * static_protection() requires a different pgprot for one of | 468 | * static_protection() requires a different pgprot for one of |
458 | * the pages in the range we try to preserve: | 469 | * the pages in the range we try to preserve: |
459 | */ | 470 | */ |
460 | addr = address + PAGE_SIZE; | 471 | addr = address & pmask; |
461 | pfn++; | 472 | pfn = pte_pfn(old_pte); |
462 | for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { | 473 | for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { |
463 | pgprot_t chk_prot = static_protections(new_prot, addr, pfn); | 474 | pgprot_t chk_prot = static_protections(req_prot, addr, pfn); |
464 | 475 | ||
465 | if (pgprot_val(chk_prot) != pgprot_val(new_prot)) | 476 | if (pgprot_val(chk_prot) != pgprot_val(new_prot)) |
466 | goto out_unlock; | 477 | goto out_unlock; |
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
483 | * that we limited the number of possible pages already to | 494 | * that we limited the number of possible pages already to |
484 | * the number of pages in the large page. | 495 | * the number of pages in the large page. |
485 | */ | 496 | */ |
486 | if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { | 497 | if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { |
487 | /* | 498 | /* |
488 | * The address is aligned and the number of pages | 499 | * The address is aligned and the number of pages |
489 | * covers the full page. | 500 | * covers the full page. |
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index a3250aa34086..410531d3c292 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -41,7 +41,7 @@ void __init x86_report_nx(void) | |||
41 | { | 41 | { |
42 | if (!cpu_has_nx) { | 42 | if (!cpu_has_nx) { |
43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " | 43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
44 | "missing in CPU or disabled in BIOS!\n"); | 44 | "missing in CPU!\n"); |
45 | } else { | 45 | } else { |
46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
47 | if (disable_nx) { | 47 | if (disable_nx) { |
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index a17dffd136c1..f16434568a51 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) | |||
92 | /* mark this node as "seen" in node bitmap */ | 92 | /* mark this node as "seen" in node bitmap */ |
93 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); | 93 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); |
94 | 94 | ||
95 | /* don't need to check apic_id here, because it is always 8 bits */ | ||
95 | apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; | 96 | apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; |
96 | 97 | ||
97 | printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", | 98 | printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a35cb9d8b060..171a0aacb99a 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
134 | } | 134 | } |
135 | 135 | ||
136 | apic_id = pa->apic_id; | 136 | apic_id = pa->apic_id; |
137 | if (apic_id >= MAX_LOCAL_APIC) { | ||
138 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); | ||
139 | return; | ||
140 | } | ||
137 | apicid_to_node[apic_id] = node; | 141 | apicid_to_node[apic_id] = node; |
138 | node_set(node, cpu_nodes_parsed); | 142 | node_set(node, cpu_nodes_parsed); |
139 | acpi_numa = 1; | 143 | acpi_numa = 1; |
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
168 | apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; | 172 | apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; |
169 | else | 173 | else |
170 | apic_id = pa->apic_id; | 174 | apic_id = pa->apic_id; |
175 | |||
176 | if (apic_id >= MAX_LOCAL_APIC) { | ||
177 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); | ||
178 | return; | ||
179 | } | ||
180 | |||
171 | apicid_to_node[apic_id] = node; | 181 | apicid_to_node[apic_id] = node; |
172 | node_set(node, cpu_nodes_parsed); | 182 | node_set(node, cpu_nodes_parsed); |
173 | acpi_numa = 1; | 183 | acpi_numa = 1; |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 12cdbb17ad18..6acc724d5d8f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
223 | 223 | ||
224 | static void __cpuinit calculate_tlb_offset(void) | 224 | static void __cpuinit calculate_tlb_offset(void) |
225 | { | 225 | { |
226 | int cpu, node, nr_node_vecs; | 226 | int cpu, node, nr_node_vecs, idx = 0; |
227 | /* | 227 | /* |
228 | * we are changing tlb_vector_offset for each CPU in runtime, but this | 228 | * we are changing tlb_vector_offset for each CPU in runtime, but this |
229 | * will not cause inconsistency, as the write is atomic under X86. we | 229 | * will not cause inconsistency, as the write is atomic under X86. we |
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void) | |||
239 | nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; | 239 | nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; |
240 | 240 | ||
241 | for_each_online_node(node) { | 241 | for_each_online_node(node) { |
242 | int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * | 242 | int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * |
243 | nr_node_vecs; | 243 | nr_node_vecs; |
244 | int cpu_offset = 0; | 244 | int cpu_offset = 0; |
245 | for_each_cpu(cpu, cpumask_of_node(node)) { | 245 | for_each_cpu(cpu, cpumask_of_node(node)) { |
@@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void) | |||
248 | cpu_offset++; | 248 | cpu_offset++; |
249 | cpu_offset = cpu_offset % nr_node_vecs; | 249 | cpu_offset = cpu_offset % nr_node_vecs; |
250 | } | 250 | } |
251 | idx++; | ||
251 | } | 252 | } |
252 | } | 253 | } |
253 | 254 | ||
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..72cbec14d783 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
126 | if (!user_mode_vm(regs)) { | 126 | if (!user_mode_vm(regs)) { |
127 | unsigned long stack = kernel_stack_pointer(regs); | 127 | unsigned long stack = kernel_stack_pointer(regs); |
128 | if (depth) | 128 | if (depth) |
129 | dump_trace(NULL, regs, (unsigned long *)stack, 0, | 129 | dump_trace(NULL, regs, (unsigned long *)stack, |
130 | &backtrace_ops, &depth); | 130 | &backtrace_ops, &depth); |
131 | return; | 131 | return; |
132 | } | 132 | } |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 4e8baad36d37..358c8b9c96a7 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
732 | case 0x14: | 732 | case 0x14: |
733 | cpu_type = "x86-64/family14h"; | 733 | cpu_type = "x86-64/family14h"; |
734 | break; | 734 | break; |
735 | case 0x15: | ||
736 | cpu_type = "x86-64/family15h"; | ||
737 | break; | ||
735 | default: | 738 | default: |
736 | return -ENODEV; | 739 | return -ENODEV; |
737 | } | 740 | } |
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index e3ecb71b5790..0636dd93cef8 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c | |||
@@ -58,9 +58,6 @@ static void timer_stop(void) | |||
58 | 58 | ||
59 | int __init op_nmi_timer_init(struct oprofile_operations *ops) | 59 | int __init op_nmi_timer_init(struct oprofile_operations *ops) |
60 | { | 60 | { |
61 | if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) | ||
62 | return -ENODEV; | ||
63 | |||
64 | ops->start = timer_start; | 61 | ops->start = timer_start; |
65 | ops->stop = timer_stop; | 62 | ops->stop = timer_stop; |
66 | ops->cpu_type = "timer"; | 63 | ops->cpu_type = "timer"; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index a011bcc0f943..c3b8e24f2b16 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -29,11 +29,12 @@ | |||
29 | #include "op_x86_model.h" | 29 | #include "op_x86_model.h" |
30 | #include "op_counter.h" | 30 | #include "op_counter.h" |
31 | 31 | ||
32 | #define NUM_COUNTERS 4 | 32 | #define NUM_COUNTERS 4 |
33 | #define NUM_COUNTERS_F15H 6 | ||
33 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 34 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
34 | #define NUM_VIRT_COUNTERS 32 | 35 | #define NUM_VIRT_COUNTERS 32 |
35 | #else | 36 | #else |
36 | #define NUM_VIRT_COUNTERS NUM_COUNTERS | 37 | #define NUM_VIRT_COUNTERS 0 |
37 | #endif | 38 | #endif |
38 | 39 | ||
39 | #define OP_EVENT_MASK 0x0FFF | 40 | #define OP_EVENT_MASK 0x0FFF |
@@ -41,7 +42,8 @@ | |||
41 | 42 | ||
42 | #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) | 43 | #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) |
43 | 44 | ||
44 | static unsigned long reset_value[NUM_VIRT_COUNTERS]; | 45 | static int num_counters; |
46 | static unsigned long reset_value[OP_MAX_COUNTER]; | ||
45 | 47 | ||
46 | #define IBS_FETCH_SIZE 6 | 48 | #define IBS_FETCH_SIZE 6 |
47 | #define IBS_OP_SIZE 12 | 49 | #define IBS_OP_SIZE 12 |
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | |||
387 | int i; | 389 | int i; |
388 | 390 | ||
389 | /* enable active counters */ | 391 | /* enable active counters */ |
390 | for (i = 0; i < NUM_COUNTERS; ++i) { | 392 | for (i = 0; i < num_counters; ++i) { |
391 | int virt = op_x86_phys_to_virt(i); | 393 | int virt = op_x86_phys_to_virt(i); |
392 | if (!reset_value[virt]) | 394 | if (!reset_value[virt]) |
393 | continue; | 395 | continue; |
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
406 | { | 408 | { |
407 | int i; | 409 | int i; |
408 | 410 | ||
409 | for (i = 0; i < NUM_COUNTERS; ++i) { | 411 | for (i = 0; i < num_counters; ++i) { |
410 | if (!msrs->counters[i].addr) | 412 | if (!msrs->counters[i].addr) |
411 | continue; | 413 | continue; |
412 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | 414 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); |
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
418 | { | 420 | { |
419 | int i; | 421 | int i; |
420 | 422 | ||
421 | for (i = 0; i < NUM_COUNTERS; i++) { | 423 | for (i = 0; i < num_counters; i++) { |
422 | if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | 424 | if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) |
423 | goto fail; | 425 | goto fail; |
424 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { | 426 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { |
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
426 | goto fail; | 428 | goto fail; |
427 | } | 429 | } |
428 | /* both registers must be reserved */ | 430 | /* both registers must be reserved */ |
429 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | 431 | if (num_counters == NUM_COUNTERS_F15H) { |
430 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | 432 | msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); |
433 | msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); | ||
434 | } else { | ||
435 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
436 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
437 | } | ||
431 | continue; | 438 | continue; |
432 | fail: | 439 | fail: |
433 | if (!counter_config[i].enabled) | 440 | if (!counter_config[i].enabled) |
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
447 | int i; | 454 | int i; |
448 | 455 | ||
449 | /* setup reset_value */ | 456 | /* setup reset_value */ |
450 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | 457 | for (i = 0; i < OP_MAX_COUNTER; ++i) { |
451 | if (counter_config[i].enabled | 458 | if (counter_config[i].enabled |
452 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | 459 | && msrs->counters[op_x86_virt_to_phys(i)].addr) |
453 | reset_value[i] = counter_config[i].count; | 460 | reset_value[i] = counter_config[i].count; |
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
456 | } | 463 | } |
457 | 464 | ||
458 | /* clear all counters */ | 465 | /* clear all counters */ |
459 | for (i = 0; i < NUM_COUNTERS; ++i) { | 466 | for (i = 0; i < num_counters; ++i) { |
460 | if (!msrs->controls[i].addr) | 467 | if (!msrs->controls[i].addr) |
461 | continue; | 468 | continue; |
462 | rdmsrl(msrs->controls[i].addr, val); | 469 | rdmsrl(msrs->controls[i].addr, val); |
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | |||
472 | } | 479 | } |
473 | 480 | ||
474 | /* enable active counters */ | 481 | /* enable active counters */ |
475 | for (i = 0; i < NUM_COUNTERS; ++i) { | 482 | for (i = 0; i < num_counters; ++i) { |
476 | int virt = op_x86_phys_to_virt(i); | 483 | int virt = op_x86_phys_to_virt(i); |
477 | if (!reset_value[virt]) | 484 | if (!reset_value[virt]) |
478 | continue; | 485 | continue; |
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, | |||
503 | u64 val; | 510 | u64 val; |
504 | int i; | 511 | int i; |
505 | 512 | ||
506 | for (i = 0; i < NUM_COUNTERS; ++i) { | 513 | for (i = 0; i < num_counters; ++i) { |
507 | int virt = op_x86_phys_to_virt(i); | 514 | int virt = op_x86_phys_to_virt(i); |
508 | if (!reset_value[virt]) | 515 | if (!reset_value[virt]) |
509 | continue; | 516 | continue; |
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs) | |||
526 | u64 val; | 533 | u64 val; |
527 | int i; | 534 | int i; |
528 | 535 | ||
529 | for (i = 0; i < NUM_COUNTERS; ++i) { | 536 | for (i = 0; i < num_counters; ++i) { |
530 | if (!reset_value[op_x86_phys_to_virt(i)]) | 537 | if (!reset_value[op_x86_phys_to_virt(i)]) |
531 | continue; | 538 | continue; |
532 | rdmsrl(msrs->controls[i].addr, val); | 539 | rdmsrl(msrs->controls[i].addr, val); |
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
546 | * Subtle: stop on all counters to avoid race with setting our | 553 | * Subtle: stop on all counters to avoid race with setting our |
547 | * pm callback | 554 | * pm callback |
548 | */ | 555 | */ |
549 | for (i = 0; i < NUM_COUNTERS; ++i) { | 556 | for (i = 0; i < num_counters; ++i) { |
550 | if (!reset_value[op_x86_phys_to_virt(i)]) | 557 | if (!reset_value[op_x86_phys_to_virt(i)]) |
551 | continue; | 558 | continue; |
552 | rdmsrl(msrs->controls[i].addr, val); | 559 | rdmsrl(msrs->controls[i].addr, val); |
@@ -603,6 +610,7 @@ static int force_ibs_eilvt_setup(void) | |||
603 | ret = setup_ibs_ctl(i); | 610 | ret = setup_ibs_ctl(i); |
604 | if (ret) | 611 | if (ret) |
605 | return ret; | 612 | return ret; |
613 | pr_err(FW_BUG "using offset %d for IBS interrupts\n", i); | ||
606 | return 0; | 614 | return 0; |
607 | } | 615 | } |
608 | 616 | ||
@@ -630,21 +638,29 @@ static int __init_ibs_nmi(void) | |||
630 | return 0; | 638 | return 0; |
631 | } | 639 | } |
632 | 640 | ||
633 | /* initialize the APIC for the IBS interrupts if available */ | 641 | /* |
642 | * check and reserve APIC extended interrupt LVT offset for IBS if | ||
643 | * available | ||
644 | * | ||
645 | * init_ibs() preforms implicitly cpu-local operations, so pin this | ||
646 | * thread to its current CPU | ||
647 | */ | ||
648 | |||
634 | static void init_ibs(void) | 649 | static void init_ibs(void) |
635 | { | 650 | { |
636 | ibs_caps = get_ibs_caps(); | 651 | preempt_disable(); |
637 | 652 | ||
653 | ibs_caps = get_ibs_caps(); | ||
638 | if (!ibs_caps) | 654 | if (!ibs_caps) |
639 | return; | 655 | goto out; |
640 | 656 | ||
641 | if (__init_ibs_nmi()) { | 657 | if (__init_ibs_nmi() < 0) |
642 | ibs_caps = 0; | 658 | ibs_caps = 0; |
643 | return; | 659 | else |
644 | } | 660 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); |
645 | 661 | ||
646 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", | 662 | out: |
647 | (unsigned)ibs_caps); | 663 | preempt_enable(); |
648 | } | 664 | } |
649 | 665 | ||
650 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 666 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); |
@@ -698,18 +714,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
698 | return 0; | 714 | return 0; |
699 | } | 715 | } |
700 | 716 | ||
717 | struct op_x86_model_spec op_amd_spec; | ||
718 | |||
701 | static int op_amd_init(struct oprofile_operations *ops) | 719 | static int op_amd_init(struct oprofile_operations *ops) |
702 | { | 720 | { |
703 | init_ibs(); | 721 | init_ibs(); |
704 | create_arch_files = ops->create_files; | 722 | create_arch_files = ops->create_files; |
705 | ops->create_files = setup_ibs_files; | 723 | ops->create_files = setup_ibs_files; |
724 | |||
725 | if (boot_cpu_data.x86 == 0x15) { | ||
726 | num_counters = NUM_COUNTERS_F15H; | ||
727 | } else { | ||
728 | num_counters = NUM_COUNTERS; | ||
729 | } | ||
730 | |||
731 | op_amd_spec.num_counters = num_counters; | ||
732 | op_amd_spec.num_controls = num_counters; | ||
733 | op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS); | ||
734 | |||
706 | return 0; | 735 | return 0; |
707 | } | 736 | } |
708 | 737 | ||
709 | struct op_x86_model_spec op_amd_spec = { | 738 | struct op_x86_model_spec op_amd_spec = { |
710 | .num_counters = NUM_COUNTERS, | 739 | /* num_counters/num_controls filled in at runtime */ |
711 | .num_controls = NUM_COUNTERS, | ||
712 | .num_virt_counters = NUM_VIRT_COUNTERS, | ||
713 | .reserved = MSR_AMD_EVENTSEL_RESERVED, | 740 | .reserved = MSR_AMD_EVENTSEL_RESERVED, |
714 | .event_mask = OP_EVENT_MASK, | 741 | .event_mask = OP_EVENT_MASK, |
715 | .init = op_amd_init, | 742 | .init = op_amd_init, |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 182558dd5515..9fadec074142 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/oprofile.h> | 11 | #include <linux/oprofile.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/ptrace.h> | 13 | #include <linux/ptrace.h> |
14 | #include <linux/nmi.h> | 14 | #include <asm/nmi.h> |
15 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
17 | #include <asm/apic.h> | 17 | #include <asm/apic.h> |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index c4bb261c106e..b1805b78842f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -65,21 +65,13 @@ pcibios_align_resource(void *data, const struct resource *res, | |||
65 | resource_size_t size, resource_size_t align) | 65 | resource_size_t size, resource_size_t align) |
66 | { | 66 | { |
67 | struct pci_dev *dev = data; | 67 | struct pci_dev *dev = data; |
68 | resource_size_t start = round_down(res->end - size + 1, align); | 68 | resource_size_t start = res->start; |
69 | 69 | ||
70 | if (res->flags & IORESOURCE_IO) { | 70 | if (res->flags & IORESOURCE_IO) { |
71 | 71 | if (skip_isa_ioresource_align(dev)) | |
72 | /* | 72 | return start; |
73 | * If we're avoiding ISA aliases, the largest contiguous I/O | 73 | if (start & 0x300) |
74 | * port space is 256 bytes. Clearing bits 9 and 10 preserves | 74 | start = (start + 0x3ff) & ~0x3ff; |
75 | * all 256-byte and smaller alignments, so the result will | ||
76 | * still be correctly aligned. | ||
77 | */ | ||
78 | if (!skip_isa_ioresource_align(dev)) | ||
79 | start &= ~0x300; | ||
80 | } else if (res->flags & IORESOURCE_MEM) { | ||
81 | if (start < BIOS_END) | ||
82 | start = res->end; /* fail; no space */ | ||
83 | } | 75 | } |
84 | return start; | 76 | return start; |
85 | } | 77 | } |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 2492d165096a..a5f7d0d63de0 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <asm/pci_x86.h> | 10 | #include <asm/pci_x86.h> |
11 | #include <asm/pci-functions.h> | 11 | #include <asm/pci-functions.h> |
12 | #include <asm/cacheflush.h> | ||
12 | 13 | ||
13 | /* BIOS32 signature: "_32_" */ | 14 | /* BIOS32 signature: "_32_" */ |
14 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) | 15 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) |
@@ -25,6 +26,27 @@ | |||
25 | #define PCIBIOS_HW_TYPE1_SPEC 0x10 | 26 | #define PCIBIOS_HW_TYPE1_SPEC 0x10 |
26 | #define PCIBIOS_HW_TYPE2_SPEC 0x20 | 27 | #define PCIBIOS_HW_TYPE2_SPEC 0x20 |
27 | 28 | ||
29 | int pcibios_enabled; | ||
30 | |||
31 | /* According to the BIOS specification at: | ||
32 | * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could | ||
33 | * restrict the x zone to some pages and make it ro. But this may be | ||
34 | * broken on some bios, complex to handle with static_protections. | ||
35 | * We could make the 0xe0000-0x100000 range rox, but this can break | ||
36 | * some ISA mapping. | ||
37 | * | ||
38 | * So we let's an rw and x hole when pcibios is used. This shouldn't | ||
39 | * happen for modern system with mmconfig, and if you don't want it | ||
40 | * you could disable pcibios... | ||
41 | */ | ||
42 | static inline void set_bios_x(void) | ||
43 | { | ||
44 | pcibios_enabled = 1; | ||
45 | set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); | ||
46 | if (__supported_pte_mask & _PAGE_NX) | ||
47 | printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n"); | ||
48 | } | ||
49 | |||
28 | /* | 50 | /* |
29 | * This is the standard structure used to identify the entry point | 51 | * This is the standard structure used to identify the entry point |
30 | * to the BIOS32 Service Directory, as documented in | 52 | * to the BIOS32 Service Directory, as documented in |
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void) | |||
332 | DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", | 354 | DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", |
333 | bios32_entry); | 355 | bios32_entry); |
334 | bios32_indirect.address = bios32_entry + PAGE_OFFSET; | 356 | bios32_indirect.address = bios32_entry + PAGE_OFFSET; |
357 | set_bios_x(); | ||
335 | if (check_pcibios()) | 358 | if (check_pcibios()) |
336 | return &pci_bios_access; | 359 | return &pci_bios_access; |
337 | } | 360 | } |
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d7b5109f7a9c..25cd4a07d09f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -70,6 +70,9 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | |||
70 | struct xen_pci_frontend_ops *xen_pci_frontend; | 70 | struct xen_pci_frontend_ops *xen_pci_frontend; |
71 | EXPORT_SYMBOL_GPL(xen_pci_frontend); | 71 | EXPORT_SYMBOL_GPL(xen_pci_frontend); |
72 | 72 | ||
73 | #define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ | ||
74 | MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) | ||
75 | |||
73 | static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, | 76 | static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, |
74 | struct msi_msg *msg) | 77 | struct msi_msg *msg) |
75 | { | 78 | { |
@@ -83,12 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, | |||
83 | MSI_ADDR_REDIRECTION_CPU | | 86 | MSI_ADDR_REDIRECTION_CPU | |
84 | MSI_ADDR_DEST_ID(pirq); | 87 | MSI_ADDR_DEST_ID(pirq); |
85 | 88 | ||
86 | msg->data = | 89 | msg->data = XEN_PIRQ_MSI_DATA; |
87 | MSI_DATA_TRIGGER_EDGE | | ||
88 | MSI_DATA_LEVEL_ASSERT | | ||
89 | /* delivery mode reserved */ | ||
90 | (3 << 8) | | ||
91 | MSI_DATA_VECTOR(0); | ||
92 | } | 90 | } |
93 | 91 | ||
94 | static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 92 | static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
@@ -98,8 +96,23 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
98 | struct msi_msg msg; | 96 | struct msi_msg msg; |
99 | 97 | ||
100 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 98 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
99 | __read_msi_msg(msidesc, &msg); | ||
100 | pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | | ||
101 | ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); | ||
102 | if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { | ||
103 | xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? | ||
104 | "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); | ||
105 | if (irq < 0) | ||
106 | goto error; | ||
107 | ret = set_irq_msi(irq, msidesc); | ||
108 | if (ret < 0) | ||
109 | goto error_while; | ||
110 | printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" | ||
111 | " pirq=%d\n", irq, pirq); | ||
112 | return 0; | ||
113 | } | ||
101 | xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? | 114 | xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? |
102 | "msi-x" : "msi", &irq, &pirq); | 115 | "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); |
103 | if (irq < 0 || pirq < 0) | 116 | if (irq < 0 || pirq < 0) |
104 | goto error; | 117 | goto error; |
105 | printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); | 118 | printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); |
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index cc822a29b227..7785b72ecc3a 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c | |||
@@ -37,9 +37,9 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |||
37 | /* All CPUs enumerated by SFI must be present and enabled */ | 37 | /* All CPUs enumerated by SFI must be present and enabled */ |
38 | static void __cpuinit mp_sfi_register_lapic(u8 id) | 38 | static void __cpuinit mp_sfi_register_lapic(u8 id) |
39 | { | 39 | { |
40 | if (MAX_APICS - id <= 0) { | 40 | if (MAX_LOCAL_APIC - id <= 0) { |
41 | pr_warning("Processor #%d invalid (max %d)\n", | 41 | pr_warning("Processor #%d invalid (max %d)\n", |
42 | id, MAX_APICS); | 42 | id, MAX_LOCAL_APIC); |
43 | return; | 43 | return; |
44 | } | 44 | } |
45 | 45 | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index a318194002b5..df58e9cad96a 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1341 | 1341 | ||
1342 | /* | 1342 | /* |
1343 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | 1343 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) |
1344 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub | 1344 | * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE) |
1345 | */ | 1345 | */ |
1346 | bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE | 1346 | bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE |
1347 | * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | 1347 | * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); |
@@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector) | |||
1455 | * the below initialization can't be in firmware because the | 1455 | * the below initialization can't be in firmware because the |
1456 | * messaging IRQ will be determined by the OS | 1456 | * messaging IRQ will be determined by the OS |
1457 | */ | 1457 | */ |
1458 | apicid = uvhub_to_first_apicid(uvhub); | 1458 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; |
1459 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 1459 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
1460 | ((apicid << 32) | vector)); | 1460 | ((apicid << 32) | vector)); |
1461 | } | 1461 | } |
@@ -1490,7 +1490,7 @@ calculate_destination_timeout(void) | |||
1490 | /* | 1490 | /* |
1491 | * initialize the bau_control structure for each cpu | 1491 | * initialize the bau_control structure for each cpu |
1492 | */ | 1492 | */ |
1493 | static void __init uv_init_per_cpu(int nuvhubs) | 1493 | static int __init uv_init_per_cpu(int nuvhubs) |
1494 | { | 1494 | { |
1495 | int i; | 1495 | int i; |
1496 | int cpu; | 1496 | int cpu; |
@@ -1507,7 +1507,7 @@ static void __init uv_init_per_cpu(int nuvhubs) | |||
1507 | struct bau_control *smaster = NULL; | 1507 | struct bau_control *smaster = NULL; |
1508 | struct socket_desc { | 1508 | struct socket_desc { |
1509 | short num_cpus; | 1509 | short num_cpus; |
1510 | short cpu_number[16]; | 1510 | short cpu_number[MAX_CPUS_PER_SOCKET]; |
1511 | }; | 1511 | }; |
1512 | struct uvhub_desc { | 1512 | struct uvhub_desc { |
1513 | unsigned short socket_mask; | 1513 | unsigned short socket_mask; |
@@ -1540,6 +1540,10 @@ static void __init uv_init_per_cpu(int nuvhubs) | |||
1540 | sdp = &bdp->socket[socket]; | 1540 | sdp = &bdp->socket[socket]; |
1541 | sdp->cpu_number[sdp->num_cpus] = cpu; | 1541 | sdp->cpu_number[sdp->num_cpus] = cpu; |
1542 | sdp->num_cpus++; | 1542 | sdp->num_cpus++; |
1543 | if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { | ||
1544 | printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); | ||
1545 | return 1; | ||
1546 | } | ||
1543 | } | 1547 | } |
1544 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | 1548 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
1545 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) | 1549 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) |
@@ -1570,6 +1574,12 @@ static void __init uv_init_per_cpu(int nuvhubs) | |||
1570 | bcp->uvhub_master = hmaster; | 1574 | bcp->uvhub_master = hmaster; |
1571 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> | 1575 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> |
1572 | blade_processor_id; | 1576 | blade_processor_id; |
1577 | if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { | ||
1578 | printk(KERN_EMERG | ||
1579 | "%d cpus per uvhub invalid\n", | ||
1580 | bcp->uvhub_cpu); | ||
1581 | return 1; | ||
1582 | } | ||
1573 | } | 1583 | } |
1574 | nextsocket: | 1584 | nextsocket: |
1575 | socket++; | 1585 | socket++; |
@@ -1595,6 +1605,7 @@ nextsocket: | |||
1595 | bcp->congested_reps = congested_reps; | 1605 | bcp->congested_reps = congested_reps; |
1596 | bcp->congested_period = congested_period; | 1606 | bcp->congested_period = congested_period; |
1597 | } | 1607 | } |
1608 | return 0; | ||
1598 | } | 1609 | } |
1599 | 1610 | ||
1600 | /* | 1611 | /* |
@@ -1625,7 +1636,10 @@ static int __init uv_bau_init(void) | |||
1625 | spin_lock_init(&disable_lock); | 1636 | spin_lock_init(&disable_lock); |
1626 | congested_cycles = microsec_2_cycles(congested_response_us); | 1637 | congested_cycles = microsec_2_cycles(congested_response_us); |
1627 | 1638 | ||
1628 | uv_init_per_cpu(nuvhubs); | 1639 | if (uv_init_per_cpu(nuvhubs)) { |
1640 | nobau = 1; | ||
1641 | return 0; | ||
1642 | } | ||
1629 | 1643 | ||
1630 | uv_partition_base_pnode = 0x7fffffff; | 1644 | uv_partition_base_pnode = 0x7fffffff; |
1631 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) | 1645 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) |
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 56e421bc379b..9daf5d1af9f1 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c | |||
@@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu) | |||
89 | 89 | ||
90 | apicid = cpu_physical_id(cpu); | 90 | apicid = cpu_physical_id(cpu); |
91 | pnode = uv_apicid_to_pnode(apicid); | 91 | pnode = uv_apicid_to_pnode(apicid); |
92 | apicid |= uv_apicid_hibits; | ||
92 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 93 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
93 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 94 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
94 | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); | 95 | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); |
@@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode) | |||
107 | static int uv_setup_intr(int cpu, u64 expires) | 108 | static int uv_setup_intr(int cpu, u64 expires) |
108 | { | 109 | { |
109 | u64 val; | 110 | u64 val; |
111 | unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits; | ||
110 | int pnode = uv_cpu_to_pnode(cpu); | 112 | int pnode = uv_cpu_to_pnode(cpu); |
111 | 113 | ||
112 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, | 114 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, |
@@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires) | |||
117 | UVH_EVENT_OCCURRED0_RTC1_MASK); | 119 | UVH_EVENT_OCCURRED0_RTC1_MASK); |
118 | 120 | ||
119 | val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | | 121 | val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | |
120 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); | 122 | ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); |
121 | 123 | ||
122 | /* Set configuration */ | 124 | /* Set configuration */ |
123 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); | 125 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); |
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c index 3371bd053b89..632037671746 100644 --- a/arch/x86/platform/visws/visws_quirks.c +++ b/arch/x86/platform/visws/visws_quirks.c | |||
@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
171 | ver = m->apicver; | 171 | ver = m->apicver; |
172 | if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { | 172 | if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { |
173 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | 173 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", |
174 | m->apicid, MAX_APICS); | 174 | m->apicid, MAX_LOCAL_APIC); |
175 | return; | 175 | return; |
176 | } | 176 | } |
177 | 177 | ||
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 4a2afa1bac51..b6552b189bcd 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) | |||
25 | 25 | ||
26 | export CPPFLAGS_vdso.lds += -P -C | 26 | export CPPFLAGS_vdso.lds += -P -C |
27 | 27 | ||
28 | VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \ | 28 | VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ |
29 | -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 | 29 | -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 |
30 | 30 | ||
31 | $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so | 31 | $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so |
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter | |||
69 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) | 69 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) |
70 | 70 | ||
71 | CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) | 71 | CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) |
72 | VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 | 72 | VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1 |
73 | 73 | ||
74 | # This makes sure the $(obj) subdirectory exists even though vdso32/ | 74 | # This makes sure the $(obj) subdirectory exists even though vdso32/ |
75 | # is not a kbuild sub-make subdirectory. | 75 | # is not a kbuild sub-make subdirectory. |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d3861..44dcad43989d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | |||
75 | enum xen_domain_type xen_domain_type = XEN_NATIVE; | 75 | enum xen_domain_type xen_domain_type = XEN_NATIVE; |
76 | EXPORT_SYMBOL_GPL(xen_domain_type); | 76 | EXPORT_SYMBOL_GPL(xen_domain_type); |
77 | 77 | ||
78 | unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; | ||
79 | EXPORT_SYMBOL(machine_to_phys_mapping); | ||
80 | unsigned int machine_to_phys_order; | ||
81 | EXPORT_SYMBOL(machine_to_phys_order); | ||
82 | |||
78 | struct start_info *xen_start_info; | 83 | struct start_info *xen_start_info; |
79 | EXPORT_SYMBOL_GPL(xen_start_info); | 84 | EXPORT_SYMBOL_GPL(xen_start_info); |
80 | 85 | ||
@@ -1016,10 +1021,6 @@ static void xen_reboot(int reason) | |||
1016 | { | 1021 | { |
1017 | struct sched_shutdown r = { .reason = reason }; | 1022 | struct sched_shutdown r = { .reason = reason }; |
1018 | 1023 | ||
1019 | #ifdef CONFIG_SMP | ||
1020 | stop_other_cpus(); | ||
1021 | #endif | ||
1022 | |||
1023 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) | 1024 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) |
1024 | BUG(); | 1025 | BUG(); |
1025 | } | 1026 | } |
@@ -1090,6 +1091,8 @@ static void __init xen_setup_stackprotector(void) | |||
1090 | /* First C function to be called on Xen boot */ | 1091 | /* First C function to be called on Xen boot */ |
1091 | asmlinkage void __init xen_start_kernel(void) | 1092 | asmlinkage void __init xen_start_kernel(void) |
1092 | { | 1093 | { |
1094 | struct physdev_set_iopl set_iopl; | ||
1095 | int rc; | ||
1093 | pgd_t *pgd; | 1096 | pgd_t *pgd; |
1094 | 1097 | ||
1095 | if (!xen_start_info) | 1098 | if (!xen_start_info) |
@@ -1097,6 +1100,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1097 | 1100 | ||
1098 | xen_domain_type = XEN_PV_DOMAIN; | 1101 | xen_domain_type = XEN_PV_DOMAIN; |
1099 | 1102 | ||
1103 | xen_setup_machphys_mapping(); | ||
1104 | |||
1100 | /* Install Xen paravirt ops */ | 1105 | /* Install Xen paravirt ops */ |
1101 | pv_info = xen_info; | 1106 | pv_info = xen_info; |
1102 | pv_init_ops = xen_init_ops; | 1107 | pv_init_ops = xen_init_ops; |
@@ -1191,8 +1196,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1191 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | 1196 | /* Allocate and initialize top and mid mfn levels for p2m structure */ |
1192 | xen_build_mfn_list_list(); | 1197 | xen_build_mfn_list_list(); |
1193 | 1198 | ||
1194 | init_mm.pgd = pgd; | ||
1195 | |||
1196 | /* keep using Xen gdt for now; no urgent need to change it */ | 1199 | /* keep using Xen gdt for now; no urgent need to change it */ |
1197 | 1200 | ||
1198 | #ifdef CONFIG_X86_32 | 1201 | #ifdef CONFIG_X86_32 |
@@ -1202,10 +1205,18 @@ asmlinkage void __init xen_start_kernel(void) | |||
1202 | #else | 1205 | #else |
1203 | pv_info.kernel_rpl = 0; | 1206 | pv_info.kernel_rpl = 0; |
1204 | #endif | 1207 | #endif |
1205 | |||
1206 | /* set the limit of our address space */ | 1208 | /* set the limit of our address space */ |
1207 | xen_reserve_top(); | 1209 | xen_reserve_top(); |
1208 | 1210 | ||
1211 | /* We used to do this in xen_arch_setup, but that is too late on AMD | ||
1212 | * were early_cpu_init (run before ->arch_setup()) calls early_amd_init | ||
1213 | * which pokes 0xcf8 port. | ||
1214 | */ | ||
1215 | set_iopl.iopl = 1; | ||
1216 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
1217 | if (rc != 0) | ||
1218 | xen_raw_printk("physdev_op failed %d\n", rc); | ||
1219 | |||
1209 | #ifdef CONFIG_X86_32 | 1220 | #ifdef CONFIG_X86_32 |
1210 | /* set up basic CPUID stuff */ | 1221 | /* set up basic CPUID stuff */ |
1211 | cpu_detect(&new_cpu_data); | 1222 | cpu_detect(&new_cpu_data); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7f75a5..44924e551fde 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
2034 | set_page_prot(pmd, PAGE_KERNEL_RO); | 2034 | set_page_prot(pmd, PAGE_KERNEL_RO); |
2035 | } | 2035 | } |
2036 | 2036 | ||
2037 | void __init xen_setup_machphys_mapping(void) | ||
2038 | { | ||
2039 | struct xen_machphys_mapping mapping; | ||
2040 | unsigned long machine_to_phys_nr_ents; | ||
2041 | |||
2042 | if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { | ||
2043 | machine_to_phys_mapping = (unsigned long *)mapping.v_start; | ||
2044 | machine_to_phys_nr_ents = mapping.max_mfn + 1; | ||
2045 | } else { | ||
2046 | machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; | ||
2047 | } | ||
2048 | machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); | ||
2049 | } | ||
2050 | |||
2037 | #ifdef CONFIG_X86_64 | 2051 | #ifdef CONFIG_X86_64 |
2038 | static void convert_pfn_mfn(void *v) | 2052 | static void convert_pfn_mfn(void *v) |
2039 | { | 2053 | { |
@@ -2119,44 +2133,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
2119 | return pgd; | 2133 | return pgd; |
2120 | } | 2134 | } |
2121 | #else /* !CONFIG_X86_64 */ | 2135 | #else /* !CONFIG_X86_64 */ |
2122 | static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); | 2136 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); |
2137 | static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); | ||
2138 | |||
2139 | static __init void xen_write_cr3_init(unsigned long cr3) | ||
2140 | { | ||
2141 | unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); | ||
2142 | |||
2143 | BUG_ON(read_cr3() != __pa(initial_page_table)); | ||
2144 | BUG_ON(cr3 != __pa(swapper_pg_dir)); | ||
2145 | |||
2146 | /* | ||
2147 | * We are switching to swapper_pg_dir for the first time (from | ||
2148 | * initial_page_table) and therefore need to mark that page | ||
2149 | * read-only and then pin it. | ||
2150 | * | ||
2151 | * Xen disallows sharing of kernel PMDs for PAE | ||
2152 | * guests. Therefore we must copy the kernel PMD from | ||
2153 | * initial_page_table into a new kernel PMD to be used in | ||
2154 | * swapper_pg_dir. | ||
2155 | */ | ||
2156 | swapper_kernel_pmd = | ||
2157 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | ||
2158 | memcpy(swapper_kernel_pmd, initial_kernel_pmd, | ||
2159 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
2160 | swapper_pg_dir[KERNEL_PGD_BOUNDARY] = | ||
2161 | __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); | ||
2162 | set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); | ||
2163 | |||
2164 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | ||
2165 | xen_write_cr3(cr3); | ||
2166 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); | ||
2167 | |||
2168 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, | ||
2169 | PFN_DOWN(__pa(initial_page_table))); | ||
2170 | set_page_prot(initial_page_table, PAGE_KERNEL); | ||
2171 | set_page_prot(initial_kernel_pmd, PAGE_KERNEL); | ||
2172 | |||
2173 | pv_mmu_ops.write_cr3 = &xen_write_cr3; | ||
2174 | } | ||
2123 | 2175 | ||
2124 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 2176 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
2125 | unsigned long max_pfn) | 2177 | unsigned long max_pfn) |
2126 | { | 2178 | { |
2127 | pmd_t *kernel_pmd; | 2179 | pmd_t *kernel_pmd; |
2128 | 2180 | ||
2129 | level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | 2181 | initial_kernel_pmd = |
2182 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | ||
2130 | 2183 | ||
2131 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + | 2184 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + |
2132 | xen_start_info->nr_pt_frames * PAGE_SIZE + | 2185 | xen_start_info->nr_pt_frames * PAGE_SIZE + |
2133 | 512*1024); | 2186 | 512*1024); |
2134 | 2187 | ||
2135 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | 2188 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
2136 | memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); | 2189 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); |
2137 | 2190 | ||
2138 | xen_map_identity_early(level2_kernel_pgt, max_pfn); | 2191 | xen_map_identity_early(initial_kernel_pmd, max_pfn); |
2139 | 2192 | ||
2140 | memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); | 2193 | memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); |
2141 | set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], | 2194 | initial_page_table[KERNEL_PGD_BOUNDARY] = |
2142 | __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); | 2195 | __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); |
2143 | 2196 | ||
2144 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 2197 | set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); |
2145 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | 2198 | set_page_prot(initial_page_table, PAGE_KERNEL_RO); |
2146 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); | 2199 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); |
2147 | 2200 | ||
2148 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 2201 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
2149 | 2202 | ||
2150 | xen_write_cr3(__pa(swapper_pg_dir)); | 2203 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, |
2151 | 2204 | PFN_DOWN(__pa(initial_page_table))); | |
2152 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); | 2205 | xen_write_cr3(__pa(initial_page_table)); |
2153 | 2206 | ||
2154 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), | 2207 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
2155 | __pa(xen_start_info->pt_base + | 2208 | __pa(xen_start_info->pt_base + |
2156 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 2209 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
2157 | "XEN PAGETABLES"); | 2210 | "XEN PAGETABLES"); |
2158 | 2211 | ||
2159 | return swapper_pg_dir; | 2212 | return initial_page_table; |
2160 | } | 2213 | } |
2161 | #endif /* CONFIG_X86_64 */ | 2214 | #endif /* CONFIG_X86_64 */ |
2162 | 2215 | ||
@@ -2290,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
2290 | .write_cr2 = xen_write_cr2, | 2343 | .write_cr2 = xen_write_cr2, |
2291 | 2344 | ||
2292 | .read_cr3 = xen_read_cr3, | 2345 | .read_cr3 = xen_read_cr3, |
2346 | #ifdef CONFIG_X86_32 | ||
2347 | .write_cr3 = xen_write_cr3_init, | ||
2348 | #else | ||
2293 | .write_cr3 = xen_write_cr3, | 2349 | .write_cr3 = xen_write_cr3, |
2350 | #endif | ||
2294 | 2351 | ||
2295 | .flush_tlb_user = xen_flush_tlb, | 2352 | .flush_tlb_user = xen_flush_tlb, |
2296 | .flush_tlb_kernel = xen_flush_tlb, | 2353 | .flush_tlb_kernel = xen_flush_tlb, |
@@ -2358,8 +2415,6 @@ void __init xen_init_mmu_ops(void) | |||
2358 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; | 2415 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; |
2359 | pv_mmu_ops = xen_mmu_ops; | 2416 | pv_mmu_ops = xen_mmu_ops; |
2360 | 2417 | ||
2361 | vmap_lazy_unmap = false; | ||
2362 | |||
2363 | memset(dummy_mapping, 0xff, PAGE_SIZE); | 2418 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
2364 | } | 2419 | } |
2365 | 2420 | ||
@@ -2627,7 +2682,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2627 | 2682 | ||
2628 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); | 2683 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); |
2629 | 2684 | ||
2630 | vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; | 2685 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == |
2686 | (VM_PFNMAP | VM_RESERVED | VM_IO))); | ||
2631 | 2687 | ||
2632 | rmd.mfn = mfn; | 2688 | rmd.mfn = mfn; |
2633 | rmd.prot = prot; | 2689 | rmd.prot = prot; |
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 0f456386cce5..25c52f94a27c 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -68,7 +68,7 @@ static int __init check_platform_magic(void) | |||
68 | return 0; | 68 | return 0; |
69 | } | 69 | } |
70 | 70 | ||
71 | void __init xen_unplug_emulated_devices(void) | 71 | void xen_unplug_emulated_devices(void) |
72 | { | 72 | { |
73 | int r; | 73 | int r; |
74 | 74 | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b01fa32..b5a7f928234b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <xen/interface/callback.h> | 23 | #include <xen/interface/callback.h> |
24 | #include <xen/interface/memory.h> | 24 | #include <xen/interface/memory.h> |
25 | #include <xen/interface/physdev.h> | 25 | #include <xen/interface/physdev.h> |
26 | #include <xen/interface/memory.h> | ||
27 | #include <xen/features.h> | 26 | #include <xen/features.h> |
28 | 27 | ||
29 | #include "xen-ops.h" | 28 | #include "xen-ops.h" |
@@ -182,24 +181,21 @@ char * __init xen_memory_setup(void) | |||
182 | for (i = 0; i < memmap.nr_entries; i++) { | 181 | for (i = 0; i < memmap.nr_entries; i++) { |
183 | unsigned long long end = map[i].addr + map[i].size; | 182 | unsigned long long end = map[i].addr + map[i].size; |
184 | 183 | ||
185 | if (map[i].type == E820_RAM) { | 184 | if (map[i].type == E820_RAM && end > mem_end) { |
186 | if (map[i].addr < mem_end && end > mem_end) { | 185 | /* RAM off the end - may be partially included */ |
187 | /* Truncate region to max_mem. */ | 186 | u64 delta = min(map[i].size, end - mem_end); |
188 | u64 delta = end - mem_end; | ||
189 | 187 | ||
190 | map[i].size -= delta; | 188 | map[i].size -= delta; |
191 | extra_pages += PFN_DOWN(delta); | 189 | end -= delta; |
192 | 190 | ||
193 | end = mem_end; | 191 | extra_pages += PFN_DOWN(delta); |
194 | } | ||
195 | } | 192 | } |
196 | 193 | ||
197 | if (end > xen_extra_mem_start) | 194 | if (map[i].size > 0 && end > xen_extra_mem_start) |
198 | xen_extra_mem_start = end; | 195 | xen_extra_mem_start = end; |
199 | 196 | ||
200 | /* If region is non-RAM or below mem_end, add what remains */ | 197 | /* Add region if any remains */ |
201 | if ((map[i].type != E820_RAM || map[i].addr < mem_end) && | 198 | if (map[i].size > 0) |
202 | map[i].size > 0) | ||
203 | e820_add_region(map[i].addr, map[i].size, map[i].type); | 199 | e820_add_region(map[i].addr, map[i].size, map[i].type); |
204 | } | 200 | } |
205 | 201 | ||
@@ -248,26 +244,11 @@ char * __init xen_memory_setup(void) | |||
248 | else | 244 | else |
249 | extra_pages = 0; | 245 | extra_pages = 0; |
250 | 246 | ||
251 | if (!xen_initial_domain()) | 247 | xen_add_extra_mem(extra_pages); |
252 | xen_add_extra_mem(extra_pages); | ||
253 | 248 | ||
254 | return "Xen"; | 249 | return "Xen"; |
255 | } | 250 | } |
256 | 251 | ||
257 | static void xen_idle(void) | ||
258 | { | ||
259 | local_irq_disable(); | ||
260 | |||
261 | if (need_resched()) | ||
262 | local_irq_enable(); | ||
263 | else { | ||
264 | current_thread_info()->status &= ~TS_POLLING; | ||
265 | smp_mb__after_clear_bit(); | ||
266 | safe_halt(); | ||
267 | current_thread_info()->status |= TS_POLLING; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | /* | 252 | /* |
272 | * Set the bit indicating "nosegneg" library variants should be used. | 253 | * Set the bit indicating "nosegneg" library variants should be used. |
273 | * We only need to bother in pure 32-bit mode; compat 32-bit processes | 254 | * We only need to bother in pure 32-bit mode; compat 32-bit processes |
@@ -337,9 +318,6 @@ void __cpuinit xen_enable_syscall(void) | |||
337 | 318 | ||
338 | void __init xen_arch_setup(void) | 319 | void __init xen_arch_setup(void) |
339 | { | 320 | { |
340 | struct physdev_set_iopl set_iopl; | ||
341 | int rc; | ||
342 | |||
343 | xen_panic_handler_init(); | 321 | xen_panic_handler_init(); |
344 | 322 | ||
345 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | 323 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); |
@@ -356,11 +334,6 @@ void __init xen_arch_setup(void) | |||
356 | xen_enable_sysenter(); | 334 | xen_enable_sysenter(); |
357 | xen_enable_syscall(); | 335 | xen_enable_syscall(); |
358 | 336 | ||
359 | set_iopl.iopl = 1; | ||
360 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
361 | if (rc != 0) | ||
362 | printk(KERN_INFO "physdev_op failed %d\n", rc); | ||
363 | |||
364 | #ifdef CONFIG_ACPI | 337 | #ifdef CONFIG_ACPI |
365 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 338 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
366 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 339 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
@@ -372,7 +345,11 @@ void __init xen_arch_setup(void) | |||
372 | MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? | 345 | MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? |
373 | COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); | 346 | COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); |
374 | 347 | ||
375 | pm_idle = xen_idle; | 348 | /* Set up idle, making sure it calls safe_halt() pvop */ |
349 | #ifdef CONFIG_X86_32 | ||
350 | boot_cpu_data.hlt_works_ok = 1; | ||
351 | #endif | ||
352 | pm_idle = default_idle; | ||
376 | 353 | ||
377 | fiddle_vdso(); | 354 | fiddle_vdso(); |
378 | } | 355 | } |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 1d789d56877c..9bbd63a129b5 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -31,6 +31,7 @@ void xen_hvm_post_suspend(int suspend_cancelled) | |||
31 | int cpu; | 31 | int cpu; |
32 | xen_hvm_init_shared_info(); | 32 | xen_hvm_init_shared_info(); |
33 | xen_callback_vector(); | 33 | xen_callback_vector(); |
34 | xen_unplug_emulated_devices(); | ||
34 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 35 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
35 | for_each_online_cpu(cpu) { | 36 | for_each_online_cpu(cpu) { |
36 | xen_setup_runstate_info(cpu); | 37 | xen_setup_runstate_info(cpu); |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b2bb5aa3b054..5da5e53fb94c 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -426,6 +426,8 @@ void xen_timer_resume(void) | |||
426 | { | 426 | { |
427 | int cpu; | 427 | int cpu; |
428 | 428 | ||
429 | pvclock_resume(); | ||
430 | |||
429 | if (xen_clockevent != &xen_vcpuop_clockevent) | 431 | if (xen_clockevent != &xen_vcpuop_clockevent) |
430 | return; | 432 | return; |
431 | 433 | ||
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 64044747348e..9d41bf985757 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -43,7 +43,7 @@ void xen_vcpu_restore(void); | |||
43 | 43 | ||
44 | void xen_callback_vector(void); | 44 | void xen_callback_vector(void); |
45 | void xen_hvm_init_shared_info(void); | 45 | void xen_hvm_init_shared_info(void); |
46 | void __init xen_unplug_emulated_devices(void); | 46 | void xen_unplug_emulated_devices(void); |
47 | 47 | ||
48 | void __init xen_build_dynamic_phys_to_machine(void); | 48 | void __init xen_build_dynamic_phys_to_machine(void); |
49 | 49 | ||