author | Ingo Molnar <mingo@elte.hu> | 2009-07-04 05:00:38 -0400
---|---|---
committer | Ingo Molnar <mingo@elte.hu> | 2009-07-04 05:00:42 -0400
commit | d7e57676e3ed7ab9b2c7c4bcb7873e51eacbdb84 |
tree | f7433f38cd407a0c35a8cbf2b7e3fd756087bce7 /arch/x86 |
parent | feaa0457ec8351cae855edc9a3052ac49322538e |
parent | 746a99a5af60ee676afa2ba469ccd1373493c7e7 |
Merge branch 'linus' into x86/cleanups
Merge reason: We were on an older pre-rc1 base, move to almost-rc2.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
81 files changed, 1286 insertions, 929 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 73c0bda73fcd..c07f72205909 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -34,6 +34,7 @@ config X86 | |||
34 | select HAVE_DYNAMIC_FTRACE | 34 | select HAVE_DYNAMIC_FTRACE |
35 | select HAVE_FUNCTION_TRACER | 35 | select HAVE_FUNCTION_TRACER |
36 | select HAVE_FUNCTION_GRAPH_TRACER | 36 | select HAVE_FUNCTION_GRAPH_TRACER |
37 | select HAVE_FUNCTION_GRAPH_FP_TEST | ||
37 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST | 38 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST |
38 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE | 39 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE |
39 | select HAVE_FTRACE_SYSCALLS | 40 | select HAVE_FTRACE_SYSCALLS |
@@ -1912,25 +1913,14 @@ config DMAR_DEFAULT_ON | |||
1912 | recommended you say N here while the DMAR code remains | 1913 | recommended you say N here while the DMAR code remains |
1913 | experimental. | 1914 | experimental. |
1914 | 1915 | ||
1915 | config DMAR_GFX_WA | ||
1916 | def_bool y | ||
1917 | prompt "Support for Graphics workaround" | ||
1918 | depends on DMAR | ||
1919 | ---help--- | ||
1920 | Current Graphics drivers tend to use physical address | ||
1921 | for DMA and avoid using DMA APIs. Setting this config | ||
1922 | option permits the IOMMU driver to set a unity map for | ||
1923 | all the OS-visible memory. Hence the driver can continue | ||
1924 | to use physical addresses for DMA. | ||
1925 | |||
1926 | config DMAR_FLOPPY_WA | 1916 | config DMAR_FLOPPY_WA |
1927 | def_bool y | 1917 | def_bool y |
1928 | depends on DMAR | 1918 | depends on DMAR |
1929 | ---help--- | 1919 | ---help--- |
1930 | Floppy disk drivers are know to bypass DMA API calls | 1920 | Floppy disk drivers are known to bypass DMA API calls |
1931 | thereby failing to work when IOMMU is enabled. This | 1921 | thereby failing to work when IOMMU is enabled. This |
1932 | workaround will setup a 1:1 mapping for the first | 1922 | workaround will setup a 1:1 mapping for the first |
1933 | 16M to make floppy (an ISA device) work. | 1923 | 16MiB to make floppy (an ISA device) work. |
1934 | 1924 | ||
1935 | config INTR_REMAP | 1925 | config INTR_REMAP |
1936 | bool "Support for Interrupt Remapping (EXPERIMENTAL)" | 1926 | bool "Support for Interrupt Remapping (EXPERIMENTAL)" |
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S index 507793739ea5..1dfbf64e52a2 100644 --- a/arch/x86/boot/bioscall.S +++ b/arch/x86/boot/bioscall.S | |||
@@ -13,7 +13,7 @@ | |||
13 | * touching registers they shouldn't be. | 13 | * touching registers they shouldn't be. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | .code16 | 16 | .code16gcc |
17 | .text | 17 | .text |
18 | .globl intcall | 18 | .globl intcall |
19 | .type intcall, @function | 19 | .type intcall, @function |
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index caba99601703..eb0566e83319 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -845,7 +845,7 @@ ENTRY(aesni_cbc_enc) | |||
845 | */ | 845 | */ |
846 | ENTRY(aesni_cbc_dec) | 846 | ENTRY(aesni_cbc_dec) |
847 | cmp $16, LEN | 847 | cmp $16, LEN |
848 | jb .Lcbc_dec_ret | 848 | jb .Lcbc_dec_just_ret |
849 | mov 480(KEYP), KLEN | 849 | mov 480(KEYP), KLEN |
850 | add $240, KEYP | 850 | add $240, KEYP |
851 | movups (IVP), IV | 851 | movups (IVP), IV |
@@ -891,6 +891,7 @@ ENTRY(aesni_cbc_dec) | |||
891 | add $16, OUTP | 891 | add $16, OUTP |
892 | cmp $16, LEN | 892 | cmp $16, LEN |
893 | jge .Lcbc_dec_loop1 | 893 | jge .Lcbc_dec_loop1 |
894 | movups IV, (IVP) | ||
895 | .Lcbc_dec_ret: | 894 | .Lcbc_dec_ret: |
895 | movups IV, (IVP) | ||
896 | .Lcbc_dec_just_ret: | ||
896 | ret | 897 | ret |
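Note on the hunk above: the entry check for inputs shorter than one block now branches to .Lcbc_dec_just_ret, and the IV store sits under .Lcbc_dec_ret, so paths that reach .Lcbc_dec_ret write the chaining value back to (IVP) while a too-short input leaves the caller's IV buffer untouched. A minimal user-space sketch of generic CBC decryption (toy block transform, not AES-NI; all names hypothetical) mirroring that control flow:

```c
/*
 * Illustrative sketch only (toy cipher, NOT AES-NI): generic CBC decryption.
 * The chaining value is written back to *iv only on the path where at least
 * one full block was processed; a too-short input leaves *iv untouched.
 */
#include <string.h>
#include <stddef.h>

#define BLK 16

static void toy_decrypt_block(unsigned char *out, const unsigned char *in)
{
	for (int i = 0; i < BLK; i++)		/* stand-in for the AES rounds */
		out[i] = in[i] ^ 0xAA;
}

static void cbc_decrypt(unsigned char *dst, const unsigned char *src,
			size_t len, unsigned char *iv)
{
	unsigned char prev[BLK], saved[BLK];

	if (len < BLK)				/* .Lcbc_dec_just_ret analogue */
		return;

	memcpy(prev, iv, BLK);
	while (len >= BLK) {
		memcpy(saved, src, BLK);	/* ciphertext is the next IV */
		toy_decrypt_block(dst, src);
		for (int i = 0; i < BLK; i++)
			dst[i] ^= prev[i];
		memcpy(prev, saved, BLK);
		src += BLK;
		dst += BLK;
		len -= BLK;
	}
	memcpy(iv, prev, BLK);			/* IV write-back, as at .Lcbc_dec_ret */
}

int main(void)
{
	unsigned char iv[BLK] = { 0 }, buf[2 * BLK] = { 1, 2, 3 };

	cbc_decrypt(buf, buf, sizeof(buf), iv);	/* in-place decrypt of two blocks */
	return 0;
}
```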
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 4e663398f77f..c580c5ec1cad 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -198,6 +198,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |||
198 | 198 | ||
199 | blkcipher_walk_init(&walk, dst, src, nbytes); | 199 | blkcipher_walk_init(&walk, dst, src, nbytes); |
200 | err = blkcipher_walk_virt(desc, &walk); | 200 | err = blkcipher_walk_virt(desc, &walk); |
201 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
201 | 202 | ||
202 | kernel_fpu_begin(); | 203 | kernel_fpu_begin(); |
203 | while ((nbytes = walk.nbytes)) { | 204 | while ((nbytes = walk.nbytes)) { |
@@ -221,6 +222,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |||
221 | 222 | ||
222 | blkcipher_walk_init(&walk, dst, src, nbytes); | 223 | blkcipher_walk_init(&walk, dst, src, nbytes); |
223 | err = blkcipher_walk_virt(desc, &walk); | 224 | err = blkcipher_walk_virt(desc, &walk); |
225 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
224 | 226 | ||
225 | kernel_fpu_begin(); | 227 | kernel_fpu_begin(); |
226 | while ((nbytes = walk.nbytes)) { | 228 | while ((nbytes = walk.nbytes)) { |
@@ -266,6 +268,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |||
266 | 268 | ||
267 | blkcipher_walk_init(&walk, dst, src, nbytes); | 269 | blkcipher_walk_init(&walk, dst, src, nbytes); |
268 | err = blkcipher_walk_virt(desc, &walk); | 270 | err = blkcipher_walk_virt(desc, &walk); |
271 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
269 | 272 | ||
270 | kernel_fpu_begin(); | 273 | kernel_fpu_begin(); |
271 | while ((nbytes = walk.nbytes)) { | 274 | while ((nbytes = walk.nbytes)) { |
@@ -289,6 +292,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |||
289 | 292 | ||
290 | blkcipher_walk_init(&walk, dst, src, nbytes); | 293 | blkcipher_walk_init(&walk, dst, src, nbytes); |
291 | err = blkcipher_walk_virt(desc, &walk); | 294 | err = blkcipher_walk_virt(desc, &walk); |
295 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
292 | 296 | ||
293 | kernel_fpu_begin(); | 297 | kernel_fpu_begin(); |
294 | while ((nbytes = walk.nbytes)) { | 298 | while ((nbytes = walk.nbytes)) { |
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 5f9781a3815f..daef6cd2b45d 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c | |||
@@ -48,7 +48,7 @@ static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, | |||
48 | struct blkcipher_desc desc = { | 48 | struct blkcipher_desc desc = { |
49 | .tfm = child, | 49 | .tfm = child, |
50 | .info = desc_in->info, | 50 | .info = desc_in->info, |
51 | .flags = desc_in->flags, | 51 | .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, |
52 | }; | 52 | }; |
53 | 53 | ||
54 | kernel_fpu_begin(); | 54 | kernel_fpu_begin(); |
@@ -67,7 +67,7 @@ static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, | |||
67 | struct blkcipher_desc desc = { | 67 | struct blkcipher_desc desc = { |
68 | .tfm = child, | 68 | .tfm = child, |
69 | .info = desc_in->info, | 69 | .info = desc_in->info, |
70 | .flags = desc_in->flags, | 70 | .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, |
71 | }; | 71 | }; |
72 | 72 | ||
73 | kernel_fpu_begin(); | 73 | kernel_fpu_begin(); |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4518dc500903..20d1465a2ab0 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -144,6 +144,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) | |||
144 | 144 | ||
145 | #else /* !CONFIG_ACPI */ | 145 | #else /* !CONFIG_ACPI */ |
146 | 146 | ||
147 | #define acpi_disabled 1 | ||
147 | #define acpi_lapic 0 | 148 | #define acpi_lapic 0 |
148 | #define acpi_ioapic 0 | 149 | #define acpi_ioapic 0 |
149 | static inline void acpi_noirq_set(void) { } | 150 | static inline void acpi_noirq_set(void) { } |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 262e02820049..bdf96f119f06 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
@@ -29,9 +29,11 @@ extern void amd_iommu_detect(void); | |||
29 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | 29 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); |
30 | extern void amd_iommu_flush_all_domains(void); | 30 | extern void amd_iommu_flush_all_domains(void); |
31 | extern void amd_iommu_flush_all_devices(void); | 31 | extern void amd_iommu_flush_all_devices(void); |
32 | extern void amd_iommu_shutdown(void); | ||
32 | #else | 33 | #else |
33 | static inline int amd_iommu_init(void) { return -ENODEV; } | 34 | static inline int amd_iommu_init(void) { return -ENODEV; } |
34 | static inline void amd_iommu_detect(void) { } | 35 | static inline void amd_iommu_detect(void) { } |
36 | static inline void amd_iommu_shutdown(void) { } | ||
35 | #endif | 37 | #endif |
36 | 38 | ||
37 | #endif /* _ASM_X86_AMD_IOMMU_H */ | 39 | #endif /* _ASM_X86_AMD_IOMMU_H */ |
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 8cb9c814e120..2503d4e64c2a 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h | |||
@@ -257,7 +257,7 @@ typedef struct { | |||
257 | 257 | ||
258 | /** | 258 | /** |
259 | * atomic64_read - read atomic64 variable | 259 | * atomic64_read - read atomic64 variable |
260 | * @v: pointer of type atomic64_t | 260 | * @ptr: pointer of type atomic64_t |
261 | * | 261 | * |
262 | * Atomically reads the value of @v. | 262 | * Atomically reads the value of @v. |
263 | * Doesn't imply a read memory barrier. | 263 | * Doesn't imply a read memory barrier. |
@@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, | |||
294 | * atomic64_xchg - xchg atomic64 variable | 294 | * atomic64_xchg - xchg atomic64 variable |
295 | * @ptr: pointer to type atomic64_t | 295 | * @ptr: pointer to type atomic64_t |
296 | * @new_val: value to assign | 296 | * @new_val: value to assign |
297 | * @old_val: old value that was there | ||
298 | * | 297 | * |
299 | * Atomically xchgs the value of @ptr to @new_val and returns | 298 | * Atomically xchgs the value of @ptr to @new_val and returns |
300 | * the old value. | 299 | * the old value. |
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 418e632d4a80..7a1065958ba9 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
@@ -8,7 +8,7 @@ | |||
8 | 8 | ||
9 | #ifdef __KERNEL__ | 9 | #ifdef __KERNEL__ |
10 | 10 | ||
11 | #include <asm/page_types.h> | 11 | #include <asm/pgtable_types.h> |
12 | 12 | ||
13 | /* Physical address where kernel should be loaded. */ | 13 | /* Physical address where kernel should be loaded. */ |
14 | #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ | 14 | #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ |
@@ -16,10 +16,10 @@ | |||
16 | & ~(CONFIG_PHYSICAL_ALIGN - 1)) | 16 | & ~(CONFIG_PHYSICAL_ALIGN - 1)) |
17 | 17 | ||
18 | /* Minimum kernel alignment, as a power of two */ | 18 | /* Minimum kernel alignment, as a power of two */ |
19 | #ifdef CONFIG_x86_64 | 19 | #ifdef CONFIG_X86_64 |
20 | #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT | 20 | #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT |
21 | #else | 21 | #else |
22 | #define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) | 22 | #define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER) |
23 | #endif | 23 | #endif |
24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) |
25 | 25 | ||
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index c45f415ce315..c993e9e0fed4 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_DESC_H | 1 | #ifndef _ASM_X86_DESC_H |
2 | #define _ASM_X86_DESC_H | 2 | #define _ASM_X86_DESC_H |
3 | 3 | ||
4 | #ifndef __ASSEMBLY__ | ||
5 | #include <asm/desc_defs.h> | 4 | #include <asm/desc_defs.h> |
6 | #include <asm/ldt.h> | 5 | #include <asm/ldt.h> |
7 | #include <asm/mmu.h> | 6 | #include <asm/mmu.h> |
@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) | |||
380 | _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); | 379 | _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); |
381 | } | 380 | } |
382 | 381 | ||
383 | #else | ||
384 | /* | ||
385 | * GET_DESC_BASE reads the descriptor base of the specified segment. | ||
386 | * | ||
387 | * Args: | ||
388 | * idx - descriptor index | ||
389 | * gdt - GDT pointer | ||
390 | * base - 32bit register to which the base will be written | ||
391 | * lo_w - lo word of the "base" register | ||
392 | * lo_b - lo byte of the "base" register | ||
393 | * hi_b - hi byte of the low word of the "base" register | ||
394 | * | ||
395 | * Example: | ||
396 | * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | ||
397 | * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. | ||
398 | */ | ||
399 | #define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ | ||
400 | movb idx * 8 + 4(gdt), lo_b; \ | ||
401 | movb idx * 8 + 7(gdt), hi_b; \ | ||
402 | shll $16, base; \ | ||
403 | movw idx * 8 + 2(gdt), lo_w; | ||
404 | |||
405 | |||
406 | #endif /* __ASSEMBLY__ */ | ||
407 | |||
408 | #endif /* _ASM_X86_DESC_H */ | 382 | #endif /* _ASM_X86_DESC_H */ |
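The GET_DESC_BASE assembly macro removed above reassembled a segment base from an 8-byte GDT descriptor, whose 32-bit base field is scattered across bytes 2, 3, 4 and 7 of the entry. A minimal user-space C sketch of the same extraction (hypothetical helper, not kernel code):

```c
/*
 * Sketch of what GET_DESC_BASE computed: gather the descriptor base
 * from bytes 2-4 (bits 0-23) and byte 7 (bits 24-31) of the GDT entry.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t desc_base(const uint8_t *gdt, unsigned int idx)
{
	const uint8_t *d = gdt + idx * 8;

	return (uint32_t)d[2] |
	       (uint32_t)d[3] << 8 |
	       (uint32_t)d[4] << 16 |
	       (uint32_t)d[7] << 24;
}

int main(void)
{
	uint8_t gdt[2 * 8] = { 0 };

	/* descriptor #1 with base 0x12345678, all other fields left zero */
	gdt[8 + 2] = 0x78;
	gdt[8 + 3] = 0x56;
	gdt[8 + 4] = 0x34;
	gdt[8 + 7] = 0x12;

	printf("base = %#x\n", (unsigned)desc_base(gdt, 1));	/* 0x12345678 */
	return 0;
}
```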
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index af326a2975b5..fd6d21bbee6c 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -6,6 +6,7 @@ extern void no_iommu_init(void); | |||
6 | extern struct dma_map_ops nommu_dma_ops; | 6 | extern struct dma_map_ops nommu_dma_ops; |
7 | extern int force_iommu, no_iommu; | 7 | extern int force_iommu, no_iommu; |
8 | extern int iommu_detected; | 8 | extern int iommu_detected; |
9 | extern int iommu_pass_through; | ||
9 | 10 | ||
10 | /* 10 seconds */ | 11 | /* 10 seconds */ |
11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 12 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 540a466e50f5..5cdd8d100ec9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -102,15 +102,39 @@ struct mce_log { | |||
102 | 102 | ||
103 | #ifdef __KERNEL__ | 103 | #ifdef __KERNEL__ |
104 | 104 | ||
105 | #include <linux/percpu.h> | ||
106 | #include <linux/init.h> | ||
107 | #include <asm/atomic.h> | ||
108 | |||
105 | extern int mce_disabled; | 109 | extern int mce_disabled; |
110 | extern int mce_p5_enabled; | ||
106 | 111 | ||
107 | #include <asm/atomic.h> | 112 | #ifdef CONFIG_X86_MCE |
108 | #include <linux/percpu.h> | 113 | void mcheck_init(struct cpuinfo_x86 *c); |
114 | #else | ||
115 | static inline void mcheck_init(struct cpuinfo_x86 *c) {} | ||
116 | #endif | ||
117 | |||
118 | #ifdef CONFIG_X86_OLD_MCE | ||
119 | extern int nr_mce_banks; | ||
120 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
121 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
122 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
123 | #endif | ||
124 | |||
125 | #ifdef CONFIG_X86_ANCIENT_MCE | ||
126 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
127 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
128 | static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } | ||
129 | #else | ||
130 | static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} | ||
131 | static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | ||
132 | static inline void enable_p5_mce(void) {} | ||
133 | #endif | ||
109 | 134 | ||
110 | void mce_setup(struct mce *m); | 135 | void mce_setup(struct mce *m); |
111 | void mce_log(struct mce *m); | 136 | void mce_log(struct mce *m); |
112 | DECLARE_PER_CPU(struct sys_device, mce_dev); | 137 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
113 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
114 | 138 | ||
115 | /* | 139 | /* |
116 | * To support more than 128 would need to escape the predefined | 140 | * To support more than 128 would need to escape the predefined |
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c); | |||
145 | DECLARE_PER_CPU(unsigned, mce_exception_count); | 169 | DECLARE_PER_CPU(unsigned, mce_exception_count); |
146 | DECLARE_PER_CPU(unsigned, mce_poll_count); | 170 | DECLARE_PER_CPU(unsigned, mce_poll_count); |
147 | 171 | ||
148 | void mce_log_therm_throt_event(__u64 status); | ||
149 | |||
150 | extern atomic_t mce_entry; | 172 | extern atomic_t mce_entry; |
151 | 173 | ||
152 | void do_machine_check(struct pt_regs *, long); | ||
153 | |||
154 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); | 174 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); |
155 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); | 175 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); |
156 | 176 | ||
@@ -167,13 +187,32 @@ void mce_notify_process(void); | |||
167 | DECLARE_PER_CPU(struct mce, injectm); | 187 | DECLARE_PER_CPU(struct mce, injectm); |
168 | extern struct file_operations mce_chrdev_ops; | 188 | extern struct file_operations mce_chrdev_ops; |
169 | 189 | ||
170 | #ifdef CONFIG_X86_MCE | 190 | /* |
171 | void mcheck_init(struct cpuinfo_x86 *c); | 191 | * Exception handler |
172 | #else | 192 | */ |
173 | #define mcheck_init(c) do { } while (0) | 193 | |
174 | #endif | 194 | /* Call the installed machine check handler for this CPU setup. */ |
195 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
196 | void do_machine_check(struct pt_regs *, long); | ||
197 | |||
198 | /* | ||
199 | * Threshold handler | ||
200 | */ | ||
175 | 201 | ||
176 | extern void (*mce_threshold_vector)(void); | 202 | extern void (*mce_threshold_vector)(void); |
203 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
204 | |||
205 | /* | ||
206 | * Thermal handler | ||
207 | */ | ||
208 | |||
209 | void intel_init_thermal(struct cpuinfo_x86 *c); | ||
210 | |||
211 | #ifdef CONFIG_X86_NEW_MCE | ||
212 | void mce_log_therm_throt_event(__u64 status); | ||
213 | #else | ||
214 | static inline void mce_log_therm_throt_event(__u64 status) {} | ||
215 | #endif | ||
177 | 216 | ||
178 | #endif /* __KERNEL__ */ | 217 | #endif /* __KERNEL__ */ |
179 | #endif /* _ASM_X86_MCE_H */ | 218 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 22603764e7db..48ad9d29484a 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -3,13 +3,10 @@ | |||
3 | 3 | ||
4 | #include <asm/msr-index.h> | 4 | #include <asm/msr-index.h> |
5 | 5 | ||
6 | #ifndef __ASSEMBLY__ | ||
7 | # include <linux/types.h> | ||
8 | #endif | ||
9 | |||
10 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
11 | #ifndef __ASSEMBLY__ | 7 | #ifndef __ASSEMBLY__ |
12 | 8 | ||
9 | #include <linux/types.h> | ||
13 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
14 | #include <asm/errno.h> | 11 | #include <asm/errno.h> |
15 | #include <asm/cpumask.h> | 12 | #include <asm/cpumask.h> |
@@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
264 | #endif /* CONFIG_SMP */ | 261 | #endif /* CONFIG_SMP */ |
265 | #endif /* __ASSEMBLY__ */ | 262 | #endif /* __ASSEMBLY__ */ |
266 | #endif /* __KERNEL__ */ | 263 | #endif /* __KERNEL__ */ |
267 | |||
268 | |||
269 | #endif /* _ASM_X86_MSR_H */ | 264 | #endif /* _ASM_X86_MSR_H */ |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8d382d3abf38..7639dbf5d223 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -41,7 +41,7 @@ | |||
41 | 41 | ||
42 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ | 42 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ |
43 | #define __PHYSICAL_MASK_SHIFT 46 | 43 | #define __PHYSICAL_MASK_SHIFT 46 |
44 | #define __VIRTUAL_MASK_SHIFT 48 | 44 | #define __VIRTUAL_MASK_SHIFT 47 |
45 | 45 | ||
46 | /* | 46 | /* |
47 | * Kernel image size is limited to 512 MB (see level2_kernel_pgt in | 47 | * Kernel image size is limited to 512 MB (see level2_kernel_pgt in |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index b51a1e8b0baf..1ff685ca221c 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -91,7 +91,7 @@ extern void pci_iommu_alloc(void); | |||
91 | 91 | ||
92 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) | 92 | #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) |
93 | 93 | ||
94 | #if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) | 94 | #if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) |
95 | 95 | ||
96 | #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ | 96 | #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ |
97 | dma_addr_t ADDR_NAME; | 97 | dma_addr_t ADDR_NAME; |
@@ -130,6 +130,7 @@ extern void pci_iommu_alloc(void); | |||
130 | 130 | ||
131 | /* generic pci stuff */ | 131 | /* generic pci stuff */ |
132 | #include <asm-generic/pci.h> | 132 | #include <asm-generic/pci.h> |
133 | #define PCIBIOS_MAX_MEM_32 0xffffffff | ||
133 | 134 | ||
134 | #ifdef CONFIG_NUMA | 135 | #ifdef CONFIG_NUMA |
135 | /* Returns the node based on pci bus */ | 136 | /* Returns the node based on pci bus */ |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index e60fd3e14bdf..b399988eee3a 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -121,6 +121,9 @@ extern int __init pcibios_init(void); | |||
121 | extern int __init pci_mmcfg_arch_init(void); | 121 | extern int __init pci_mmcfg_arch_init(void); |
122 | extern void __init pci_mmcfg_arch_free(void); | 122 | extern void __init pci_mmcfg_arch_free(void); |
123 | 123 | ||
124 | extern struct acpi_mcfg_allocation *pci_mmcfg_config; | ||
125 | extern int pci_mmcfg_config_num; | ||
126 | |||
124 | /* | 127 | /* |
125 | * AMD Fam10h CPUs are buggy, and cannot access MMIO config space | 128 | * AMD Fam10h CPUs are buggy, and cannot access MMIO config space |
126 | * on their northbrige except through the * %eax register. As such, you MUST | 129 | * on their northbrige except through the * %eax register. As such, you MUST |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 02ecb30982a3..103f1ddb0d85 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -42,6 +42,7 @@ | |||
42 | 42 | ||
43 | #else /* ...!ASSEMBLY */ | 43 | #else /* ...!ASSEMBLY */ |
44 | 44 | ||
45 | #include <linux/kernel.h> | ||
45 | #include <linux/stringify.h> | 46 | #include <linux/stringify.h> |
46 | 47 | ||
47 | #ifdef CONFIG_SMP | 48 | #ifdef CONFIG_SMP |
@@ -155,6 +156,15 @@ do { \ | |||
155 | /* We can use this directly for local CPU (faster). */ | 156 | /* We can use this directly for local CPU (faster). */ |
156 | DECLARE_PER_CPU(unsigned long, this_cpu_off); | 157 | DECLARE_PER_CPU(unsigned long, this_cpu_off); |
157 | 158 | ||
159 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
160 | void *pcpu_lpage_remapped(void *kaddr); | ||
161 | #else | ||
162 | static inline void *pcpu_lpage_remapped(void *kaddr) | ||
163 | { | ||
164 | return NULL; | ||
165 | } | ||
166 | #endif | ||
167 | |||
158 | #endif /* !__ASSEMBLY__ */ | 168 | #endif /* !__ASSEMBLY__ */ |
159 | 169 | ||
160 | #ifdef CONFIG_SMP | 170 | #ifdef CONFIG_SMP |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 876ed97147b3..fa64e401589d 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -84,14 +84,12 @@ union cpuid10_edx { | |||
84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | 84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | 85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) |
86 | 86 | ||
87 | extern void set_perf_counter_pending(void); | ||
88 | |||
89 | #define clear_perf_counter_pending() do { } while (0) | ||
90 | #define test_perf_counter_pending() (0) | ||
91 | |||
92 | #ifdef CONFIG_PERF_COUNTERS | 87 | #ifdef CONFIG_PERF_COUNTERS |
93 | extern void init_hw_perf_counters(void); | 88 | extern void init_hw_perf_counters(void); |
94 | extern void perf_counters_lapic_init(void); | 89 | extern void perf_counters_lapic_init(void); |
90 | |||
91 | #define PERF_COUNTER_INDEX_OFFSET 0 | ||
92 | |||
95 | #else | 93 | #else |
96 | static inline void init_hw_perf_counters(void) { } | 94 | static inline void init_hw_perf_counters(void) { } |
97 | static inline void perf_counters_lapic_init(void) { } | 95 | static inline void perf_counters_lapic_init(void) { } |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 31bd120cf2a2..01fd9461d323 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h | |||
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); | |||
49 | #endif | 49 | #endif |
50 | 50 | ||
51 | #if defined(CONFIG_HIGHPTE) | 51 | #if defined(CONFIG_HIGHPTE) |
52 | #define __KM_PTE \ | ||
53 | (in_nmi() ? KM_NMI_PTE : \ | ||
54 | in_irq() ? KM_IRQ_PTE : \ | ||
55 | KM_PTE0) | ||
52 | #define pte_offset_map(dir, address) \ | 56 | #define pte_offset_map(dir, address) \ |
53 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ | 57 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ |
54 | pte_index((address))) | 58 | pte_index((address))) |
55 | #define pte_offset_map_nested(dir, address) \ | 59 | #define pte_offset_map_nested(dir, address) \ |
56 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ | 60 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ |
57 | pte_index((address))) | 61 | pte_index((address))) |
58 | #define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) | 62 | #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) |
59 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) | 63 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) |
60 | #else | 64 | #else |
61 | #define pte_offset_map(dir, address) \ | 65 | #define pte_offset_map(dir, address) \ |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index abde308fdb0f..c57a30117149 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -165,10 +165,7 @@ extern void cleanup_highmap(void); | |||
165 | 165 | ||
166 | /* fs/proc/kcore.c */ | 166 | /* fs/proc/kcore.c */ |
167 | #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) | 167 | #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) |
168 | #define kc_offset_to_vaddr(o) \ | 168 | #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) |
169 | (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \ | ||
170 | ? ((o) | ~__VIRTUAL_MASK) \ | ||
171 | : (o)) | ||
172 | 169 | ||
173 | #define __HAVE_ARCH_PTE_SAME | 170 | #define __HAVE_ARCH_PTE_SAME |
174 | #endif /* !__ASSEMBLY__ */ | 171 | #endif /* !__ASSEMBLY__ */ |
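The kc_offset_to_vaddr() simplification above goes together with the __VIRTUAL_MASK_SHIFT change from 48 to 47 in page_64_types.h earlier in this diff: with the mask covering bits 0-46, every upper-half (kernel) canonical address has all higher bits set, so OR-ing the inverted mask back in reconstructs the address and the old conditional becomes unnecessary. A minimal sketch of the round trip (hypothetical user-space test, LP64 assumed):

```c
/* Sketch: mask off the high bits, then restore them by OR-ing ~mask back in. */
#include <stdio.h>

#define __VIRTUAL_MASK_SHIFT	47
#define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
#define kc_vaddr_to_offset(v)	((v) & __VIRTUAL_MASK)
#define kc_offset_to_vaddr(o)	((o) | ~__VIRTUAL_MASK)

int main(void)
{
	unsigned long v = 0xffffffff81000000UL;		/* a kernel text address */
	unsigned long o = kc_vaddr_to_offset(v);

	/* prints the original address again after the round trip */
	printf("%#lx -> %#lx -> %#lx\n", v, o, kc_offset_to_vaddr(o));
	return 0;
}
```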
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 49fb3ecf3bb3..621f56d73121 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -22,7 +22,14 @@ extern int reboot_force; | |||
22 | 22 | ||
23 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); | 23 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); |
24 | 24 | ||
25 | #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) | 25 | /* |
26 | #define round_down(x, y) ((x) & ~((y) - 1)) | 26 | * This looks more complex than it should be. But we need to |
27 | * get the type for the ~ right in round_down (it needs to be | ||
28 | * as wide as the result!), and we want to evaluate the macro | ||
29 | * arguments just once each. | ||
30 | */ | ||
31 | #define __round_mask(x,y) ((__typeof__(x))((y)-1)) | ||
32 | #define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1) | ||
33 | #define round_down(x,y) ((x) & ~__round_mask(x,y)) | ||
27 | 34 | ||
28 | #endif /* _ASM_X86_PROTO_H */ | 35 | #endif /* _ASM_X86_PROTO_H */ |
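The comment added to proto.h above is the whole point of __round_mask(): the mask has to be as wide as x, otherwise the ~ is evaluated at the width of y and silently truncates 64-bit values. A minimal sketch (hypothetical user-space test) contrasting the old and new round_down():

```c
/*
 * Sketch: with the old macro, ~(align - 1) is computed as a 32-bit value,
 * so rounding a 64-bit address down loses the high word.  The new macros
 * cast the mask to the type of 'x' first.
 */
#include <stdio.h>

#define OLD_round_down(x, y)	((x) & ~((y) - 1))

#define __round_mask(x, y)	((__typeof__(x))((y) - 1))
#define round_up(x, y)		((((x) - 1) | __round_mask(x, y)) + 1)
#define round_down(x, y)	((x) & ~__round_mask(x, y))

int main(void)
{
	unsigned long long addr = 0x100000005ULL;	/* just above 4 GiB */
	unsigned int align = 16U;			/* 32-bit alignment value */

	/* old macro: the 32-bit mask wipes out the high word */
	printf("old round_down: %#llx\n", OLD_round_down(addr, align));	/* prints 0 */

	/* new macros: the mask is as wide as 'addr' */
	printf("new round_down: %#llx\n", round_down(addr, align));	/* 0x100000000 */
	printf("new round_up:   %#llx\n", round_up(addr, align));	/* 0x100000010 */
	return 0;
}
```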
diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h deleted file mode 100644 index c62349ee7860..000000000000 --- a/arch/x86/include/asm/therm_throt.h +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | #ifndef _ASM_X86_THERM_THROT_H | ||
2 | #define _ASM_X86_THERM_THROT_H | ||
3 | |||
4 | #include <asm/atomic.h> | ||
5 | |||
6 | extern atomic_t therm_throt_en; | ||
7 | int therm_throt_process(int curr); | ||
8 | |||
9 | #endif /* _ASM_X86_THERM_THROT_H */ | ||
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index bd37ed444a21..20ca9c4d4686 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h | |||
@@ -45,12 +45,16 @@ extern int no_timer_check; | |||
45 | */ | 45 | */ |
46 | 46 | ||
47 | DECLARE_PER_CPU(unsigned long, cyc2ns); | 47 | DECLARE_PER_CPU(unsigned long, cyc2ns); |
48 | DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
48 | 49 | ||
49 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
50 | 51 | ||
51 | static inline unsigned long long __cycles_2_ns(unsigned long long cyc) | 52 | static inline unsigned long long __cycles_2_ns(unsigned long long cyc) |
52 | { | 53 | { |
53 | return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; | 54 | int cpu = smp_processor_id(); |
55 | unsigned long long ns = per_cpu(cyc2ns_offset, cpu); | ||
56 | ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR; | ||
57 | return ns; | ||
54 | } | 58 | } |
55 | 59 | ||
56 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | 60 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) |
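The timer.h hunk above adds a per-CPU cyc2ns_offset to the fixed-point cycles-to-nanoseconds conversion, which lets the scale be rebased without the derived clock jumping. A minimal user-space analogue (set_cyc2ns_scale() here is a simplified stand-in, not the kernel function):

```c
/*
 * Sketch, assumptions only: cyc2ns is a 2^10 fixed-point multiplier and the
 * offset is rederived whenever the scale changes so cycles_2_ns() stays
 * continuous across the change.
 */
#include <stdio.h>

#define CYC2NS_SCALE_FACTOR 10			/* 2^10, as in asm/timer.h */

static unsigned long long cyc2ns_scale;		/* per-CPU in the kernel */
static unsigned long long cyc2ns_offset;

static unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return cyc2ns_offset + ((cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
}

/* Re-derive the scale for a new TSC frequency without a backwards jump. */
static void set_cyc2ns_scale(unsigned long khz, unsigned long long now_cyc)
{
	unsigned long long ns_now = cycles_2_ns(now_cyc);

	cyc2ns_scale = (1000000ULL << CYC2NS_SCALE_FACTOR) / khz;
	cyc2ns_offset = ns_now - ((now_cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
}

int main(void)
{
	set_cyc2ns_scale(2000000, 0);			/* 2 GHz at "boot" */
	unsigned long long t1 = cycles_2_ns(4000000000ULL);

	set_cyc2ns_scale(1000000, 4000000000ULL);	/* frequency change */
	unsigned long long t2 = cycles_2_ns(4000000000ULL);

	printf("%llu %llu\n", t1, t2);			/* t2 == t1: continuous */
	return 0;
}
```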
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b685ece89d5c..20e6a795e160 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) | 25 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) |
26 | 26 | ||
27 | #define KERNEL_DS MAKE_MM_SEG(-1UL) | 27 | #define KERNEL_DS MAKE_MM_SEG(-1UL) |
28 | #define USER_DS MAKE_MM_SEG(PAGE_OFFSET) | 28 | #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) |
29 | 29 | ||
30 | #define get_ds() (KERNEL_DS) | 30 | #define get_ds() (KERNEL_DS) |
31 | #define get_fs() (current_thread_info()->addr_limit) | 31 | #define get_fs() (current_thread_info()->addr_limit) |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 631086159c53..6b8ca3a0285d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -44,11 +44,7 @@ | |||
44 | 44 | ||
45 | static int __initdata acpi_force = 0; | 45 | static int __initdata acpi_force = 0; |
46 | u32 acpi_rsdt_forced; | 46 | u32 acpi_rsdt_forced; |
47 | #ifdef CONFIG_ACPI | 47 | int acpi_disabled; |
48 | int acpi_disabled = 0; | ||
49 | #else | ||
50 | int acpi_disabled = 1; | ||
51 | #endif | ||
52 | EXPORT_SYMBOL(acpi_disabled); | 48 | EXPORT_SYMBOL(acpi_disabled); |
53 | 49 | ||
54 | #ifdef CONFIG_X86_64 | 50 | #ifdef CONFIG_X86_64 |
@@ -122,72 +118,6 @@ void __init __acpi_unmap_table(char *map, unsigned long size) | |||
122 | early_iounmap(map, size); | 118 | early_iounmap(map, size); |
123 | } | 119 | } |
124 | 120 | ||
125 | #ifdef CONFIG_PCI_MMCONFIG | ||
126 | |||
127 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
128 | |||
129 | /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ | ||
130 | struct acpi_mcfg_allocation *pci_mmcfg_config; | ||
131 | int pci_mmcfg_config_num; | ||
132 | |||
133 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
134 | { | ||
135 | if (!strcmp(mcfg->header.oem_id, "SGI")) | ||
136 | acpi_mcfg_64bit_base_addr = TRUE; | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | int __init acpi_parse_mcfg(struct acpi_table_header *header) | ||
142 | { | ||
143 | struct acpi_table_mcfg *mcfg; | ||
144 | unsigned long i; | ||
145 | int config_size; | ||
146 | |||
147 | if (!header) | ||
148 | return -EINVAL; | ||
149 | |||
150 | mcfg = (struct acpi_table_mcfg *)header; | ||
151 | |||
152 | /* how many config structures do we have */ | ||
153 | pci_mmcfg_config_num = 0; | ||
154 | i = header->length - sizeof(struct acpi_table_mcfg); | ||
155 | while (i >= sizeof(struct acpi_mcfg_allocation)) { | ||
156 | ++pci_mmcfg_config_num; | ||
157 | i -= sizeof(struct acpi_mcfg_allocation); | ||
158 | }; | ||
159 | if (pci_mmcfg_config_num == 0) { | ||
160 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); | ||
161 | return -ENODEV; | ||
162 | } | ||
163 | |||
164 | config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); | ||
165 | pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); | ||
166 | if (!pci_mmcfg_config) { | ||
167 | printk(KERN_WARNING PREFIX | ||
168 | "No memory for MCFG config tables\n"); | ||
169 | return -ENOMEM; | ||
170 | } | ||
171 | |||
172 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | ||
173 | |||
174 | acpi_mcfg_oem_check(mcfg); | ||
175 | |||
176 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | ||
177 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && | ||
178 | !acpi_mcfg_64bit_base_addr) { | ||
179 | printk(KERN_ERR PREFIX | ||
180 | "MMCONFIG not in low 4GB of memory\n"); | ||
181 | kfree(pci_mmcfg_config); | ||
182 | pci_mmcfg_config_num = 0; | ||
183 | return -ENODEV; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | #endif /* CONFIG_PCI_MMCONFIG */ | ||
190 | |||
191 | #ifdef CONFIG_X86_LOCAL_APIC | 121 | #ifdef CONFIG_X86_LOCAL_APIC |
192 | static int __init acpi_parse_madt(struct acpi_table_header *table) | 122 | static int __init acpi_parse_madt(struct acpi_table_header *table) |
193 | { | 123 | { |
@@ -1519,14 +1449,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
1519 | }, | 1449 | }, |
1520 | { | 1450 | { |
1521 | .callback = force_acpi_ht, | 1451 | .callback = force_acpi_ht, |
1522 | .ident = "ASUS P4B266", | ||
1523 | .matches = { | ||
1524 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
1525 | DMI_MATCH(DMI_BOARD_NAME, "P4B266"), | ||
1526 | }, | ||
1527 | }, | ||
1528 | { | ||
1529 | .callback = force_acpi_ht, | ||
1530 | .ident = "ASUS P2B-DS", | 1452 | .ident = "ASUS P2B-DS", |
1531 | .matches = { | 1453 | .matches = { |
1532 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | 1454 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index bbbe4bbb6f34..8c44c232efcb 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -34,12 +34,22 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, | |||
34 | flags->bm_check = 1; | 34 | flags->bm_check = 1; |
35 | else if (c->x86_vendor == X86_VENDOR_INTEL) { | 35 | else if (c->x86_vendor == X86_VENDOR_INTEL) { |
36 | /* | 36 | /* |
37 | * Today all CPUs that support C3 share cache. | 37 | * Today all MP CPUs that support C3 share cache. |
38 | * TBD: This needs to look at cache shared map, once | 38 | * And caches should not be flushed by software while |
39 | * multi-core detection patch makes to the base. | 39 | * entering C3 type state. |
40 | */ | 40 | */ |
41 | flags->bm_check = 1; | 41 | flags->bm_check = 1; |
42 | } | 42 | } |
43 | |||
44 | /* | ||
45 | * On all recent Intel platforms, ARB_DISABLE is a nop. | ||
46 | * So, set bm_control to zero to indicate that ARB_DISABLE | ||
47 | * is not required while entering C3 type state on | ||
48 | * P4, Core and beyond CPUs | ||
49 | */ | ||
50 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
51 | (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14))) | ||
52 | flags->bm_control = 0; | ||
43 | } | 53 | } |
44 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); | 54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); |
45 | 55 | ||
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 7c074eec39fb..d296f4a195c9 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c | |||
@@ -72,6 +72,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) | |||
72 | return; | 72 | return; |
73 | } | 73 | } |
74 | 74 | ||
75 | |||
75 | /* Initialize _PDC data based on the CPU vendor */ | 76 | /* Initialize _PDC data based on the CPU vendor */ |
76 | void arch_acpi_processor_init_pdc(struct acpi_processor *pr) | 77 | void arch_acpi_processor_init_pdc(struct acpi_processor *pr) |
77 | { | 78 | { |
@@ -85,3 +86,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) | |||
85 | } | 86 | } |
86 | 87 | ||
87 | EXPORT_SYMBOL(arch_acpi_processor_init_pdc); | 88 | EXPORT_SYMBOL(arch_acpi_processor_init_pdc); |
89 | |||
90 | void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) | ||
91 | { | ||
92 | if (pr->pdc) { | ||
93 | kfree(pr->pdc->pointer->buffer.pointer); | ||
94 | kfree(pr->pdc->pointer); | ||
95 | kfree(pr->pdc); | ||
96 | pr->pdc = NULL; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); | ||
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 1c60554537c3..9372f0406ad4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -434,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | |||
434 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | 434 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); |
435 | } | 435 | } |
436 | 436 | ||
437 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | ||
438 | static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | ||
439 | { | ||
440 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
441 | |||
442 | INC_STATS_COUNTER(domain_flush_single); | ||
443 | |||
444 | iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); | ||
445 | } | ||
446 | |||
437 | /* | 447 | /* |
438 | * This function is used to flush the IO/TLB for a given protection domain | 448 | * This function is used to flush the IO/TLB for a given protection domain |
439 | * on every IOMMU in the system | 449 | * on every IOMMU in the system |
@@ -1078,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu, | |||
1078 | amd_iommu_pd_table[devid] = domain; | 1088 | amd_iommu_pd_table[devid] = domain; |
1079 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1089 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1080 | 1090 | ||
1091 | /* | ||
1092 | * We might boot into a crash-kernel here. The crashed kernel | ||
1093 | * left the caches in the IOMMU dirty. So we have to flush | ||
1094 | * here to evict all dirty stuff. | ||
1095 | */ | ||
1081 | iommu_queue_inv_dev_entry(iommu, devid); | 1096 | iommu_queue_inv_dev_entry(iommu, devid); |
1097 | iommu_flush_tlb_pde(iommu, domain->id); | ||
1082 | } | 1098 | } |
1083 | 1099 | ||
1084 | /* | 1100 | /* |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 238989ec077d..10b2accd12ea 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -260,6 +260,14 @@ static void iommu_enable(struct amd_iommu *iommu) | |||
260 | 260 | ||
261 | static void iommu_disable(struct amd_iommu *iommu) | 261 | static void iommu_disable(struct amd_iommu *iommu) |
262 | { | 262 | { |
263 | /* Disable command buffer */ | ||
264 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
265 | |||
266 | /* Disable event logging and event interrupts */ | ||
267 | iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); | ||
268 | iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); | ||
269 | |||
270 | /* Disable IOMMU hardware itself */ | ||
263 | iommu_feature_disable(iommu, CONTROL_IOMMU_EN); | 271 | iommu_feature_disable(iommu, CONTROL_IOMMU_EN); |
264 | } | 272 | } |
265 | 273 | ||
@@ -478,6 +486,10 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu) | |||
478 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | 486 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, |
479 | &entry, sizeof(entry)); | 487 | &entry, sizeof(entry)); |
480 | 488 | ||
489 | /* set head and tail to zero manually */ | ||
490 | writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
491 | writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
492 | |||
481 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | 493 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); |
482 | } | 494 | } |
483 | 495 | ||
@@ -1042,6 +1054,7 @@ static void enable_iommus(void) | |||
1042 | struct amd_iommu *iommu; | 1054 | struct amd_iommu *iommu; |
1043 | 1055 | ||
1044 | for_each_iommu(iommu) { | 1056 | for_each_iommu(iommu) { |
1057 | iommu_disable(iommu); | ||
1045 | iommu_set_device_table(iommu); | 1058 | iommu_set_device_table(iommu); |
1046 | iommu_enable_command_buffer(iommu); | 1059 | iommu_enable_command_buffer(iommu); |
1047 | iommu_enable_event_buffer(iommu); | 1060 | iommu_enable_event_buffer(iommu); |
@@ -1066,12 +1079,6 @@ static void disable_iommus(void) | |||
1066 | 1079 | ||
1067 | static int amd_iommu_resume(struct sys_device *dev) | 1080 | static int amd_iommu_resume(struct sys_device *dev) |
1068 | { | 1081 | { |
1069 | /* | ||
1070 | * Disable IOMMUs before reprogramming the hardware registers. | ||
1071 | * IOMMU is still enabled from the resume kernel. | ||
1072 | */ | ||
1073 | disable_iommus(); | ||
1074 | |||
1075 | /* re-load the hardware */ | 1082 | /* re-load the hardware */ |
1076 | enable_iommus(); | 1083 | enable_iommus(); |
1077 | 1084 | ||
@@ -1079,8 +1086,8 @@ static int amd_iommu_resume(struct sys_device *dev) | |||
1079 | * we have to flush after the IOMMUs are enabled because a | 1086 | * we have to flush after the IOMMUs are enabled because a |
1080 | * disabled IOMMU will never execute the commands we send | 1087 | * disabled IOMMU will never execute the commands we send |
1081 | */ | 1088 | */ |
1082 | amd_iommu_flush_all_domains(); | ||
1083 | amd_iommu_flush_all_devices(); | 1089 | amd_iommu_flush_all_devices(); |
1090 | amd_iommu_flush_all_domains(); | ||
1084 | 1091 | ||
1085 | return 0; | 1092 | return 0; |
1086 | } | 1093 | } |
@@ -1273,6 +1280,11 @@ free: | |||
1273 | goto out; | 1280 | goto out; |
1274 | } | 1281 | } |
1275 | 1282 | ||
1283 | void amd_iommu_shutdown(void) | ||
1284 | { | ||
1285 | disable_iommus(); | ||
1286 | } | ||
1287 | |||
1276 | /**************************************************************************** | 1288 | /**************************************************************************** |
1277 | * | 1289 | * |
1278 | * Early detect code. This code runs at IOMMU detection time in the DMA | 1290 | * Early detect code. This code runs at IOMMU detection time in the DMA |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ef8d9290c7ea..4d0216fcb36c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -462,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | |||
462 | static void | 462 | static void |
463 | __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 463 | __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
464 | { | 464 | { |
465 | union entry_union eu; | 465 | union entry_union eu = {{0, 0}}; |
466 | |||
466 | eu.entry = e; | 467 | eu.entry = e; |
467 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | 468 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
468 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 469 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
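The hunk above gives the local entry_union an explicit zero initializer before the route entry is assigned into it, so the two 32-bit words later written to the chip start from a known-zero representation rather than leftover stack bytes. A minimal sketch of what the initializer buys (toy, simplified layout; not the real IO_APIC_route_entry):

```c
/* Sketch: a zero-initialized union starts with every byte zero, so fields
 * the caller never sets are observed as zero by the writer. */
#include <stdio.h>
#include <stdint.h>

union entry_union {
	struct { uint32_t w1, w2; } w;
	struct { uint8_t vector, polarity, trigger, mask; } entry;	/* toy layout */
};

static void write_entry(const union entry_union *eu)
{
	/* stand-in for io_apic_write(): both 32-bit halves go to "hardware" */
	printf("w1=%#x w2=%#x\n", (unsigned)eu->w.w1, (unsigned)eu->w.w2);
}

int main(void)
{
	union entry_union eu = {{0, 0}};	/* all 8 bytes known-zero */

	eu.entry.vector = 0x31;			/* only one field set explicitly */
	write_entry(&eu);			/* the untouched bytes remain 0  */
	return 0;
}
```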
@@ -1413,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq, | |||
1413 | irte.vector = vector; | 1414 | irte.vector = vector; |
1414 | irte.dest_id = IRTE_DEST(destination); | 1415 | irte.dest_id = IRTE_DEST(destination); |
1415 | 1416 | ||
1417 | /* Set source-id of interrupt request */ | ||
1418 | set_ioapic_sid(&irte, apic_id); | ||
1419 | |||
1416 | modify_irte(irq, &irte); | 1420 | modify_irte(irq, &irte); |
1417 | 1421 | ||
1418 | ir_entry->index2 = (index >> 15) & 0x1; | 1422 | ir_entry->index2 = (index >> 15) & 0x1; |
@@ -2003,7 +2007,9 @@ void disable_IO_APIC(void) | |||
2003 | /* | 2007 | /* |
2004 | * Use virtual wire A mode when interrupt remapping is enabled. | 2008 | * Use virtual wire A mode when interrupt remapping is enabled. |
2005 | */ | 2009 | */ |
2006 | disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); | 2010 | if (cpu_has_apic) |
2011 | disconnect_bsp_APIC(!intr_remapping_enabled && | ||
2012 | ioapic_i8259.pin != -1); | ||
2007 | } | 2013 | } |
2008 | 2014 | ||
2009 | #ifdef CONFIG_X86_32 | 2015 | #ifdef CONFIG_X86_32 |
@@ -3287,6 +3293,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3287 | irte.vector = cfg->vector; | 3293 | irte.vector = cfg->vector; |
3288 | irte.dest_id = IRTE_DEST(dest); | 3294 | irte.dest_id = IRTE_DEST(dest); |
3289 | 3295 | ||
3296 | /* Set source-id of interrupt request */ | ||
3297 | set_msi_sid(&irte, pdev); | ||
3298 | |||
3290 | modify_irte(irq, &irte); | 3299 | modify_irte(irq, &irte); |
3291 | 3300 | ||
3292 | msg->address_hi = MSI_ADDR_BASE_HI; | 3301 | msg->address_hi = MSI_ADDR_BASE_HI; |
@@ -3567,7 +3576,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3567 | 3576 | ||
3568 | #endif /* CONFIG_SMP */ | 3577 | #endif /* CONFIG_SMP */ |
3569 | 3578 | ||
3570 | struct irq_chip dmar_msi_type = { | 3579 | static struct irq_chip dmar_msi_type = { |
3571 | .name = "DMAR_MSI", | 3580 | .name = "DMAR_MSI", |
3572 | .unmask = dmar_msi_unmask, | 3581 | .unmask = dmar_msi_unmask, |
3573 | .mask = dmar_msi_mask, | 3582 | .mask = dmar_msi_mask, |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 440a8bccd91a..0c0182cc947d 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -20,23 +20,12 @@ | |||
20 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
21 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
22 | 22 | ||
23 | #include <linux/threads.h> | ||
24 | #include <linux/cpumask.h> | ||
25 | #include <asm/mpspec.h> | ||
26 | #include <asm/fixmap.h> | ||
27 | #include <asm/apicdef.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
31 | #include <linux/init.h> | ||
32 | #include <asm/ipi.h> | 24 | #include <asm/ipi.h> |
33 | 25 | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/interrupt.h> | 26 | #include <linux/interrupt.h> |
37 | #include <asm/acpi.h> | 27 | #include <asm/acpi.h> |
38 | #include <asm/e820.h> | 28 | #include <asm/e820.h> |
39 | #include <asm/setup.h> | ||
40 | 29 | ||
41 | #ifdef CONFIG_HOTPLUG_CPU | 30 | #ifdef CONFIG_HOTPLUG_CPU |
42 | #define DEFAULT_SEND_IPI (1) | 31 | #define DEFAULT_SEND_IPI (1) |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 344eee4ac0a4..eafdfbd1ea95 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <asm/ipi.h> | 44 | #include <asm/ipi.h> |
45 | #include <linux/kernel.h> | 45 | #include <linux/kernel.h> |
46 | #include <linux/string.h> | 46 | #include <linux/string.h> |
47 | #include <linux/init.h> | ||
48 | #include <linux/gfp.h> | 47 | #include <linux/gfp.h> |
49 | #include <linux/smp.h> | 48 | #include <linux/smp.h> |
50 | 49 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e5b27d8f1b47..28e5f5956042 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
258 | { | 258 | { |
259 | #ifdef CONFIG_X86_HT | 259 | #ifdef CONFIG_X86_HT |
260 | unsigned bits; | 260 | unsigned bits; |
261 | int cpu = smp_processor_id(); | ||
261 | 262 | ||
262 | bits = c->x86_coreid_bits; | 263 | bits = c->x86_coreid_bits; |
263 | |||
264 | /* Low order bits define the core id (index of core in socket) */ | 264 | /* Low order bits define the core id (index of core in socket) */ |
265 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | 265 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); |
266 | /* Convert the initial APIC ID into the socket ID */ | 266 | /* Convert the initial APIC ID into the socket ID */ |
267 | c->phys_proc_id = c->initial_apicid >> bits; | 267 | c->phys_proc_id = c->initial_apicid >> bits; |
268 | /* use socket ID also for last level cache */ | ||
269 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; | ||
268 | #endif | 270 | #endif |
269 | } | 271 | } |
270 | 272 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9fa33886c0d7..f1961c07af9a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
108 | /* data */ | 108 | /* data */ |
109 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | 109 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, |
110 | 110 | ||
111 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | 111 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, |
112 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, | 112 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, |
113 | GDT_STACK_CANARY_INIT | 113 | GDT_STACK_CANARY_INIT |
114 | #endif | 114 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 45004faf67ea..188a1ca5ad2b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,11 +1,12 @@ | |||
1 | obj-y = mce.o therm_throt.o | 1 | obj-y = mce.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o | 3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o |
4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o | 4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o |
5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o | 5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
6 | obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o | 6 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
7 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o | 7 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
8 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o | ||
9 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | 8 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o |
10 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | 9 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o |
11 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 10 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
11 | |||
12 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index 89e510424152..b945d5dbc609 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
@@ -10,10 +10,9 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* Machine Check Handler For AMD Athlon/Duron: */ | 16 | /* Machine Check Handler For AMD Athlon/Duron: */ |
18 | static void k7_machine_check(struct pt_regs *regs, long error_code) | 17 | static void k7_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 18 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index fabba15e4558..af425b83202b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <asm/msr.h> | 44 | #include <asm/msr.h> |
45 | 45 | ||
46 | #include "mce-internal.h" | 46 | #include "mce-internal.h" |
47 | #include "mce.h" | ||
48 | 47 | ||
49 | /* Handle unconfigured int18 (should never happen) */ | 48 | /* Handle unconfigured int18 (should never happen) */ |
50 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | 49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) |
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) | |||
57 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | 56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = |
58 | unexpected_machine_check; | 57 | unexpected_machine_check; |
59 | 58 | ||
60 | int mce_disabled; | 59 | int mce_disabled __read_mostly; |
61 | 60 | ||
62 | #ifdef CONFIG_X86_NEW_MCE | 61 | #ifdef CONFIG_X86_NEW_MCE |
63 | 62 | ||
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); | |||
76 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | 75 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors |
77 | * 3: never panic or SIGBUS, log all errors (for testing only) | 76 | * 3: never panic or SIGBUS, log all errors (for testing only) |
78 | */ | 77 | */ |
79 | static int tolerant = 1; | 78 | static int tolerant __read_mostly = 1; |
80 | static int banks; | 79 | static int banks __read_mostly; |
81 | static u64 *bank; | 80 | static u64 *bank __read_mostly; |
82 | static unsigned long notify_user; | 81 | static int rip_msr __read_mostly; |
83 | static int rip_msr; | 82 | static int mce_bootlog __read_mostly = -1; |
84 | static int mce_bootlog = -1; | 83 | static int monarch_timeout __read_mostly = -1; |
85 | static int monarch_timeout = -1; | 84 | static int mce_panic_timeout __read_mostly; |
86 | static int mce_panic_timeout; | 85 | static int mce_dont_log_ce __read_mostly; |
87 | static int mce_dont_log_ce; | 86 | int mce_cmci_disabled __read_mostly; |
88 | int mce_cmci_disabled; | 87 | int mce_ignore_ce __read_mostly; |
89 | int mce_ignore_ce; | 88 | int mce_ser __read_mostly; |
90 | int mce_ser; | 89 | |
91 | 90 | /* User mode helper program triggered by machine check event */ | |
92 | static char trigger[128]; | 91 | static unsigned long mce_need_notify; |
93 | static char *trigger_argv[2] = { trigger, NULL }; | 92 | static char mce_helper[128]; |
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | ||
94 | 94 | ||
95 | static unsigned long dont_init_banks; | 95 | static unsigned long dont_init_banks; |
96 | 96 | ||
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce) | |||
180 | wmb(); | 180 | wmb(); |
181 | 181 | ||
182 | mce->finished = 1; | 182 | mce->finished = 1; |
183 | set_bit(0, ¬ify_user); | 183 | set_bit(0, &mce_need_notify); |
184 | } | 184 | } |
185 | 185 | ||
186 | static void print_mce(struct mce *m) | 186 | static void print_mce(struct mce *m) |
@@ -691,18 +691,21 @@ static atomic_t global_nwo; | |||
691 | * in the entry order. | 691 | * in the entry order. |
692 | * TBD double check parallel CPU hotunplug | 692 | * TBD double check parallel CPU hotunplug |
693 | */ | 693 | */ |
694 | static int mce_start(int no_way_out, int *order) | 694 | static int mce_start(int *no_way_out) |
695 | { | 695 | { |
696 | int nwo; | 696 | int order; |
697 | int cpus = num_online_cpus(); | 697 | int cpus = num_online_cpus(); |
698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | 698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; |
699 | 699 | ||
700 | if (!timeout) { | 700 | if (!timeout) |
701 | *order = -1; | 701 | return -1; |
702 | return no_way_out; | ||
703 | } | ||
704 | 702 | ||
705 | atomic_add(no_way_out, &global_nwo); | 703 | atomic_add(*no_way_out, &global_nwo); |
704 | /* | ||
705 | * global_nwo should be updated before mce_callin | ||
706 | */ | ||
707 | smp_wmb(); | ||
708 | order = atomic_add_return(1, &mce_callin); | ||
706 | 709 | ||
707 | /* | 710 | /* |
708 | * Wait for everyone. | 711 | * Wait for everyone. |
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order) | |||
710 | while (atomic_read(&mce_callin) != cpus) { | 713 | while (atomic_read(&mce_callin) != cpus) { |
711 | if (mce_timed_out(&timeout)) { | 714 | if (mce_timed_out(&timeout)) { |
712 | atomic_set(&global_nwo, 0); | 715 | atomic_set(&global_nwo, 0); |
713 | *order = -1; | 716 | return -1; |
714 | return no_way_out; | ||
715 | } | 717 | } |
716 | ndelay(SPINUNIT); | 718 | ndelay(SPINUNIT); |
717 | } | 719 | } |
718 | 720 | ||
719 | /* | 721 | /* |
720 | * Cache the global no_way_out state. | 722 | * mce_callin should be read before global_nwo |
721 | */ | 723 | */ |
722 | nwo = atomic_read(&global_nwo); | 724 | smp_rmb(); |
723 | 725 | ||
724 | /* | 726 | if (order == 1) { |
725 | * Monarch starts executing now, the others wait. | 727 | /* |
726 | */ | 728 | * Monarch: Starts executing now, the others wait. |
727 | if (*order == 1) { | 729 | */ |
728 | atomic_set(&mce_executing, 1); | 730 | atomic_set(&mce_executing, 1); |
729 | return nwo; | 731 | } else { |
732 | /* | ||
733 | * Subject: Now start the scanning loop one by one in | ||
734 | * the original callin order. | ||
735 | * This way when there are any shared banks it will be | ||
736 | * only seen by one CPU before cleared, avoiding duplicates. | ||
737 | */ | ||
738 | while (atomic_read(&mce_executing) < order) { | ||
739 | if (mce_timed_out(&timeout)) { | ||
740 | atomic_set(&global_nwo, 0); | ||
741 | return -1; | ||
742 | } | ||
743 | ndelay(SPINUNIT); | ||
744 | } | ||
730 | } | 745 | } |
731 | 746 | ||
732 | /* | 747 | /* |
733 | * Now start the scanning loop one by one | 748 | * Cache the global no_way_out state. |
734 | * in the original callin order. | ||
735 | * This way when there are any shared banks it will | ||
736 | * be only seen by one CPU before cleared, avoiding duplicates. | ||
737 | */ | 749 | */ |
738 | while (atomic_read(&mce_executing) < *order) { | 750 | *no_way_out = atomic_read(&global_nwo); |
739 | if (mce_timed_out(&timeout)) { | 751 | |
740 | atomic_set(&global_nwo, 0); | 752 | return order; |
741 | *order = -1; | ||
742 | return no_way_out; | ||
743 | } | ||
744 | ndelay(SPINUNIT); | ||
745 | } | ||
746 | return nwo; | ||
747 | } | 753 | } |
748 | 754 | ||
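[Editor's note] The rewritten mce_start() above folds the callin step into the function itself: each CPU publishes its no_way_out vote, issues smp_wmb(), then takes a callin number, and the return value is now the callin order while the aggregated no_way_out comes back through the pointer. Below is a minimal user-space sketch of that rendezvous, assuming C11 atomics and pthreads in place of the kernel's atomic_t, smp_wmb()/smp_rmb() and ndelay(); the timeout handling is omitted and the names only mirror the patch, so treat it as an illustration of the ordering, not the kernel code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int global_nwo;    /* aggregated "no way out" votes       */
static atomic_int mce_callin;    /* callin counter; defines the order   */
static atomic_int mce_executing; /* highest order allowed to scan banks */

/* Returns this CPU's order (1 == monarch) and rewrites *no_way_out with
 * the global consensus, like the reworked kernel function does. */
static int mce_start(int *no_way_out)
{
        /* publish the vote before taking a callin number (smp_wmb() in the kernel) */
        atomic_fetch_add_explicit(&global_nwo, *no_way_out, memory_order_release);
        int order = atomic_fetch_add(&mce_callin, 1) + 1;

        while (atomic_load(&mce_callin) != NCPUS)
                ;                                /* wait for everyone to check in */

        if (order == 1)
                atomic_store(&mce_executing, 1); /* monarch scans first */
        else
                while (atomic_load(&mce_executing) < order)
                        ;                        /* subjects follow in callin order */

        *no_way_out = atomic_load_explicit(&global_nwo, memory_order_acquire);
        return order;
}

static void *cpu(void *arg)
{
        int nwo = ((long)arg == 2);              /* pretend CPU 2 votes "no way out" */
        int order = mce_start(&nwo);

        printf("cpu%ld: order %d, global no_way_out %d\n", (long)arg, order, nwo);
        atomic_fetch_add(&mce_executing, 1);     /* scan done; release the next CPU */
        return NULL;
}

int main(void)
{
        pthread_t t[NCPUS];

        for (long i = 0; i < NCPUS; i++)
                pthread_create(&t[i], NULL, cpu, (void *)i);
        for (int i = 0; i < NCPUS; i++)
                pthread_join(t[i], NULL);
        return 0;
}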
749 | /* | 755 | /* |
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
863 | * check handler. | 869 | * check handler. |
864 | */ | 870 | */ |
865 | int order; | 871 | int order; |
866 | |||
867 | /* | 872 | /* |
868 | * If no_way_out gets set, there is no safe way to recover from this | 873 | * If no_way_out gets set, there is no safe way to recover from this |
869 | * MCE. If tolerant is cranked up, we'll try anyway. | 874 | * MCE. If tolerant is cranked up, we'll try anyway. |
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
887 | if (!banks) | 892 | if (!banks) |
888 | goto out; | 893 | goto out; |
889 | 894 | ||
890 | order = atomic_add_return(1, &mce_callin); | ||
891 | mce_setup(&m); | 895 | mce_setup(&m); |
892 | 896 | ||
893 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 897 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
909 | * This way we don't report duplicated events on shared banks | 913 | * This way we don't report duplicated events on shared banks |
910 | * because the first one to see it will clear it. | 914 | * because the first one to see it will clear it. |
911 | */ | 915 | */ |
912 | no_way_out = mce_start(no_way_out, &order); | 916 | order = mce_start(&no_way_out); |
913 | for (i = 0; i < banks; i++) { | 917 | for (i = 0; i < banks; i++) { |
914 | __clear_bit(i, toclear); | 918 | __clear_bit(i, toclear); |
915 | if (!bank[i]) | 919 | if (!bank[i]) |
@@ -1113,12 +1117,12 @@ static void mcheck_timer(unsigned long data) | |||
1113 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | 1117 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); |
1114 | 1118 | ||
1115 | t->expires = jiffies + *n; | 1119 | t->expires = jiffies + *n; |
1116 | add_timer(t); | 1120 | add_timer_on(t, smp_processor_id()); |
1117 | } | 1121 | } |
1118 | 1122 | ||
1119 | static void mce_do_trigger(struct work_struct *work) | 1123 | static void mce_do_trigger(struct work_struct *work) |
1120 | { | 1124 | { |
1121 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | 1125 | call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); |
1122 | } | 1126 | } |
1123 | 1127 | ||
1124 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | 1128 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void) | |||
1135 | 1139 | ||
1136 | clear_thread_flag(TIF_MCE_NOTIFY); | 1140 | clear_thread_flag(TIF_MCE_NOTIFY); |
1137 | 1141 | ||
1138 | if (test_and_clear_bit(0, ¬ify_user)) { | 1142 | if (test_and_clear_bit(0, &mce_need_notify)) { |
1139 | wake_up_interruptible(&mce_wait); | 1143 | wake_up_interruptible(&mce_wait); |
1140 | 1144 | ||
1141 | /* | 1145 | /* |
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void) | |||
1143 | * work_pending is always cleared before the function is | 1147 | * work_pending is always cleared before the function is |
1144 | * executed. | 1148 | * executed. |
1145 | */ | 1149 | */ |
1146 | if (trigger[0] && !work_pending(&mce_trigger_work)) | 1150 | if (mce_helper[0] && !work_pending(&mce_trigger_work)) |
1147 | schedule_work(&mce_trigger_work); | 1151 | schedule_work(&mce_trigger_work); |
1148 | 1152 | ||
1149 | if (__ratelimit(&ratelimit)) | 1153 | if (__ratelimit(&ratelimit)) |
@@ -1245,7 +1249,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1245 | * Various K7s with broken bank 0 around. Always disable | 1249 | * Various K7s with broken bank 0 around. Always disable |
1246 | * by default. | 1250 | * by default. |
1247 | */ | 1251 | */ |
1248 | if (c->x86 == 6) | 1252 | if (c->x86 == 6 && banks > 0) |
1249 | bank[0] = 0; | 1253 | bank[0] = 0; |
1250 | } | 1254 | } |
1251 | 1255 | ||
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1282 | return; | 1286 | return; |
1283 | switch (c->x86_vendor) { | 1287 | switch (c->x86_vendor) { |
1284 | case X86_VENDOR_INTEL: | 1288 | case X86_VENDOR_INTEL: |
1285 | if (mce_p5_enabled()) | 1289 | intel_p5_mcheck_init(c); |
1286 | intel_p5_mcheck_init(c); | ||
1287 | break; | 1290 | break; |
1288 | case X86_VENDOR_CENTAUR: | 1291 | case X86_VENDOR_CENTAUR: |
1289 | winchip_mcheck_init(c); | 1292 | winchip_mcheck_init(c); |
@@ -1318,7 +1321,7 @@ static void mce_init_timer(void) | |||
1318 | return; | 1321 | return; |
1319 | setup_timer(t, mcheck_timer, smp_processor_id()); | 1322 | setup_timer(t, mcheck_timer, smp_processor_id()); |
1320 | t->expires = round_jiffies(jiffies + *n); | 1323 | t->expires = round_jiffies(jiffies + *n); |
1321 | add_timer(t); | 1324 | add_timer_on(t, smp_processor_id()); |
1322 | } | 1325 | } |
1323 | 1326 | ||
1324 | /* | 1327 | /* |
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev) | |||
1609 | static void mce_cpu_restart(void *data) | 1612 | static void mce_cpu_restart(void *data) |
1610 | { | 1613 | { |
1611 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1614 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1612 | if (mce_available(¤t_cpu_data)) | 1615 | if (!mce_available(¤t_cpu_data)) |
1613 | mce_init(); | 1616 | return; |
1617 | mce_init(); | ||
1614 | mce_init_timer(); | 1618 | mce_init_timer(); |
1615 | } | 1619 | } |
1616 | 1620 | ||
@@ -1620,6 +1624,26 @@ static void mce_restart(void) | |||
1620 | on_each_cpu(mce_cpu_restart, NULL, 1); | 1624 | on_each_cpu(mce_cpu_restart, NULL, 1); |
1621 | } | 1625 | } |
1622 | 1626 | ||
1627 | /* Toggle features for corrected errors */ | ||
1628 | static void mce_disable_ce(void *all) | ||
1629 | { | ||
1630 | if (!mce_available(¤t_cpu_data)) | ||
1631 | return; | ||
1632 | if (all) | ||
1633 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
1634 | cmci_clear(); | ||
1635 | } | ||
1636 | |||
1637 | static void mce_enable_ce(void *all) | ||
1638 | { | ||
1639 | if (!mce_available(¤t_cpu_data)) | ||
1640 | return; | ||
1641 | cmci_reenable(); | ||
1642 | cmci_recheck(); | ||
1643 | if (all) | ||
1644 | mce_init_timer(); | ||
1645 | } | ||
1646 | |||
1623 | static struct sysdev_class mce_sysclass = { | 1647 | static struct sysdev_class mce_sysclass = { |
1624 | .suspend = mce_suspend, | 1648 | .suspend = mce_suspend, |
1625 | .shutdown = mce_shutdown, | 1649 | .shutdown = mce_shutdown, |
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1659 | static ssize_t | 1683 | static ssize_t |
1660 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | 1684 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) |
1661 | { | 1685 | { |
1662 | strcpy(buf, trigger); | 1686 | strcpy(buf, mce_helper); |
1663 | strcat(buf, "\n"); | 1687 | strcat(buf, "\n"); |
1664 | return strlen(trigger) + 1; | 1688 | return strlen(mce_helper) + 1; |
1665 | } | 1689 | } |
1666 | 1690 | ||
1667 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 1691 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1670 | char *p; | 1694 | char *p; |
1671 | int len; | 1695 | int len; |
1672 | 1696 | ||
1673 | strncpy(trigger, buf, sizeof(trigger)); | 1697 | strncpy(mce_helper, buf, sizeof(mce_helper)); |
1674 | trigger[sizeof(trigger)-1] = 0; | 1698 | mce_helper[sizeof(mce_helper)-1] = 0; |
1675 | len = strlen(trigger); | 1699 | len = strlen(mce_helper); |
1676 | p = strchr(trigger, '\n'); | 1700 | p = strchr(mce_helper, '\n'); |
1677 | 1701 | ||
1678 | if (*p) | 1702 | if (*p) |
1679 | *p = 0; | 1703 | *p = 0; |
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1681 | return len; | 1705 | return len; |
1682 | } | 1706 | } |
1683 | 1707 | ||
1708 | static ssize_t set_ignore_ce(struct sys_device *s, | ||
1709 | struct sysdev_attribute *attr, | ||
1710 | const char *buf, size_t size) | ||
1711 | { | ||
1712 | u64 new; | ||
1713 | |||
1714 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1715 | return -EINVAL; | ||
1716 | |||
1717 | if (mce_ignore_ce ^ !!new) { | ||
1718 | if (new) { | ||
1719 | /* disable ce features */ | ||
1720 | on_each_cpu(mce_disable_ce, (void *)1, 1); | ||
1721 | mce_ignore_ce = 1; | ||
1722 | } else { | ||
1723 | /* enable ce features */ | ||
1724 | mce_ignore_ce = 0; | ||
1725 | on_each_cpu(mce_enable_ce, (void *)1, 1); | ||
1726 | } | ||
1727 | } | ||
1728 | return size; | ||
1729 | } | ||
1730 | |||
1731 | static ssize_t set_cmci_disabled(struct sys_device *s, | ||
1732 | struct sysdev_attribute *attr, | ||
1733 | const char *buf, size_t size) | ||
1734 | { | ||
1735 | u64 new; | ||
1736 | |||
1737 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1738 | return -EINVAL; | ||
1739 | |||
1740 | if (mce_cmci_disabled ^ !!new) { | ||
1741 | if (new) { | ||
1742 | /* disable cmci */ | ||
1743 | on_each_cpu(mce_disable_ce, NULL, 1); | ||
1744 | mce_cmci_disabled = 1; | ||
1745 | } else { | ||
1746 | /* enable cmci */ | ||
1747 | mce_cmci_disabled = 0; | ||
1748 | on_each_cpu(mce_enable_ce, NULL, 1); | ||
1749 | } | ||
1750 | } | ||
1751 | return size; | ||
1752 | } | ||
1753 | |||
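[Editor's note] Both new store handlers above only touch the hardware when the requested state actually differs from the current one, and they order the flag update against the cross-CPU enable/disable so the flag never claims a state the CPUs have not reached. A small user-space sketch of that pattern follows; disable_ce()/enable_ce() merely stand in for the on_each_cpu(mce_disable_ce)/on_each_cpu(mce_enable_ce) calls and the sysfs plumbing is left out.

#include <stdio.h>

static int ignore_ce;

static void disable_ce(void) { puts("disabling CE features on all CPUs"); }
static void enable_ce(void)  { puts("re-enabling CE features on all CPUs"); }

static void set_ignore_ce(unsigned long long new)
{
        if (ignore_ce ^ !!new) {        /* act only if the state changes */
                if (new) {
                        disable_ce();
                        ignore_ce = 1;  /* publish after teardown */
                } else {
                        ignore_ce = 0;  /* publish before re-enable */
                        enable_ce();
                }
        }
}

int main(void)
{
        set_ignore_ce(1);   /* disables once */
        set_ignore_ce(1);   /* no-op: same state requested twice */
        set_ignore_ce(0);   /* re-enables */
        return 0;
}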
1684 | static ssize_t store_int_with_restart(struct sys_device *s, | 1754 | static ssize_t store_int_with_restart(struct sys_device *s, |
1685 | struct sysdev_attribute *attr, | 1755 | struct sysdev_attribute *attr, |
1686 | const char *buf, size_t size) | 1756 | const char *buf, size_t size) |
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s, | |||
1693 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 1763 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); |
1694 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 1764 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
1695 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 1765 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); |
1766 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | ||
1696 | 1767 | ||
1697 | static struct sysdev_ext_attribute attr_check_interval = { | 1768 | static struct sysdev_ext_attribute attr_check_interval = { |
1698 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | 1769 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, |
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = { | |||
1700 | &check_interval | 1771 | &check_interval |
1701 | }; | 1772 | }; |
1702 | 1773 | ||
1774 | static struct sysdev_ext_attribute attr_ignore_ce = { | ||
1775 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | ||
1776 | &mce_ignore_ce | ||
1777 | }; | ||
1778 | |||
1779 | static struct sysdev_ext_attribute attr_cmci_disabled = { | ||
1780 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | ||
1781 | &mce_cmci_disabled | ||
1782 | }; | ||
1783 | |||
1703 | static struct sysdev_attribute *mce_attrs[] = { | 1784 | static struct sysdev_attribute *mce_attrs[] = { |
1704 | &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, | 1785 | &attr_tolerant.attr, |
1786 | &attr_check_interval.attr, | ||
1787 | &attr_trigger, | ||
1705 | &attr_monarch_timeout.attr, | 1788 | &attr_monarch_timeout.attr, |
1789 | &attr_dont_log_ce.attr, | ||
1790 | &attr_ignore_ce.attr, | ||
1791 | &attr_cmci_disabled.attr, | ||
1706 | NULL | 1792 | NULL |
1707 | }; | 1793 | }; |
1708 | 1794 | ||
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized; | |||
1712 | static __cpuinit int mce_create_device(unsigned int cpu) | 1798 | static __cpuinit int mce_create_device(unsigned int cpu) |
1713 | { | 1799 | { |
1714 | int err; | 1800 | int err; |
1715 | int i; | 1801 | int i, j; |
1716 | 1802 | ||
1717 | if (!mce_available(&boot_cpu_data)) | 1803 | if (!mce_available(&boot_cpu_data)) |
1718 | return -EIO; | 1804 | return -EIO; |
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1730 | if (err) | 1816 | if (err) |
1731 | goto error; | 1817 | goto error; |
1732 | } | 1818 | } |
1733 | for (i = 0; i < banks; i++) { | 1819 | for (j = 0; j < banks; j++) { |
1734 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | 1820 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), |
1735 | &bank_attrs[i]); | 1821 | &bank_attrs[j]); |
1736 | if (err) | 1822 | if (err) |
1737 | goto error2; | 1823 | goto error2; |
1738 | } | 1824 | } |
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1740 | 1826 | ||
1741 | return 0; | 1827 | return 0; |
1742 | error2: | 1828 | error2: |
1743 | while (--i >= 0) | 1829 | while (--j >= 0) |
1744 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | 1830 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); |
1745 | error: | 1831 | error: |
1746 | while (--i >= 0) | 1832 | while (--i >= 0) |
1747 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1833 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
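[Editor's note] The i/j split above matters for the error unwind: with a single index, a failure while creating the per-bank files would make the error: loop walk mce_attrs[] with a bank-sized index. Separate indices let error2: unwind only the bank files and then fall through to unwind the attribute files. A generic, runnable sketch of that two-stage create/unwind pattern, with create_a()/create_b() as hypothetical stand-ins for the sysdev_create_file() calls:

#include <stdio.h>

#define NA 3
#define NB 4

static int create_a(int i)  { printf("create a[%d]\n", i); return 0; }
static int create_b(int j)  { printf("create b[%d]\n", j); return j == 2 ? -1 : 0; }
static void remove_a(int i) { printf("remove a[%d]\n", i); }
static void remove_b(int j) { printf("remove b[%d]\n", j); }

static int create_all(void)
{
        int i, j;

        for (i = 0; i < NA; i++)
                if (create_a(i))
                        goto error;
        for (j = 0; j < NB; j++)
                if (create_b(j))
                        goto error2;
        return 0;

error2:
        while (--j >= 0)        /* unwind only what stage two created ... */
                remove_b(j);
error:
        while (--i >= 0)        /* ... then everything from stage one */
                remove_a(i);
        return -1;
}

int main(void)
{
        printf("create_all() = %d\n", create_all());
        return 0;
}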
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void) | |||
1883 | if (!mce_available(&boot_cpu_data)) | 1969 | if (!mce_available(&boot_cpu_data)) |
1884 | return -EIO; | 1970 | return -EIO; |
1885 | 1971 | ||
1886 | alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | 1972 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); |
1887 | 1973 | ||
1888 | err = mce_init_banks(); | 1974 | err = mce_init_banks(); |
1889 | if (err) | 1975 | if (err) |
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | |||
1915 | /* This has to be run for each processor */ | 2001 | /* This has to be run for each processor */ |
1916 | void mcheck_init(struct cpuinfo_x86 *c) | 2002 | void mcheck_init(struct cpuinfo_x86 *c) |
1917 | { | 2003 | { |
1918 | if (mce_disabled == 1) | 2004 | if (mce_disabled) |
1919 | return; | 2005 | return; |
1920 | 2006 | ||
1921 | switch (c->x86_vendor) { | 2007 | switch (c->x86_vendor) { |
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c) | |||
1945 | 2031 | ||
1946 | static int __init mcheck_enable(char *str) | 2032 | static int __init mcheck_enable(char *str) |
1947 | { | 2033 | { |
1948 | mce_disabled = -1; | 2034 | mce_p5_enabled = 1; |
1949 | return 1; | 2035 | return 1; |
1950 | } | 2036 | } |
1951 | |||
1952 | __setup("mce", mcheck_enable); | 2037 | __setup("mce", mcheck_enable); |
1953 | 2038 | ||
1954 | #endif /* CONFIG_X86_OLD_MCE */ | 2039 | #endif /* CONFIG_X86_OLD_MCE */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 84a552b458c8..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/mce.h> | ||
3 | |||
4 | #ifdef CONFIG_X86_OLD_MCE | ||
5 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
6 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
8 | #endif | ||
9 | |||
10 | #ifdef CONFIG_X86_ANCIENT_MCE | ||
11 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
12 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
13 | extern int mce_p5_enable; | ||
14 | static inline int mce_p5_enabled(void) { return mce_p5_enable; } | ||
15 | static inline void enable_p5_mce(void) { mce_p5_enable = 1; } | ||
16 | #else | ||
17 | static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} | ||
18 | static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | ||
19 | static inline int mce_p5_enabled(void) { return 0; } | ||
20 | static inline void enable_p5_mce(void) { } | ||
21 | #endif | ||
22 | |||
23 | /* Call the installed machine check handler for this CPU setup. */ | ||
24 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
25 | |||
26 | #ifdef CONFIG_X86_OLD_MCE | ||
27 | |||
28 | extern int nr_mce_banks; | ||
29 | |||
30 | void intel_set_thermal_handler(void); | ||
31 | |||
32 | #else | ||
33 | |||
34 | static inline void intel_set_thermal_handler(void) { } | ||
35 | |||
36 | #endif | ||
37 | |||
38 | void intel_init_thermal(struct cpuinfo_x86 *c); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae21620bda..ddae21620bda 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 2b011d2d8579..e1acec0f7a32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -1,74 +1,226 @@ | |||
1 | /* | 1 | /* |
2 | * Common code for Intel machine checks | 2 | * Intel specific MCE features. |
3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | ||
4 | * Copyright (C) 2008, 2009 Intel Corporation | ||
5 | * Author: Andi Kleen | ||
3 | */ | 6 | */ |
4 | #include <linux/interrupt.h> | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/smp.h> | ||
9 | 7 | ||
10 | #include <asm/therm_throt.h> | 8 | #include <linux/init.h> |
11 | #include <asm/processor.h> | 9 | #include <linux/interrupt.h> |
12 | #include <asm/system.h> | 10 | #include <linux/percpu.h> |
13 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
12 | #include <asm/processor.h> | ||
14 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/mce.h> | ||
15 | |||
16 | /* | ||
17 | * Support for Intel Correct Machine Check Interrupts. This allows | ||
18 | * the CPU to raise an interrupt when a corrected machine check happened. | ||
19 | * Normally we pick those up using a regular polling timer. | ||
20 | * Also supports reliable discovery of shared banks. | ||
21 | */ | ||
15 | 22 | ||
16 | #include "mce.h" | 23 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); |
17 | 24 | ||
18 | void intel_init_thermal(struct cpuinfo_x86 *c) | 25 | /* |
26 | * cmci_discover_lock protects against parallel discovery attempts | ||
27 | * which could race against each other. | ||
28 | */ | ||
29 | static DEFINE_SPINLOCK(cmci_discover_lock); | ||
30 | |||
31 | #define CMCI_THRESHOLD 1 | ||
32 | |||
33 | static int cmci_supported(int *banks) | ||
19 | { | 34 | { |
20 | unsigned int cpu = smp_processor_id(); | 35 | u64 cap; |
21 | int tm2 = 0; | ||
22 | u32 l, h; | ||
23 | 36 | ||
24 | /* Thermal monitoring depends on ACPI and clock modulation*/ | 37 | if (mce_cmci_disabled || mce_ignore_ce) |
25 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | 38 | return 0; |
26 | return; | ||
27 | 39 | ||
28 | /* | 40 | /* |
29 | * First check if its enabled already, in which case there might | 41 | * Vendor check is not strictly needed, but the initial |
30 | * be some SMM goo which handles it, so we can't even put a handler | 42 | * initialization is vendor keyed and this |
31 | * since it might be delivered via SMI already: | 43 | * makes sure none of the backdoors are entered otherwise. |
32 | */ | 44 | */ |
33 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 45 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
34 | h = apic_read(APIC_LVTTHMR); | 46 | return 0; |
35 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 47 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) |
36 | printk(KERN_DEBUG | 48 | return 0; |
37 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 49 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
38 | return; | 50 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); |
51 | return !!(cap & MCG_CMCI_P); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * The interrupt handler. This is called on every event. | ||
56 | * Just call the poller directly to log any events. | ||
57 | * This could in theory increase the threshold under high load, | ||
58 | * but doesn't for now. | ||
59 | */ | ||
60 | static void intel_threshold_interrupt(void) | ||
61 | { | ||
62 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
63 | mce_notify_irq(); | ||
64 | } | ||
65 | |||
66 | static void print_update(char *type, int *hdr, int num) | ||
67 | { | ||
68 | if (*hdr == 0) | ||
69 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
70 | *hdr = 1; | ||
71 | printk(KERN_CONT " %s:%d", type, num); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | ||
76 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | ||
77 | * banks. | ||
78 | */ | ||
79 | static void cmci_discover(int banks, int boot) | ||
80 | { | ||
81 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | ||
82 | unsigned long flags; | ||
83 | int hdr = 0; | ||
84 | int i; | ||
85 | |||
86 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
87 | for (i = 0; i < banks; i++) { | ||
88 | u64 val; | ||
89 | |||
90 | if (test_bit(i, owned)) | ||
91 | continue; | ||
92 | |||
93 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
94 | |||
95 | /* Already owned by someone else? */ | ||
96 | if (val & CMCI_EN) { | ||
97 | if (test_and_clear_bit(i, owned) || boot) | ||
98 | print_update("SHD", &hdr, i); | ||
99 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
100 | continue; | ||
101 | } | ||
102 | |||
103 | val |= CMCI_EN | CMCI_THRESHOLD; | ||
104 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
105 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
106 | |||
107 | /* Did the enable bit stick? -- the bank supports CMCI */ | ||
108 | if (val & CMCI_EN) { | ||
109 | if (!test_and_set_bit(i, owned) || boot) | ||
110 | print_update("CMCI", &hdr, i); | ||
111 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
112 | } else { | ||
113 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | ||
114 | } | ||
39 | } | 115 | } |
116 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
117 | if (hdr) | ||
118 | printk(KERN_CONT "\n"); | ||
119 | } | ||
120 | |||
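[Editor's note] cmci_discover() above claims a bank by writing CMCI_EN and reading it back: if the bit was already set, another CPU owns the bank (shared); if the write does not stick, the bank has no CMCI support and stays with the polling timer. The toy model below illustrates just that ownership test; msr[] stands in for the IA32_MCx_CTL2 registers and the "capable" flags fake hardware behaviour, so it is an illustration rather than the kernel loop (locking and the per-CPU owned/poll bitmaps are omitted).

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define CMCI_EN        (1ULL << 30)
#define CMCI_THRESHOLD 1
#define NBANKS         4

static uint64_t msr[NBANKS];
static bool     cmci_capable[NBANKS] = { true, true, false, true };

static void wrmsr(int i, uint64_t val)
{
        if (!cmci_capable[i])
                val &= ~CMCI_EN;        /* the enable bit does not stick */
        msr[i] = val;
}

static void cmci_discover(int cpu)
{
        for (int i = 0; i < NBANKS; i++) {
                uint64_t val = msr[i];

                if (val & CMCI_EN) {            /* already owned elsewhere */
                        printf("cpu%d: bank %d shared\n", cpu, i);
                        continue;
                }
                wrmsr(i, val | CMCI_EN | CMCI_THRESHOLD);
                if (msr[i] & CMCI_EN)           /* did the enable bit stick? */
                        printf("cpu%d: bank %d owned via CMCI\n", cpu, i);
                else
                        printf("cpu%d: bank %d polled only\n", cpu, i);
        }
}

int main(void)
{
        cmci_discover(0);       /* first CPU claims the capable banks */
        cmci_discover(1);       /* second CPU sees them as shared */
        return 0;
}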
121 | /* | ||
122 | * Just in case we missed an event during initialization check | ||
123 | * all the CMCI owned banks. | ||
124 | */ | ||
125 | void cmci_recheck(void) | ||
126 | { | ||
127 | unsigned long flags; | ||
128 | int banks; | ||
129 | |||
130 | if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) | ||
131 | return; | ||
132 | local_irq_save(flags); | ||
133 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
134 | local_irq_restore(flags); | ||
135 | } | ||
40 | 136 | ||
41 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | 137 | /* |
42 | tm2 = 1; | 138 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
139 | * This allows other CPUs to claim the banks on rediscovery. | ||
140 | */ | ||
141 | void cmci_clear(void) | ||
142 | { | ||
143 | unsigned long flags; | ||
144 | int i; | ||
145 | int banks; | ||
146 | u64 val; | ||
43 | 147 | ||
44 | /* Check whether a vector already exists */ | 148 | if (!cmci_supported(&banks)) |
45 | if (h & APIC_VECTOR_MASK) { | ||
46 | printk(KERN_DEBUG | ||
47 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
48 | cpu, (h & APIC_VECTOR_MASK)); | ||
49 | return; | 149 | return; |
150 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
151 | for (i = 0; i < banks; i++) { | ||
152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | ||
153 | continue; | ||
154 | /* Disable CMCI */ | ||
155 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | ||
157 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
50 | } | 159 | } |
160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * After a CPU went down cycle through all the others and rediscover | ||
165 | * Must run in process context. | ||
166 | */ | ||
167 | void cmci_rediscover(int dying) | ||
168 | { | ||
169 | int banks; | ||
170 | int cpu; | ||
171 | cpumask_var_t old; | ||
172 | |||
173 | if (!cmci_supported(&banks)) | ||
174 | return; | ||
175 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
176 | return; | ||
177 | cpumask_copy(old, ¤t->cpus_allowed); | ||
51 | 178 | ||
52 | /* We'll mask the thermal vector in the lapic till we're ready: */ | 179 | for_each_online_cpu(cpu) { |
53 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | 180 | if (cpu == dying) |
54 | apic_write(APIC_LVTTHMR, h); | 181 | continue; |
182 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | ||
183 | continue; | ||
184 | /* Recheck banks in case CPUs don't all have the same */ | ||
185 | if (cmci_supported(&banks)) | ||
186 | cmci_discover(banks, 0); | ||
187 | } | ||
55 | 188 | ||
56 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 189 | set_cpus_allowed_ptr(current, old); |
57 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 190 | free_cpumask_var(old); |
58 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 191 | } |
59 | 192 | ||
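[Editor's note] cmci_rediscover() above has to rerun the discovery on each remaining CPU, so it saves the caller's allowed-CPU mask, binds itself to every online CPU in turn, and restores the mask at the end. The same walk can be sketched in user space with sched_{get,set}affinity() standing in for set_cpus_allowed_ptr(); the per-CPU work here is just a printf and CPUs that cannot be bound are simply skipped.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        cpu_set_t old, one;
        long cpus = sysconf(_SC_NPROCESSORS_ONLN);

        if (sched_getaffinity(0, sizeof(old), &old))
                return 1;

        for (long cpu = 0; cpu < cpus; cpu++) {
                CPU_ZERO(&one);
                CPU_SET(cpu, &one);
                if (sched_setaffinity(0, sizeof(one), &one))
                        continue;       /* CPU may be offline; skip it */
                printf("doing per-CPU rediscovery work on CPU %ld\n", cpu);
        }

        sched_setaffinity(0, sizeof(old), &old);        /* restore the caller's mask */
        return 0;
}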
60 | intel_set_thermal_handler(); | 193 | /* |
194 | * Reenable CMCI on this CPU in case a CPU down failed. | ||
195 | */ | ||
196 | void cmci_reenable(void) | ||
197 | { | ||
198 | int banks; | ||
199 | if (cmci_supported(&banks)) | ||
200 | cmci_discover(banks, 0); | ||
201 | } | ||
61 | 202 | ||
62 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 203 | static void intel_init_cmci(void) |
63 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | 204 | { |
205 | int banks; | ||
64 | 206 | ||
65 | /* Unmask the thermal vector: */ | 207 | if (!cmci_supported(&banks)) |
66 | l = apic_read(APIC_LVTTHMR); | 208 | return; |
67 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
68 | 209 | ||
69 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | 210 | mce_threshold_vector = intel_threshold_interrupt; |
70 | cpu, tm2 ? "TM2" : "TM1"); | 211 | cmci_discover(banks, 1); |
212 | /* | ||
213 | * For CPU #0 this runs with still disabled APIC, but that's | ||
214 | * ok because only the vector is set up. We still do another | ||
215 | * check for the banks later for CPU #0 just to make sure | ||
216 | * to not miss any events. | ||
217 | */ | ||
218 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | ||
219 | cmci_recheck(); | ||
220 | } | ||
71 | 221 | ||
72 | /* enable thermal throttle processing */ | 222 | void mce_intel_feature_init(struct cpuinfo_x86 *c) |
73 | atomic_set(&therm_throt_en, 1); | 223 | { |
224 | intel_init_thermal(c); | ||
225 | intel_init_cmci(); | ||
74 | } | 226 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c deleted file mode 100644 index f2ef6952c400..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ /dev/null | |||
@@ -1,248 +0,0 @@ | |||
1 | /* | ||
2 | * Intel specific MCE features. | ||
3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | ||
4 | * Copyright (C) 2008, 2009 Intel Corporation | ||
5 | * Author: Andi Kleen | ||
6 | */ | ||
7 | |||
8 | #include <linux/init.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/percpu.h> | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/apic.h> | ||
13 | #include <asm/msr.h> | ||
14 | #include <asm/mce.h> | ||
15 | #include <asm/hw_irq.h> | ||
16 | #include <asm/idle.h> | ||
17 | #include <asm/therm_throt.h> | ||
18 | |||
19 | #include "mce.h" | ||
20 | |||
21 | asmlinkage void smp_thermal_interrupt(void) | ||
22 | { | ||
23 | __u64 msr_val; | ||
24 | |||
25 | ack_APIC_irq(); | ||
26 | |||
27 | exit_idle(); | ||
28 | irq_enter(); | ||
29 | |||
30 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
31 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
32 | mce_log_therm_throt_event(msr_val); | ||
33 | |||
34 | inc_irq_stat(irq_thermal_count); | ||
35 | irq_exit(); | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Support for Intel Correct Machine Check Interrupts. This allows | ||
40 | * the CPU to raise an interrupt when a corrected machine check happened. | ||
41 | * Normally we pick those up using a regular polling timer. | ||
42 | * Also supports reliable discovery of shared banks. | ||
43 | */ | ||
44 | |||
45 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | ||
46 | |||
47 | /* | ||
48 | * cmci_discover_lock protects against parallel discovery attempts | ||
49 | * which could race against each other. | ||
50 | */ | ||
51 | static DEFINE_SPINLOCK(cmci_discover_lock); | ||
52 | |||
53 | #define CMCI_THRESHOLD 1 | ||
54 | |||
55 | static int cmci_supported(int *banks) | ||
56 | { | ||
57 | u64 cap; | ||
58 | |||
59 | if (mce_cmci_disabled || mce_ignore_ce) | ||
60 | return 0; | ||
61 | |||
62 | /* | ||
63 | * Vendor check is not strictly needed, but the initial | ||
64 | * initialization is vendor keyed and this | ||
65 | * makes sure none of the backdoors are entered otherwise. | ||
66 | */ | ||
67 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
68 | return 0; | ||
69 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) | ||
70 | return 0; | ||
71 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
72 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); | ||
73 | return !!(cap & MCG_CMCI_P); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * The interrupt handler. This is called on every event. | ||
78 | * Just call the poller directly to log any events. | ||
79 | * This could in theory increase the threshold under high load, | ||
80 | * but doesn't for now. | ||
81 | */ | ||
82 | static void intel_threshold_interrupt(void) | ||
83 | { | ||
84 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
85 | mce_notify_irq(); | ||
86 | } | ||
87 | |||
88 | static void print_update(char *type, int *hdr, int num) | ||
89 | { | ||
90 | if (*hdr == 0) | ||
91 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
92 | *hdr = 1; | ||
93 | printk(KERN_CONT " %s:%d", type, num); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | ||
98 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | ||
99 | * banks. | ||
100 | */ | ||
101 | static void cmci_discover(int banks, int boot) | ||
102 | { | ||
103 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | ||
104 | unsigned long flags; | ||
105 | int hdr = 0; | ||
106 | int i; | ||
107 | |||
108 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
109 | for (i = 0; i < banks; i++) { | ||
110 | u64 val; | ||
111 | |||
112 | if (test_bit(i, owned)) | ||
113 | continue; | ||
114 | |||
115 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
116 | |||
117 | /* Already owned by someone else? */ | ||
118 | if (val & CMCI_EN) { | ||
119 | if (test_and_clear_bit(i, owned) || boot) | ||
120 | print_update("SHD", &hdr, i); | ||
121 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
122 | continue; | ||
123 | } | ||
124 | |||
125 | val |= CMCI_EN | CMCI_THRESHOLD; | ||
126 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
127 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
128 | |||
129 | /* Did the enable bit stick? -- the bank supports CMCI */ | ||
130 | if (val & CMCI_EN) { | ||
131 | if (!test_and_set_bit(i, owned) || boot) | ||
132 | print_update("CMCI", &hdr, i); | ||
133 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
134 | } else { | ||
135 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | ||
136 | } | ||
137 | } | ||
138 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
139 | if (hdr) | ||
140 | printk(KERN_CONT "\n"); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Just in case we missed an event during initialization check | ||
145 | * all the CMCI owned banks. | ||
146 | */ | ||
147 | void cmci_recheck(void) | ||
148 | { | ||
149 | unsigned long flags; | ||
150 | int banks; | ||
151 | |||
152 | if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) | ||
153 | return; | ||
154 | local_irq_save(flags); | ||
155 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
156 | local_irq_restore(flags); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * Disable CMCI on this CPU for all banks it owns when it goes down. | ||
161 | * This allows other CPUs to claim the banks on rediscovery. | ||
162 | */ | ||
163 | void cmci_clear(void) | ||
164 | { | ||
165 | unsigned long flags; | ||
166 | int i; | ||
167 | int banks; | ||
168 | u64 val; | ||
169 | |||
170 | if (!cmci_supported(&banks)) | ||
171 | return; | ||
172 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
173 | for (i = 0; i < banks; i++) { | ||
174 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | ||
175 | continue; | ||
176 | /* Disable CMCI */ | ||
177 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
178 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | ||
179 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
180 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
181 | } | ||
182 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * After a CPU went down cycle through all the others and rediscover | ||
187 | * Must run in process context. | ||
188 | */ | ||
189 | void cmci_rediscover(int dying) | ||
190 | { | ||
191 | int banks; | ||
192 | int cpu; | ||
193 | cpumask_var_t old; | ||
194 | |||
195 | if (!cmci_supported(&banks)) | ||
196 | return; | ||
197 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
198 | return; | ||
199 | cpumask_copy(old, ¤t->cpus_allowed); | ||
200 | |||
201 | for_each_online_cpu(cpu) { | ||
202 | if (cpu == dying) | ||
203 | continue; | ||
204 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | ||
205 | continue; | ||
206 | /* Recheck banks in case CPUs don't all have the same */ | ||
207 | if (cmci_supported(&banks)) | ||
208 | cmci_discover(banks, 0); | ||
209 | } | ||
210 | |||
211 | set_cpus_allowed_ptr(current, old); | ||
212 | free_cpumask_var(old); | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Reenable CMCI on this CPU in case a CPU down failed. | ||
217 | */ | ||
218 | void cmci_reenable(void) | ||
219 | { | ||
220 | int banks; | ||
221 | if (cmci_supported(&banks)) | ||
222 | cmci_discover(banks, 0); | ||
223 | } | ||
224 | |||
225 | static void intel_init_cmci(void) | ||
226 | { | ||
227 | int banks; | ||
228 | |||
229 | if (!cmci_supported(&banks)) | ||
230 | return; | ||
231 | |||
232 | mce_threshold_vector = intel_threshold_interrupt; | ||
233 | cmci_discover(banks, 1); | ||
234 | /* | ||
235 | * For CPU #0 this runs with still disabled APIC, but that's | ||
236 | * ok because only the vector is set up. We still do another | ||
237 | * check for the banks later for CPU #0 just to make sure | ||
238 | * to not miss any events. | ||
239 | */ | ||
240 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | ||
241 | cmci_recheck(); | ||
242 | } | ||
243 | |||
244 | void mce_intel_feature_init(struct cpuinfo_x86 *c) | ||
245 | { | ||
246 | intel_init_thermal(c); | ||
247 | intel_init_cmci(); | ||
248 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 70b710420f74..f5f2d6f71fb6 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
@@ -17,10 +17,9 @@ | |||
17 | 17 | ||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/system.h> | 19 | #include <asm/system.h> |
20 | #include <asm/mce.h> | ||
20 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
21 | 22 | ||
22 | #include "mce.h" | ||
23 | |||
24 | static int firstbank; | 23 | static int firstbank; |
25 | 24 | ||
26 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ | 25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ |
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 82cee108a2d3..4482aea9aa2e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -1,21 +1,15 @@ | |||
1 | /* | 1 | /* |
2 | * P4 specific Machine Check Exception Reporting | 2 | * P4 specific Machine Check Exception Reporting |
3 | */ | 3 | */ |
4 | |||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
7 | #include <linux/types.h> | 5 | #include <linux/types.h> |
8 | #include <linux/init.h> | 6 | #include <linux/init.h> |
9 | #include <linux/smp.h> | 7 | #include <linux/smp.h> |
10 | 8 | ||
11 | #include <asm/therm_throt.h> | ||
12 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 10 | #include <asm/mce.h> |
14 | #include <asm/apic.h> | ||
15 | #include <asm/msr.h> | 11 | #include <asm/msr.h> |
16 | 12 | ||
17 | #include "mce.h" | ||
18 | |||
19 | /* as supported by the P4/Xeon family */ | 13 | /* as supported by the P4/Xeon family */ |
20 | struct intel_mce_extended_msrs { | 14 | struct intel_mce_extended_msrs { |
21 | u32 eax; | 15 | u32 eax; |
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs { | |||
33 | 27 | ||
34 | static int mce_num_extended_msrs; | 28 | static int mce_num_extended_msrs; |
35 | 29 | ||
36 | |||
37 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
38 | |||
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | ||
40 | { | ||
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
42 | smp_processor_id()); | ||
43 | add_taint(TAINT_MACHINE_CHECK); | ||
44 | } | ||
45 | |||
46 | /* P4/Xeon Thermal transition interrupt handler: */ | ||
47 | static void intel_thermal_interrupt(struct pt_regs *regs) | ||
48 | { | ||
49 | __u64 msr_val; | ||
50 | |||
51 | ack_APIC_irq(); | ||
52 | |||
53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
54 | therm_throt_process(msr_val & THERM_STATUS_PROCHOT); | ||
55 | } | ||
56 | |||
57 | /* Thermal interrupt handler for this CPU setup: */ | ||
58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = | ||
59 | unexpected_thermal_interrupt; | ||
60 | |||
61 | void smp_thermal_interrupt(struct pt_regs *regs) | ||
62 | { | ||
63 | irq_enter(); | ||
64 | vendor_thermal_interrupt(regs); | ||
65 | __get_cpu_var(irq_stat).irq_thermal_count++; | ||
66 | irq_exit(); | ||
67 | } | ||
68 | |||
69 | void intel_set_thermal_handler(void) | ||
70 | { | ||
71 | vendor_thermal_interrupt = intel_thermal_interrupt; | ||
72 | } | ||
73 | |||
74 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
75 | |||
76 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 30 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
77 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
78 | { | 32 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 015f481ab1b0..5c0e6533d9bc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -10,12 +10,11 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* By default disabled */ | 16 | /* By default disabled */ |
18 | int mce_p5_enable; | 17 | int mce_p5_enabled __read_mostly; |
19 | 18 | ||
20 | /* Machine check handler for Pentium class Intel CPUs: */ | 19 | /* Machine check handler for Pentium class Intel CPUs: */ |
21 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) | |||
43 | { | 42 | { |
44 | u32 l, h; | 43 | u32 l, h; |
45 | 44 | ||
46 | /* Check for MCE support: */ | 45 | /* Default P5 to off as its often misconnected: */ |
47 | if (!cpu_has(c, X86_FEATURE_MCE)) | 46 | if (!mce_p5_enabled) |
48 | return; | 47 | return; |
49 | 48 | ||
50 | #ifdef CONFIG_X86_OLD_MCE | 49 | /* Check for MCE support: */ |
51 | /* Default P5 to off as its often misconnected: */ | 50 | if (!cpu_has(c, X86_FEATURE_MCE)) |
52 | if (mce_disabled != -1) | ||
53 | return; | 51 | return; |
54 | #endif | ||
55 | 52 | ||
56 | machine_check_vector = pentium_machine_check; | 53 | machine_check_vector = pentium_machine_check; |
57 | /* Make sure the vector pointer is visible before we enable MCEs: */ | 54 | /* Make sure the vector pointer is visible before we enable MCEs: */ |
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 43c24e667457..01e4f8178183 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -10,10 +10,9 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* Machine Check Handler For PII/PIII */ | 16 | /* Machine Check Handler For PII/PIII */ |
18 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 17 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 18 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7b1ae2e20ba5..bff8dd191dd5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -13,13 +13,23 @@ | |||
13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | 13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. |
14 | * Inspired by Ross Biro's and Al Borchers' counter code. | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
15 | */ | 15 | */ |
16 | #include <linux/interrupt.h> | ||
16 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
17 | #include <linux/jiffies.h> | 18 | #include <linux/jiffies.h> |
19 | #include <linux/kernel.h> | ||
18 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
19 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
22 | #include <linux/types.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/smp.h> | ||
20 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
21 | 26 | ||
22 | #include <asm/therm_throt.h> | 27 | #include <asm/processor.h> |
28 | #include <asm/system.h> | ||
29 | #include <asm/apic.h> | ||
30 | #include <asm/idle.h> | ||
31 | #include <asm/mce.h> | ||
32 | #include <asm/msr.h> | ||
23 | 33 | ||
24 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
25 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
@@ -27,7 +37,7 @@ | |||
27 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
28 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
29 | 39 | ||
30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | 40 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
31 | 41 | ||
32 | #ifdef CONFIG_SYSFS | 42 | #ifdef CONFIG_SYSFS |
33 | #define define_therm_throt_sysdev_one_ro(_name) \ | 43 | #define define_therm_throt_sysdev_one_ro(_name) \ |
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
82 | * 1 : Event should be logged further, and a message has been | 92 | * 1 : Event should be logged further, and a message has been |
83 | * printed to the syslog. | 93 | * printed to the syslog. |
84 | */ | 94 | */ |
85 | int therm_throt_process(int curr) | 95 | static int therm_throt_process(int curr) |
86 | { | 96 | { |
87 | unsigned int cpu = smp_processor_id(); | 97 | unsigned int cpu = smp_processor_id(); |
88 | __u64 tmp_jiffs = get_jiffies_64(); | 98 | __u64 tmp_jiffs = get_jiffies_64(); |
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void) | |||
186 | 196 | ||
187 | return 0; | 197 | return 0; |
188 | } | 198 | } |
189 | |||
190 | device_initcall(thermal_throttle_init_device); | 199 | device_initcall(thermal_throttle_init_device); |
200 | |||
191 | #endif /* CONFIG_SYSFS */ | 201 | #endif /* CONFIG_SYSFS */ |
202 | |||
203 | /* Thermal transition interrupt handler */ | ||
204 | static void intel_thermal_interrupt(void) | ||
205 | { | ||
206 | __u64 msr_val; | ||
207 | |||
208 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
209 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
210 | mce_log_therm_throt_event(msr_val); | ||
211 | } | ||
212 | |||
213 | static void unexpected_thermal_interrupt(void) | ||
214 | { | ||
215 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
216 | smp_processor_id()); | ||
217 | add_taint(TAINT_MACHINE_CHECK); | ||
218 | } | ||
219 | |||
220 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | ||
221 | |||
222 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | ||
223 | { | ||
224 | exit_idle(); | ||
225 | irq_enter(); | ||
226 | inc_irq_stat(irq_thermal_count); | ||
227 | smp_thermal_vector(); | ||
228 | irq_exit(); | ||
229 | /* Ack only at the end to avoid potential reentry */ | ||
230 | ack_APIC_irq(); | ||
231 | } | ||
232 | |||
233 | void intel_init_thermal(struct cpuinfo_x86 *c) | ||
234 | { | ||
235 | unsigned int cpu = smp_processor_id(); | ||
236 | int tm2 = 0; | ||
237 | u32 l, h; | ||
238 | |||
239 | /* Thermal monitoring depends on ACPI and clock modulation*/ | ||
240 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
241 | return; | ||
242 | |||
243 | /* | ||
244 | * First check if its enabled already, in which case there might | ||
245 | * be some SMM goo which handles it, so we can't even put a handler | ||
246 | * since it might be delivered via SMI already: | ||
247 | */ | ||
248 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
249 | h = apic_read(APIC_LVTTHMR); | ||
250 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
251 | printk(KERN_DEBUG | ||
252 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
257 | tm2 = 1; | ||
258 | |||
259 | /* Check whether a vector already exists */ | ||
260 | if (h & APIC_VECTOR_MASK) { | ||
261 | printk(KERN_DEBUG | ||
262 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
263 | cpu, (h & APIC_VECTOR_MASK)); | ||
264 | return; | ||
265 | } | ||
266 | |||
267 | /* We'll mask the thermal vector in the lapic till we're ready: */ | ||
268 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | ||
269 | apic_write(APIC_LVTTHMR, h); | ||
270 | |||
271 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
272 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
273 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
274 | |||
275 | smp_thermal_vector = intel_thermal_interrupt; | ||
276 | |||
277 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
278 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
279 | |||
280 | /* Unmask the thermal vector: */ | ||
281 | l = apic_read(APIC_LVTTHMR); | ||
282 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
283 | |||
284 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
285 | cpu, tm2 ? "TM2" : "TM1"); | ||
286 | |||
287 | /* enable thermal throttle processing */ | ||
288 | atomic_set(&therm_throt_en, 1); | ||
289 | } | ||
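[Editor's note] With this consolidation, therm_throt.c owns the whole thermal path: smp_thermal_interrupt() dispatches through a function pointer that starts out at unexpected_thermal_interrupt() and is switched to the real handler only once intel_init_thermal() has programmed the LVT entry. The pattern itself is easy to see in a few lines of plain C; the APIC/MSR programming is deliberately not modelled here.

#include <stdio.h>

static void unexpected_thermal_interrupt(void)
{
        puts("unexpected thermal interrupt");
}

static void intel_thermal_interrupt(void)
{
        puts("processing PROCHOT event");
}

/* default handler installed statically, replaced at init time */
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

static void thermal_init(void)
{
        smp_thermal_vector = intel_thermal_interrupt;
}

int main(void)
{
        smp_thermal_vector();   /* before init: flagged as unexpected */
        thermal_init();
        smp_thermal_vector();   /* after init: real handler runs */
        return 0;
}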
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090b..54060f565974 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -9,10 +9,9 @@ | |||
9 | 9 | ||
10 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
11 | #include <asm/system.h> | 11 | #include <asm/system.h> |
12 | #include <asm/mce.h> | ||
12 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
13 | 14 | ||
14 | #include "mce.h" | ||
15 | |||
16 | /* Machine check handler for WinChip C6: */ | 15 | /* Machine check handler for WinChip C6: */ |
17 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
18 | { | 17 | { |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 275bc142cd5d..d4cf4ce19aac 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/kdebug.h> | 19 | #include <linux/kdebug.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/highmem.h> | ||
22 | 23 | ||
23 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
24 | #include <asm/stacktrace.h> | 25 | #include <asm/stacktrace.h> |
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event) | |||
389 | return event & CORE_EVNTSEL_MASK; | 390 | return event & CORE_EVNTSEL_MASK; |
390 | } | 391 | } |
391 | 392 | ||
392 | static const u64 amd_0f_hw_cache_event_ids | 393 | static const u64 amd_hw_cache_event_ids |
393 | [PERF_COUNT_HW_CACHE_MAX] | 394 | [PERF_COUNT_HW_CACHE_MAX] |
394 | [PERF_COUNT_HW_CACHE_OP_MAX] | 395 | [PERF_COUNT_HW_CACHE_OP_MAX] |
395 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 396 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
396 | { | 397 | { |
397 | [ C(L1D) ] = { | 398 | [ C(L1D) ] = { |
398 | [ C(OP_READ) ] = { | 399 | [ C(OP_READ) ] = { |
399 | [ C(RESULT_ACCESS) ] = 0, | 400 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ |
400 | [ C(RESULT_MISS) ] = 0, | 401 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ |
401 | }, | 402 | }, |
402 | [ C(OP_WRITE) ] = { | 403 | [ C(OP_WRITE) ] = { |
403 | [ C(RESULT_ACCESS) ] = 0, | 404 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ |
404 | [ C(RESULT_MISS) ] = 0, | 405 | [ C(RESULT_MISS) ] = 0, |
405 | }, | 406 | }, |
406 | [ C(OP_PREFETCH) ] = { | 407 | [ C(OP_PREFETCH) ] = { |
407 | [ C(RESULT_ACCESS) ] = 0, | 408 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ |
408 | [ C(RESULT_MISS) ] = 0, | 409 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ |
409 | }, | 410 | }, |
410 | }, | 411 | }, |
411 | [ C(L1I ) ] = { | 412 | [ C(L1I ) ] = { |
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids | |||
418 | [ C(RESULT_MISS) ] = -1, | 419 | [ C(RESULT_MISS) ] = -1, |
419 | }, | 420 | }, |
420 | [ C(OP_PREFETCH) ] = { | 421 | [ C(OP_PREFETCH) ] = { |
421 | [ C(RESULT_ACCESS) ] = 0, | 422 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ |
422 | [ C(RESULT_MISS) ] = 0, | 423 | [ C(RESULT_MISS) ] = 0, |
423 | }, | 424 | }, |
424 | }, | 425 | }, |
425 | [ C(LL ) ] = { | 426 | [ C(LL ) ] = { |
426 | [ C(OP_READ) ] = { | 427 | [ C(OP_READ) ] = { |
427 | [ C(RESULT_ACCESS) ] = 0, | 428 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ |
428 | [ C(RESULT_MISS) ] = 0, | 429 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ |
429 | }, | 430 | }, |
430 | [ C(OP_WRITE) ] = { | 431 | [ C(OP_WRITE) ] = { |
431 | [ C(RESULT_ACCESS) ] = 0, | 432 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ |
432 | [ C(RESULT_MISS) ] = 0, | 433 | [ C(RESULT_MISS) ] = 0, |
433 | }, | 434 | }, |
434 | [ C(OP_PREFETCH) ] = { | 435 | [ C(OP_PREFETCH) ] = { |
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids | |||
438 | }, | 439 | }, |
439 | [ C(DTLB) ] = { | 440 | [ C(DTLB) ] = { |
440 | [ C(OP_READ) ] = { | 441 | [ C(OP_READ) ] = { |
441 | [ C(RESULT_ACCESS) ] = 0, | 442 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ |
442 | [ C(RESULT_MISS) ] = 0, | 443 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ |
443 | }, | 444 | }, |
444 | [ C(OP_WRITE) ] = { | 445 | [ C(OP_WRITE) ] = { |
445 | [ C(RESULT_ACCESS) ] = 0, | 446 | [ C(RESULT_ACCESS) ] = 0, |
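[Editor's note] The table filled in above is indexed three ways — cache unit, operation, result — and the generic perf layer packs those three indices into one config word that the driver decodes before looking up the raw AMD event code. The sketch below shows that decode with a one-entry table; the 8-bits-per-field layout matches the perf_counter ABI of this period, the event codes are copied from the L1D rows of the patch, and a zero entry is treated as "not supported", so take it as an illustration rather than the driver itself.

#include <stdio.h>
#include <stdint.h>

enum { C_L1D, C_MAX };
enum { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
enum { RES_ACCESS, RES_MISS, RES_MAX };

static const uint64_t amd_hw_cache_event_ids[C_MAX][OP_MAX][RES_MAX] = {
        [C_L1D] = {
                [OP_READ]     = { [RES_ACCESS] = 0x0040, [RES_MISS] = 0x0041 },
                [OP_WRITE]    = { [RES_ACCESS] = 0x0142, [RES_MISS] = 0 },
                [OP_PREFETCH] = { [RES_ACCESS] = 0x0267, [RES_MISS] = 0x0167 },
        },
};

static int hw_cache_event(uint64_t config, uint64_t *code)
{
        unsigned cache  =  config        & 0xff;        /* which cache       */
        unsigned op     = (config >>  8) & 0xff;        /* read/write/prefetch */
        unsigned result = (config >> 16) & 0xff;        /* access or miss    */

        if (cache >= C_MAX || op >= OP_MAX || result >= RES_MAX)
                return -1;
        *code = amd_hw_cache_event_ids[cache][op][result];
        return *code ? 0 : -1;          /* zero means "not supported" here */
}

int main(void)
{
        uint64_t code;
        uint64_t config = C_L1D | (OP_READ << 8) | (RES_MISS << 16);

        if (!hw_cache_event(config, &code))
                printf("raw event code: 0x%04llx\n", (unsigned long long)code);
        return 0;
}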
@@ -911,6 +912,8 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
911 | err = checking_wrmsrl(hwc->counter_base + idx, | 912 | err = checking_wrmsrl(hwc->counter_base + idx, |
912 | (u64)(-left) & x86_pmu.counter_mask); | 913 | (u64)(-left) & x86_pmu.counter_mask); |
913 | 914 | ||
915 | perf_counter_update_userpage(counter); | ||
916 | |||
914 | return ret; | 917 | return ret; |
915 | } | 918 | } |
916 | 919 | ||
@@ -968,13 +971,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | |||
968 | if (!x86_pmu.num_counters_fixed) | 971 | if (!x86_pmu.num_counters_fixed) |
969 | return -1; | 972 | return -1; |
970 | 973 | ||
971 | /* | ||
972 | * Quirk, IA32_FIXED_CTRs do not work on current Atom processors: | ||
973 | */ | ||
974 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | ||
975 | boot_cpu_data.x86_model == 28) | ||
976 | return -1; | ||
977 | |||
978 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | 974 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; |
979 | 975 | ||
980 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 976 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
@@ -1040,6 +1036,8 @@ try_generic: | |||
1040 | x86_perf_counter_set_period(counter, hwc, idx); | 1036 | x86_perf_counter_set_period(counter, hwc, idx); |
1041 | x86_pmu.enable(hwc, idx); | 1037 | x86_pmu.enable(hwc, idx); |
1042 | 1038 | ||
1039 | perf_counter_update_userpage(counter); | ||
1040 | |||
1043 | return 0; | 1041 | return 0; |
1044 | } | 1042 | } |
1045 | 1043 | ||
@@ -1132,6 +1130,8 @@ static void x86_pmu_disable(struct perf_counter *counter) | |||
1132 | x86_perf_counter_update(counter, hwc, idx); | 1130 | x86_perf_counter_update(counter, hwc, idx); |
1133 | cpuc->counters[idx] = NULL; | 1131 | cpuc->counters[idx] = NULL; |
1134 | clear_bit(idx, cpuc->used_mask); | 1132 | clear_bit(idx, cpuc->used_mask); |
1133 | |||
1134 | perf_counter_update_userpage(counter); | ||
1135 | } | 1135 | } |
1136 | 1136 | ||
1137 | /* | 1137 | /* |
@@ -1223,6 +1223,8 @@ again: | |||
1223 | if (!intel_pmu_save_and_restart(counter)) | 1223 | if (!intel_pmu_save_and_restart(counter)) |
1224 | continue; | 1224 | continue; |
1225 | 1225 | ||
1226 | data.period = counter->hw.last_period; | ||
1227 | |||
1226 | if (perf_counter_overflow(counter, 1, &data)) | 1228 | if (perf_counter_overflow(counter, 1, &data)) |
1227 | intel_pmu_disable_counter(&counter->hw, bit); | 1229 | intel_pmu_disable_counter(&counter->hw, bit); |
1228 | } | 1230 | } |
@@ -1425,8 +1427,6 @@ static int intel_pmu_init(void) | |||
1425 | */ | 1427 | */ |
1426 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | 1428 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
1427 | 1429 | ||
1428 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1429 | |||
1430 | /* | 1430 | /* |
1431 | * Install the hw-cache-events table: | 1431 | * Install the hw-cache-events table: |
1432 | */ | 1432 | */ |
@@ -1459,18 +1459,16 @@ static int intel_pmu_init(void) | |||
1459 | 1459 | ||
1460 | static int amd_pmu_init(void) | 1460 | static int amd_pmu_init(void) |
1461 | { | 1461 | { |
1462 | /* Performance-monitoring supported from K7 and later: */ | ||
1463 | if (boot_cpu_data.x86 < 6) | ||
1464 | return -ENODEV; | ||
1465 | |||
1462 | x86_pmu = amd_pmu; | 1466 | x86_pmu = amd_pmu; |
1463 | 1467 | ||
1464 | switch (boot_cpu_data.x86) { | 1468 | /* Events are common for all AMD CPUs */ |
1465 | case 0x0f: | 1469 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, |
1466 | case 0x10: | 1470 | sizeof(hw_cache_event_ids)); |
1467 | case 0x11: | ||
1468 | memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids, | ||
1469 | sizeof(hw_cache_event_ids)); | ||
1470 | 1471 | ||
1471 | pr_cont("AMD Family 0f/10/11 events, "); | ||
1472 | break; | ||
1473 | } | ||
1474 | return 0; | 1472 | return 0; |
1475 | } | 1473 | } |
1476 | 1474 | ||
@@ -1498,21 +1496,22 @@ void __init init_hw_perf_counters(void) | |||
1498 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1496 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1499 | 1497 | ||
1500 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1498 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { |
1501 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1502 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | 1499 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", |
1503 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | 1500 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
1501 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1504 | } | 1502 | } |
1505 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; | 1503 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; |
1506 | perf_max_counters = x86_pmu.num_counters; | 1504 | perf_max_counters = x86_pmu.num_counters; |
1507 | 1505 | ||
1508 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | 1506 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1509 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1510 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | 1507 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", |
1511 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | 1508 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
1509 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1512 | } | 1510 | } |
1513 | 1511 | ||
1514 | perf_counter_mask |= | 1512 | perf_counter_mask |= |
1515 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | 1513 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
1514 | x86_pmu.intel_ctrl = perf_counter_mask; | ||
1516 | 1515 | ||
1517 | perf_counters_lapic_init(); | 1516 | perf_counters_lapic_init(); |
1518 | register_die_notifier(&perf_counter_nmi_notifier); | 1517 | register_die_notifier(&perf_counter_nmi_notifier); |
@@ -1554,9 +1553,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
1554 | */ | 1553 | */ |
1555 | 1554 | ||
1556 | static inline | 1555 | static inline |
1557 | void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) | 1556 | void callchain_store(struct perf_callchain_entry *entry, u64 ip) |
1558 | { | 1557 | { |
1559 | if (entry->nr < MAX_STACK_DEPTH) | 1558 | if (entry->nr < PERF_MAX_STACK_DEPTH) |
1560 | entry->ip[entry->nr++] = ip; | 1559 | entry->ip[entry->nr++] = ip; |
1561 | } | 1560 | } |
1562 | 1561 | ||
@@ -1577,8 +1576,8 @@ static void backtrace_warning(void *data, char *msg) | |||
1577 | 1576 | ||
1578 | static int backtrace_stack(void *data, char *name) | 1577 | static int backtrace_stack(void *data, char *name) |
1579 | { | 1578 | { |
1580 | /* Don't bother with IRQ stacks for now */ | 1579 | /* Process all stacks: */ |
1581 | return -1; | 1580 | return 0; |
1582 | } | 1581 | } |
1583 | 1582 | ||
1584 | static void backtrace_address(void *data, unsigned long addr, int reliable) | 1583 | static void backtrace_address(void *data, unsigned long addr, int reliable) |
@@ -1596,47 +1595,59 @@ static const struct stacktrace_ops backtrace_ops = { | |||
1596 | .address = backtrace_address, | 1595 | .address = backtrace_address, |
1597 | }; | 1596 | }; |
1598 | 1597 | ||
1598 | #include "../dumpstack.h" | ||
1599 | |||
1599 | static void | 1600 | static void |
1600 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1601 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) |
1601 | { | 1602 | { |
1602 | unsigned long bp; | 1603 | callchain_store(entry, PERF_CONTEXT_KERNEL); |
1603 | char *stack; | 1604 | callchain_store(entry, regs->ip); |
1604 | int nr = entry->nr; | ||
1605 | 1605 | ||
1606 | callchain_store(entry, instruction_pointer(regs)); | 1606 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); |
1607 | } | ||
1607 | 1608 | ||
1608 | stack = ((char *)regs + sizeof(struct pt_regs)); | 1609 | /* |
1609 | #ifdef CONFIG_FRAME_POINTER | 1610 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context |
1610 | bp = frame_pointer(regs); | 1611 | */ |
1611 | #else | 1612 | static unsigned long |
1612 | bp = 0; | 1613 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) |
1613 | #endif | 1614 | { |
1615 | unsigned long offset, addr = (unsigned long)from; | ||
1616 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1617 | unsigned long size, len = 0; | ||
1618 | struct page *page; | ||
1619 | void *map; | ||
1620 | int ret; | ||
1614 | 1621 | ||
1615 | dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); | 1622 | do { |
1623 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1624 | if (!ret) | ||
1625 | break; | ||
1616 | 1626 | ||
1617 | entry->kernel = entry->nr - nr; | 1627 | offset = addr & (PAGE_SIZE - 1); |
1618 | } | 1628 | size = min(PAGE_SIZE - offset, n - len); |
1619 | 1629 | ||
1630 | map = kmap_atomic(page, type); | ||
1631 | memcpy(to, map+offset, size); | ||
1632 | kunmap_atomic(map, type); | ||
1633 | put_page(page); | ||
1620 | 1634 | ||
1621 | struct stack_frame { | 1635 | len += size; |
1622 | const void __user *next_fp; | 1636 | to += size; |
1623 | unsigned long return_address; | 1637 | addr += size; |
1624 | }; | 1638 | |
1639 | } while (len < n); | ||
1640 | |||
1641 | return len; | ||
1642 | } | ||
1625 | 1643 | ||
1626 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1644 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) |
1627 | { | 1645 | { |
1628 | int ret; | 1646 | unsigned long bytes; |
1629 | |||
1630 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
1631 | return 0; | ||
1632 | 1647 | ||
1633 | ret = 1; | 1648 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); |
1634 | pagefault_disable(); | ||
1635 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
1636 | ret = 0; | ||
1637 | pagefault_enable(); | ||
1638 | 1649 | ||
1639 | return ret; | 1650 | return bytes == sizeof(*frame); |
1640 | } | 1651 | } |
1641 | 1652 | ||
1642 | static void | 1653 | static void |
@@ -1644,28 +1655,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1644 | { | 1655 | { |
1645 | struct stack_frame frame; | 1656 | struct stack_frame frame; |
1646 | const void __user *fp; | 1657 | const void __user *fp; |
1647 | int nr = entry->nr; | ||
1648 | 1658 | ||
1649 | regs = (struct pt_regs *)current->thread.sp0 - 1; | 1659 | if (!user_mode(regs)) |
1650 | fp = (void __user *)regs->bp; | 1660 | regs = task_pt_regs(current); |
1661 | |||
1662 | fp = (void __user *)regs->bp; | ||
1651 | 1663 | ||
1664 | callchain_store(entry, PERF_CONTEXT_USER); | ||
1652 | callchain_store(entry, regs->ip); | 1665 | callchain_store(entry, regs->ip); |
1653 | 1666 | ||
1654 | while (entry->nr < MAX_STACK_DEPTH) { | 1667 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1655 | frame.next_fp = NULL; | 1668 | frame.next_frame = NULL; |
1656 | frame.return_address = 0; | 1669 | frame.return_address = 0; |
1657 | 1670 | ||
1658 | if (!copy_stack_frame(fp, &frame)) | 1671 | if (!copy_stack_frame(fp, &frame)) |
1659 | break; | 1672 | break; |
1660 | 1673 | ||
1661 | if ((unsigned long)fp < user_stack_pointer(regs)) | 1674 | if ((unsigned long)fp < regs->sp) |
1662 | break; | 1675 | break; |
1663 | 1676 | ||
1664 | callchain_store(entry, frame.return_address); | 1677 | callchain_store(entry, frame.return_address); |
1665 | fp = frame.next_fp; | 1678 | fp = frame.next_frame; |
1666 | } | 1679 | } |
1667 | |||
1668 | entry->user = entry->nr - nr; | ||
1669 | } | 1680 | } |
1670 | 1681 | ||
1671 | static void | 1682 | static void |
@@ -1701,9 +1712,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1701 | entry = &__get_cpu_var(irq_entry); | 1712 | entry = &__get_cpu_var(irq_entry); |
1702 | 1713 | ||
1703 | entry->nr = 0; | 1714 | entry->nr = 0; |
1704 | entry->hv = 0; | ||
1705 | entry->kernel = 0; | ||
1706 | entry->user = 0; | ||
1707 | 1715 | ||
1708 | perf_do_callchain(regs, entry); | 1716 | perf_do_callchain(regs, entry); |
1709 | 1717 | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d6f5b9fbde32..5c481f6205bf 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void) | |||
716 | wd_ops = &k7_wd_ops; | 716 | wd_ops = &k7_wd_ops; |
717 | break; | 717 | break; |
718 | case X86_VENDOR_INTEL: | 718 | case X86_VENDOR_INTEL: |
719 | /* | 719 | /* Work around CPUs where perfctr1 doesn't have a working enable |
720 | * Work around Core Duo (Yonah) errata AE49 where perfctr1 | 720 | * bit, as described in the following errata: |
721 | * doesn't have a working enable bit. | 721 | * AE49 Core Duo and Intel Core Solo 65 nm |
722 | * AN49 Intel Pentium Dual-Core | ||
723 | * AF49 Dual-Core Intel Xeon Processor LV | ||
722 | */ | 724 | */ |
723 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { | 725 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || |
726 | ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && | ||
727 | boot_cpu_data.x86_mask == 4))) { | ||
724 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; | 728 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; |
725 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; | 729 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; |
726 | } | 730 | } |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ff958248e61d..5e409dc298a4 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/cpu.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | #include <asm/iommu.h> | ||
30 | 31 | ||
31 | 32 | ||
32 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
103 | #ifdef CONFIG_HPET_TIMER | 104 | #ifdef CONFIG_HPET_TIMER |
104 | hpet_disable(); | 105 | hpet_disable(); |
105 | #endif | 106 | #endif |
107 | |||
108 | #ifdef CONFIG_X86_64 | ||
109 | pci_iommu_shutdown(); | ||
110 | #endif | ||
111 | |||
106 | crash_save_cpu(regs, safe_smp_processor_id()); | 112 | crash_save_cpu(regs, safe_smp_processor_id()); |
107 | } | 113 | } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 95ea5fa7d444..c8405718a4c3 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "dumpstack.h" | 22 | #include "dumpstack.h" |
23 | 23 | ||
24 | int panic_on_unrecovered_nmi; | 24 | int panic_on_unrecovered_nmi; |
25 | int panic_on_io_nmi; | ||
25 | unsigned int code_bytes = 64; | 26 | unsigned int code_bytes = 64; |
26 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | 27 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; |
27 | static int die_counter; | 28 | static int die_counter; |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7271fa33d791..c4ca89d9aaf4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -1383,6 +1383,8 @@ static unsigned long ram_alignment(resource_size_t pos) | |||
1383 | return 32*1024*1024; | 1383 | return 32*1024*1024; |
1384 | } | 1384 | } |
1385 | 1385 | ||
1386 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) | ||
1387 | |||
1386 | void __init e820_reserve_resources_late(void) | 1388 | void __init e820_reserve_resources_late(void) |
1387 | { | 1389 | { |
1388 | int i; | 1390 | int i; |
@@ -1400,17 +1402,19 @@ void __init e820_reserve_resources_late(void) | |||
1400 | * avoid stolen RAM: | 1402 | * avoid stolen RAM: |
1401 | */ | 1403 | */ |
1402 | for (i = 0; i < e820.nr_map; i++) { | 1404 | for (i = 0; i < e820.nr_map; i++) { |
1403 | struct e820entry *entry = &e820_saved.map[i]; | 1405 | struct e820entry *entry = &e820.map[i]; |
1404 | resource_size_t start, end; | 1406 | u64 start, end; |
1405 | 1407 | ||
1406 | if (entry->type != E820_RAM) | 1408 | if (entry->type != E820_RAM) |
1407 | continue; | 1409 | continue; |
1408 | start = entry->addr + entry->size; | 1410 | start = entry->addr + entry->size; |
1409 | end = round_up(start, ram_alignment(start)); | 1411 | end = round_up(start, ram_alignment(start)) - 1; |
1410 | if (start == end) | 1412 | if (end > MAX_RESOURCE_SIZE) |
1413 | end = MAX_RESOURCE_SIZE; | ||
1414 | if (start >= end) | ||
1411 | continue; | 1415 | continue; |
1412 | reserve_region_with_split(&iomem_resource, start, | 1416 | reserve_region_with_split(&iomem_resource, start, end, |
1413 | end - 1, "RAM buffer"); | 1417 | "RAM buffer"); |
1414 | } | 1418 | } |
1415 | } | 1419 | } |
1416 | 1420 | ||
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1736acc4d7aa..96f7ac0bbf01 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void) | |||
240 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; | 240 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; |
241 | int e820_type; | 241 | int e820_type; |
242 | 242 | ||
243 | if (md->attribute & EFI_MEMORY_WB) | 243 | switch (md->type) { |
244 | e820_type = E820_RAM; | 244 | case EFI_LOADER_CODE: |
245 | else | 245 | case EFI_LOADER_DATA: |
246 | case EFI_BOOT_SERVICES_CODE: | ||
247 | case EFI_BOOT_SERVICES_DATA: | ||
248 | case EFI_CONVENTIONAL_MEMORY: | ||
249 | if (md->attribute & EFI_MEMORY_WB) | ||
250 | e820_type = E820_RAM; | ||
251 | else | ||
252 | e820_type = E820_RESERVED; | ||
253 | break; | ||
254 | case EFI_ACPI_RECLAIM_MEMORY: | ||
255 | e820_type = E820_ACPI; | ||
256 | break; | ||
257 | case EFI_ACPI_MEMORY_NVS: | ||
258 | e820_type = E820_NVS; | ||
259 | break; | ||
260 | case EFI_UNUSABLE_MEMORY: | ||
261 | e820_type = E820_UNUSABLE; | ||
262 | break; | ||
263 | default: | ||
264 | /* | ||
265 | * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE | ||
266 | * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO | ||
267 | * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE | ||
268 | */ | ||
246 | e820_type = E820_RESERVED; | 269 | e820_type = E820_RESERVED; |
270 | break; | ||
271 | } | ||
247 | e820_add_region(start, size, e820_type); | 272 | e820_add_region(start, size, e820_type); |
248 | } | 273 | } |
249 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 274 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c929add475c9..c097e7d607c6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -48,7 +48,6 @@ | |||
48 | #include <asm/segment.h> | 48 | #include <asm/segment.h> |
49 | #include <asm/smp.h> | 49 | #include <asm/smp.h> |
50 | #include <asm/page_types.h> | 50 | #include <asm/page_types.h> |
51 | #include <asm/desc.h> | ||
52 | #include <asm/percpu.h> | 51 | #include <asm/percpu.h> |
53 | #include <asm/dwarf2.h> | 52 | #include <asm/dwarf2.h> |
54 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
@@ -84,7 +83,7 @@ | |||
84 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | 83 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
85 | #else | 84 | #else |
86 | #define preempt_stop(clobbers) | 85 | #define preempt_stop(clobbers) |
87 | #define resume_kernel restore_nocheck | 86 | #define resume_kernel restore_all |
88 | #endif | 87 | #endif |
89 | 88 | ||
90 | .macro TRACE_IRQS_IRET | 89 | .macro TRACE_IRQS_IRET |
@@ -372,7 +371,7 @@ END(ret_from_exception) | |||
372 | ENTRY(resume_kernel) | 371 | ENTRY(resume_kernel) |
373 | DISABLE_INTERRUPTS(CLBR_ANY) | 372 | DISABLE_INTERRUPTS(CLBR_ANY) |
374 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | 373 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? |
375 | jnz restore_nocheck | 374 | jnz restore_all |
376 | need_resched: | 375 | need_resched: |
377 | movl TI_flags(%ebp), %ecx # need_resched set ? | 376 | movl TI_flags(%ebp), %ecx # need_resched set ? |
378 | testb $_TIF_NEED_RESCHED, %cl | 377 | testb $_TIF_NEED_RESCHED, %cl |
@@ -540,6 +539,8 @@ syscall_exit: | |||
540 | jne syscall_exit_work | 539 | jne syscall_exit_work |
541 | 540 | ||
542 | restore_all: | 541 | restore_all: |
542 | TRACE_IRQS_IRET | ||
543 | restore_all_notrace: | ||
543 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | 544 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
544 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we | 545 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we |
545 | # are returning to the kernel. | 546 | # are returning to the kernel. |
@@ -551,8 +552,6 @@ restore_all: | |||
551 | CFI_REMEMBER_STATE | 552 | CFI_REMEMBER_STATE |
552 | je ldt_ss # returning to user-space with LDT SS | 553 | je ldt_ss # returning to user-space with LDT SS |
553 | restore_nocheck: | 554 | restore_nocheck: |
554 | TRACE_IRQS_IRET | ||
555 | restore_nocheck_notrace: | ||
556 | RESTORE_REGS 4 # skip orig_eax/error_code | 555 | RESTORE_REGS 4 # skip orig_eax/error_code |
557 | CFI_ADJUST_CFA_OFFSET -4 | 556 | CFI_ADJUST_CFA_OFFSET -4 |
558 | irq_return: | 557 | irq_return: |
@@ -588,22 +587,34 @@ ldt_ss: | |||
588 | jne restore_nocheck | 587 | jne restore_nocheck |
589 | #endif | 588 | #endif |
590 | 589 | ||
591 | /* If returning to userspace with 16bit stack, | 590 | /* |
592 | * try to fix the higher word of ESP, as the CPU | 591 | * Set up and switch to the ESPFIX stack |
593 | * won't restore it. | 592 | * |
594 | * This is an "official" bug of all the x86-compatible | 593 | * We're returning to userspace with a 16 bit stack. The CPU will not |
595 | * CPUs, which we can try to work around to make | 594 | * restore the high word of ESP for us on executing iret... This is an |
596 | * dosemu and wine happy. */ | 595 | * "official" bug of all the x86-compatible CPUs, which we can work |
597 | movl PT_OLDESP(%esp), %eax | 596 | * around to make dosemu and wine happy. We do this by preloading the |
598 | movl %esp, %edx | 597 | * high word of ESP with the high word of the userspace ESP while |
599 | call patch_espfix_desc | 598 | * compensating for the offset by changing to the ESPFIX segment with |
599 | * a base address that accounts for the difference. | ||
600 | */ | ||
601 | mov %esp, %edx /* load kernel esp */ | ||
602 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ | ||
603 | mov %dx, %ax /* eax: new kernel esp */ | ||
604 | sub %eax, %edx /* offset (low word is 0) */ | ||
605 | PER_CPU(gdt_page, %ebx) | ||
606 | shr $16, %edx | ||
607 | mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ | ||
608 | mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ | ||
600 | pushl $__ESPFIX_SS | 609 | pushl $__ESPFIX_SS |
601 | CFI_ADJUST_CFA_OFFSET 4 | 610 | CFI_ADJUST_CFA_OFFSET 4 |
602 | pushl %eax | 611 | push %eax /* new kernel esp */ |
603 | CFI_ADJUST_CFA_OFFSET 4 | 612 | CFI_ADJUST_CFA_OFFSET 4 |
613 | /* Disable interrupts, but do not irqtrace this section: we | ||
614 | * will soon execute iret, and the irq-tracing state was already | ||
615 | * set to the state that holds after the iret. */ | ||
604 | DISABLE_INTERRUPTS(CLBR_EAX) | 616 | DISABLE_INTERRUPTS(CLBR_EAX) |
605 | TRACE_IRQS_OFF | 617 | lss (%esp), %esp /* switch to espfix segment */ |
606 | lss (%esp), %esp | ||
607 | CFI_ADJUST_CFA_OFFSET -8 | 618 | CFI_ADJUST_CFA_OFFSET -8 |
608 | jmp restore_nocheck | 619 | jmp restore_nocheck |
609 | CFI_ENDPROC | 620 | CFI_ENDPROC |
@@ -716,15 +727,24 @@ PTREGSCALL(vm86) | |||
716 | PTREGSCALL(vm86old) | 727 | PTREGSCALL(vm86old) |
717 | 728 | ||
718 | .macro FIXUP_ESPFIX_STACK | 729 | .macro FIXUP_ESPFIX_STACK |
719 | /* since we are on a wrong stack, we cant make it a C code :( */ | 730 | /* |
731 | * Switch back from the ESPFIX stack to the normal zero-based stack | ||
732 | * | ||
733 | * We can't call C functions using the ESPFIX stack. This code reads | ||
734 | * the high word of the segment base from the GDT and switches to the | ||
735 | * normal stack and adjusts ESP with the matching offset. | ||
736 | */ | ||
737 | /* fixup the stack */ | ||
720 | PER_CPU(gdt_page, %ebx) | 738 | PER_CPU(gdt_page, %ebx) |
721 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | 739 | mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ |
722 | addl %esp, %eax | 740 | mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ |
741 | shl $16, %eax | ||
742 | addl %esp, %eax /* the adjusted stack pointer */ | ||
723 | pushl $__KERNEL_DS | 743 | pushl $__KERNEL_DS |
724 | CFI_ADJUST_CFA_OFFSET 4 | 744 | CFI_ADJUST_CFA_OFFSET 4 |
725 | pushl %eax | 745 | pushl %eax |
726 | CFI_ADJUST_CFA_OFFSET 4 | 746 | CFI_ADJUST_CFA_OFFSET 4 |
727 | lss (%esp), %esp | 747 | lss (%esp), %esp /* switch to the normal stack segment */ |
728 | CFI_ADJUST_CFA_OFFSET -8 | 748 | CFI_ADJUST_CFA_OFFSET -8 |
729 | .endm | 749 | .endm |
730 | .macro UNWIND_ESPFIX_STACK | 750 | .macro UNWIND_ESPFIX_STACK |
@@ -1154,6 +1174,7 @@ ENTRY(ftrace_graph_caller) | |||
1154 | pushl %edx | 1174 | pushl %edx |
1155 | movl 0xc(%esp), %edx | 1175 | movl 0xc(%esp), %edx |
1156 | lea 0x4(%ebp), %eax | 1176 | lea 0x4(%ebp), %eax |
1177 | movl (%ebp), %ecx | ||
1157 | subl $MCOUNT_INSN_SIZE, %edx | 1178 | subl $MCOUNT_INSN_SIZE, %edx |
1158 | call prepare_ftrace_return | 1179 | call prepare_ftrace_return |
1159 | popl %edx | 1180 | popl %edx |
@@ -1168,6 +1189,7 @@ return_to_handler: | |||
1168 | pushl %eax | 1189 | pushl %eax |
1169 | pushl %ecx | 1190 | pushl %ecx |
1170 | pushl %edx | 1191 | pushl %edx |
1192 | movl %ebp, %eax | ||
1171 | call ftrace_return_to_handler | 1193 | call ftrace_return_to_handler |
1172 | movl %eax, 0xc(%esp) | 1194 | movl %eax, 0xc(%esp) |
1173 | popl %edx | 1195 | popl %edx |
@@ -1329,7 +1351,7 @@ nmi_stack_correct: | |||
1329 | xorl %edx,%edx # zero error code | 1351 | xorl %edx,%edx # zero error code |
1330 | movl %esp,%eax # pt_regs pointer | 1352 | movl %esp,%eax # pt_regs pointer |
1331 | call do_nmi | 1353 | call do_nmi |
1332 | jmp restore_nocheck_notrace | 1354 | jmp restore_all_notrace |
1333 | CFI_ENDPROC | 1355 | CFI_ENDPROC |
1334 | 1356 | ||
1335 | nmi_stack_fixup: | 1357 | nmi_stack_fixup: |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index de74f0a3e0ed..c251be745107 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller) | |||
135 | 135 | ||
136 | leaq 8(%rbp), %rdi | 136 | leaq 8(%rbp), %rdi |
137 | movq 0x38(%rsp), %rsi | 137 | movq 0x38(%rsp), %rsi |
138 | movq (%rbp), %rdx | ||
138 | subq $MCOUNT_INSN_SIZE, %rsi | 139 | subq $MCOUNT_INSN_SIZE, %rsi |
139 | 140 | ||
140 | call prepare_ftrace_return | 141 | call prepare_ftrace_return |
@@ -150,6 +151,7 @@ GLOBAL(return_to_handler) | |||
150 | /* Save the return values */ | 151 | /* Save the return values */ |
151 | movq %rax, (%rsp) | 152 | movq %rax, (%rsp) |
152 | movq %rdx, 8(%rsp) | 153 | movq %rdx, 8(%rsp) |
154 | movq %rbp, %rdi | ||
153 | 155 | ||
154 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
155 | 157 | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b79c5533c421..d94e1ea3b9fe 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void) | |||
408 | * Hook the return address and push it in the stack of return addrs | 408 | * Hook the return address and push it in the stack of return addrs |
409 | * in current thread info. | 409 | * in current thread info. |
410 | */ | 410 | */ |
411 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | 411 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, |
412 | unsigned long frame_pointer) | ||
412 | { | 413 | { |
413 | unsigned long old; | 414 | unsigned long old; |
414 | int faulted; | 415 | int faulted; |
@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | |||
453 | return; | 454 | return; |
454 | } | 455 | } |
455 | 456 | ||
456 | if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { | 457 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, |
458 | frame_pointer) == -EBUSY) { | ||
457 | *parent = old; | 459 | *parent = old; |
458 | return; | 460 | return; |
459 | } | 461 | } |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index dc5ed4bdd88d..8663afb56535 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <asm/segment.h> | 13 | #include <asm/segment.h> |
14 | #include <asm/page_types.h> | 14 | #include <asm/page_types.h> |
15 | #include <asm/pgtable_types.h> | 15 | #include <asm/pgtable_types.h> |
16 | #include <asm/desc.h> | ||
17 | #include <asm/cache.h> | 16 | #include <asm/cache.h> |
18 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
19 | #include <asm/asm-offsets.h> | 18 | #include <asm/asm-offsets.h> |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 54b29bb24e71..fa54f78e2a05 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/linkage.h> | 12 | #include <linux/linkage.h> |
13 | #include <linux/threads.h> | 13 | #include <linux/threads.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <asm/desc.h> | ||
16 | #include <asm/segment.h> | 15 | #include <asm/segment.h> |
17 | #include <asm/pgtable.h> | 16 | #include <asm/pgtable.h> |
18 | #include <asm/page.h> | 17 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 81408b93f887..dedc2bddf7a5 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
510 | { | 510 | { |
511 | 511 | ||
512 | if (request_irq(dev->irq, hpet_interrupt_handler, | 512 | if (request_irq(dev->irq, hpet_interrupt_handler, |
513 | IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) | 513 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, |
514 | dev->name, dev)) | ||
514 | return -1; | 515 | return -1; |
515 | 516 | ||
516 | disable_irq(dev->irq); | 517 | disable_irq(dev->irq); |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 745579bc8256..1a041bcf506b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -32,6 +32,8 @@ int no_iommu __read_mostly; | |||
32 | /* Set this to 1 if there is a HW IOMMU in the system */ | 32 | /* Set this to 1 if there is a HW IOMMU in the system */ |
33 | int iommu_detected __read_mostly = 0; | 33 | int iommu_detected __read_mostly = 0; |
34 | 34 | ||
35 | int iommu_pass_through; | ||
36 | |||
35 | dma_addr_t bad_dma_address __read_mostly = 0; | 37 | dma_addr_t bad_dma_address __read_mostly = 0; |
36 | EXPORT_SYMBOL(bad_dma_address); | 38 | EXPORT_SYMBOL(bad_dma_address); |
37 | 39 | ||
@@ -210,6 +212,10 @@ static __init int iommu_setup(char *p) | |||
210 | if (!strncmp(p, "soft", 4)) | 212 | if (!strncmp(p, "soft", 4)) |
211 | swiotlb = 1; | 213 | swiotlb = 1; |
212 | #endif | 214 | #endif |
215 | if (!strncmp(p, "pt", 2)) { | ||
216 | iommu_pass_through = 1; | ||
217 | return 1; | ||
218 | } | ||
213 | 219 | ||
214 | gart_parse_options(p); | 220 | gart_parse_options(p); |
215 | 221 | ||
@@ -290,6 +296,8 @@ static int __init pci_iommu_init(void) | |||
290 | void pci_iommu_shutdown(void) | 296 | void pci_iommu_shutdown(void) |
291 | { | 297 | { |
292 | gart_iommu_shutdown(); | 298 | gart_iommu_shutdown(); |
299 | |||
300 | amd_iommu_shutdown(); | ||
293 | } | 301 | } |
294 | /* Must execute after PCI subsystem */ | 302 | /* Must execute after PCI subsystem */ |
295 | fs_initcall(pci_iommu_init); | 303 | fs_initcall(pci_iommu_init); |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index a1712f2b50f1..6af96ee44200 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void) | |||
71 | { | 71 | { |
72 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 72 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) | 74 | if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || |
75 | iommu_pass_through) | ||
75 | swiotlb = 1; | 76 | swiotlb = 1; |
76 | #endif | 77 | #endif |
77 | if (swiotlb_force) | 78 | if (swiotlb_force) |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index be5ae80f897f..de2cab132844 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align) | |||
289 | return ret; | 289 | return ret; |
290 | } | 290 | } |
291 | 291 | ||
292 | #ifdef CONFIG_X86_64 | ||
293 | static void __init init_gbpages(void) | ||
294 | { | ||
295 | if (direct_gbpages && cpu_has_gbpages) | ||
296 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
297 | else | ||
298 | direct_gbpages = 0; | ||
299 | } | ||
300 | #else | ||
301 | static inline void init_gbpages(void) | ||
302 | { | ||
303 | } | ||
304 | #endif | ||
305 | |||
292 | static void __init reserve_brk(void) | 306 | static void __init reserve_brk(void) |
293 | { | 307 | { |
294 | if (_brk_end > _brk_start) | 308 | if (_brk_end > _brk_start) |
@@ -871,6 +885,8 @@ void __init setup_arch(char **cmdline_p) | |||
871 | 885 | ||
872 | reserve_brk(); | 886 | reserve_brk(); |
873 | 887 | ||
888 | init_gbpages(); | ||
889 | |||
874 | /* max_pfn_mapped is updated here */ | 890 | /* max_pfn_mapped is updated here */ |
875 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | 891 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); |
876 | max_pfn_mapped = max_low_pfn_mapped; | 892 | max_pfn_mapped = max_low_pfn_mapped; |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9c3f0823e6aa..29a3eef7cf4a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
124 | } | 124 | } |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * Remap allocator | 127 | * Large page remap allocator |
128 | * | 128 | * |
129 | * This allocator uses PMD page as unit. A PMD page is allocated for | 129 | * This allocator uses PMD page as unit. A PMD page is allocated for |
130 | * each cpu and each is remapped into vmalloc area using PMD mapping. | 130 | * each cpu and each is remapped into vmalloc area using PMD mapping. |
@@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
137 | * better than only using 4k mappings while still being NUMA friendly. | 137 | * better than only using 4k mappings while still being NUMA friendly. |
138 | */ | 138 | */ |
139 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 139 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
140 | static size_t pcpur_size __initdata; | 140 | struct pcpul_ent { |
141 | static void **pcpur_ptrs __initdata; | 141 | unsigned int cpu; |
142 | void *ptr; | ||
143 | }; | ||
144 | |||
145 | static size_t pcpul_size; | ||
146 | static struct pcpul_ent *pcpul_map; | ||
147 | static struct vm_struct pcpul_vm; | ||
142 | 148 | ||
143 | static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) | 149 | static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) |
144 | { | 150 | { |
145 | size_t off = (size_t)pageno << PAGE_SHIFT; | 151 | size_t off = (size_t)pageno << PAGE_SHIFT; |
146 | 152 | ||
147 | if (off >= pcpur_size) | 153 | if (off >= pcpul_size) |
148 | return NULL; | 154 | return NULL; |
149 | 155 | ||
150 | return virt_to_page(pcpur_ptrs[cpu] + off); | 156 | return virt_to_page(pcpul_map[cpu].ptr + off); |
151 | } | 157 | } |
152 | 158 | ||
153 | static ssize_t __init setup_pcpu_remap(size_t static_size) | 159 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |
154 | { | 160 | { |
155 | static struct vm_struct vm; | 161 | size_t map_size, dyn_size; |
156 | size_t ptrs_size, dyn_size; | ||
157 | unsigned int cpu; | 162 | unsigned int cpu; |
163 | int i, j; | ||
158 | ssize_t ret; | 164 | ssize_t ret; |
159 | 165 | ||
160 | /* | 166 | if (!chosen) { |
161 | * If large page isn't supported, there's no benefit in doing | 167 | size_t vm_size = VMALLOC_END - VMALLOC_START; |
162 | * this. Also, on non-NUMA, embedding is better. | 168 | size_t tot_size = num_possible_cpus() * PMD_SIZE; |
163 | * | 169 | |
164 | * NOTE: disabled for now. | 170 | /* on non-NUMA, embedding is better */ |
165 | */ | 171 | if (!pcpu_need_numa()) |
166 | if (true || !cpu_has_pse || !pcpu_need_numa()) | 172 | return -EINVAL; |
173 | |||
174 | /* don't consume more than 20% of vmalloc area */ | ||
175 | if (tot_size > vm_size / 5) { | ||
176 | pr_info("PERCPU: too large chunk size %zuMB for " | ||
177 | "large page remap\n", tot_size >> 20); | ||
178 | return -EINVAL; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* need PSE */ | ||
183 | if (!cpu_has_pse) { | ||
184 | pr_warning("PERCPU: lpage allocator requires PSE\n"); | ||
167 | return -EINVAL; | 185 | return -EINVAL; |
186 | } | ||
168 | 187 | ||
169 | /* | 188 | /* |
170 | * Currently supports only single page. Supporting multiple | 189 | * Currently supports only single page. Supporting multiple |
171 | * pages won't be too difficult if it ever becomes necessary. | 190 | * pages won't be too difficult if it ever becomes necessary. |
172 | */ | 191 | */ |
173 | pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | 192 | pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + |
174 | PERCPU_DYNAMIC_RESERVE); | 193 | PERCPU_DYNAMIC_RESERVE); |
175 | if (pcpur_size > PMD_SIZE) { | 194 | if (pcpul_size > PMD_SIZE) { |
176 | pr_warning("PERCPU: static data is larger than large page, " | 195 | pr_warning("PERCPU: static data is larger than large page, " |
177 | "can't use large page\n"); | 196 | "can't use large page\n"); |
178 | return -EINVAL; | 197 | return -EINVAL; |
179 | } | 198 | } |
180 | dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | 199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; |
181 | 200 | ||
182 | /* allocate pointer array and alloc large pages */ | 201 | /* allocate pointer array and alloc large pages */ |
183 | ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); | 202 | map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); |
184 | pcpur_ptrs = alloc_bootmem(ptrs_size); | 203 | pcpul_map = alloc_bootmem(map_size); |
185 | 204 | ||
186 | for_each_possible_cpu(cpu) { | 205 | for_each_possible_cpu(cpu) { |
187 | pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); | 206 | pcpul_map[cpu].cpu = cpu; |
188 | if (!pcpur_ptrs[cpu]) | 207 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, |
208 | PMD_SIZE); | ||
209 | if (!pcpul_map[cpu].ptr) { | ||
210 | pr_warning("PERCPU: failed to allocate large page " | ||
211 | "for cpu%u\n", cpu); | ||
189 | goto enomem; | 212 | goto enomem; |
213 | } | ||
190 | 214 | ||
191 | /* | 215 | /* |
192 | * Only use pcpur_size bytes and give back the rest. | 216 | * Only use pcpul_size bytes and give back the rest. |
193 | * | 217 | * |
194 | * Ingo: The 2MB up-rounding bootmem is needed to make | 218 | * Ingo: The 2MB up-rounding bootmem is needed to make |
195 | * sure the partial 2MB page is still fully RAM - it's | 219 | * sure the partial 2MB page is still fully RAM - it's |
196 | * not well-specified to have a PAT-incompatible area | 220 | * not well-specified to have a PAT-incompatible area |
197 | * (unmapped RAM, device memory, etc.) in that hole. | 221 | * (unmapped RAM, device memory, etc.) in that hole. |
198 | */ | 222 | */ |
199 | free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), | 223 | free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), |
200 | PMD_SIZE - pcpur_size); | 224 | PMD_SIZE - pcpul_size); |
201 | 225 | ||
202 | memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); | 226 | memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); |
203 | } | 227 | } |
204 | 228 | ||
205 | /* allocate address and map */ | 229 | /* allocate address and map */ |
206 | vm.flags = VM_ALLOC; | 230 | pcpul_vm.flags = VM_ALLOC; |
207 | vm.size = num_possible_cpus() * PMD_SIZE; | 231 | pcpul_vm.size = num_possible_cpus() * PMD_SIZE; |
208 | vm_area_register_early(&vm, PMD_SIZE); | 232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); |
209 | 233 | ||
210 | for_each_possible_cpu(cpu) { | 234 | for_each_possible_cpu(cpu) { |
211 | pmd_t *pmd; | 235 | pmd_t *pmd, pmd_v; |
212 | 236 | ||
213 | pmd = populate_extra_pmd((unsigned long)vm.addr | 237 | pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + |
214 | + cpu * PMD_SIZE); | 238 | cpu * PMD_SIZE); |
215 | set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), | 239 | pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), |
216 | PAGE_KERNEL_LARGE)); | 240 | PAGE_KERNEL_LARGE); |
241 | set_pmd(pmd, pmd_v); | ||
217 | } | 242 | } |
218 | 243 | ||
219 | /* we're ready, commit */ | 244 | /* we're ready, commit */ |
220 | pr_info("PERCPU: Remapped at %p with large pages, static data " | 245 | pr_info("PERCPU: Remapped at %p with large pages, static data " |
221 | "%zu bytes\n", vm.addr, static_size); | 246 | "%zu bytes\n", pcpul_vm.addr, static_size); |
222 | 247 | ||
223 | ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, | 248 | ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, |
224 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | 249 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, |
225 | PMD_SIZE, vm.addr, NULL); | 250 | PMD_SIZE, pcpul_vm.addr, NULL); |
226 | goto out_free_ar; | 251 | |
252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | ||
253 | for (i = 0; i < num_possible_cpus() - 1; i++) | ||
254 | for (j = i + 1; j < num_possible_cpus(); j++) | ||
255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | ||
256 | struct pcpul_ent tmp = pcpul_map[i]; | ||
257 | pcpul_map[i] = pcpul_map[j]; | ||
258 | pcpul_map[j] = tmp; | ||
259 | } | ||
260 | |||
261 | return ret; | ||
227 | 262 | ||
228 | enomem: | 263 | enomem: |
229 | for_each_possible_cpu(cpu) | 264 | for_each_possible_cpu(cpu) |
230 | if (pcpur_ptrs[cpu]) | 265 | if (pcpul_map[cpu].ptr) |
231 | free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); | 266 | free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); |
232 | ret = -ENOMEM; | 267 | free_bootmem(__pa(pcpul_map), map_size); |
233 | out_free_ar: | 268 | return -ENOMEM; |
234 | free_bootmem(__pa(pcpur_ptrs), ptrs_size); | 269 | } |
235 | return ret; | 270 | |
271 | /** | ||
272 | * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area | ||
273 | * @kaddr: the kernel address in question | ||
274 | * | ||
275 | * Determine whether @kaddr falls in the pcpul recycled area. This is | ||
276 | * used by pageattr to detect VM aliases and break up the pcpu PMD | ||
277 | * mapping such that the same physical page is not mapped under | ||
278 | * different attributes. | ||
279 | * | ||
280 | * The recycled area is always at the tail of a partially used PMD | ||
281 | * page. | ||
282 | * | ||
283 | * RETURNS: | ||
284 | * Address of corresponding remapped pcpu address if match is found; | ||
285 | * otherwise, NULL. | ||
286 | */ | ||
287 | void *pcpu_lpage_remapped(void *kaddr) | ||
288 | { | ||
289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | ||
290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | ||
291 | int left = 0, right = num_possible_cpus() - 1; | ||
292 | int pos; | ||
293 | |||
294 | /* pcpul in use at all? */ | ||
295 | if (!pcpul_map) | ||
296 | return NULL; | ||
297 | |||
298 | /* okay, perform binary search */ | ||
299 | while (left <= right) { | ||
300 | pos = (left + right) / 2; | ||
301 | |||
302 | if (pcpul_map[pos].ptr < pmd_addr) | ||
303 | left = pos + 1; | ||
304 | else if (pcpul_map[pos].ptr > pmd_addr) | ||
305 | right = pos - 1; | ||
306 | else { | ||
307 | /* it shouldn't be in the area for the first chunk */ | ||
308 | WARN_ON(offset < pcpul_size); | ||
309 | |||
310 | return pcpul_vm.addr + | ||
311 | pcpul_map[pos].cpu * PMD_SIZE + offset; | ||
312 | } | ||
313 | } | ||
314 | |||
315 | return NULL; | ||
236 | } | 316 | } |
237 | #else | 317 | #else |
238 | static ssize_t __init setup_pcpu_remap(size_t static_size) | 318 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |
239 | { | 319 | { |
240 | return -EINVAL; | 320 | return -EINVAL; |
241 | } | 321 | } |
@@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) | |||
249 | * mapping so that it can use PMD mapping without additional TLB | 329 | * mapping so that it can use PMD mapping without additional TLB |
250 | * pressure. | 330 | * pressure. |
251 | */ | 331 | */ |
252 | static ssize_t __init setup_pcpu_embed(size_t static_size) | 332 | static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) |
253 | { | 333 | { |
254 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | 334 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; |
255 | 335 | ||
@@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) | |||
258 | * this. Also, embedding allocation doesn't play well with | 338 | * this. Also, embedding allocation doesn't play well with |
259 | * NUMA. | 339 | * NUMA. |
260 | */ | 340 | */ |
261 | if (!cpu_has_pse || pcpu_need_numa()) | 341 | if (!chosen && (!cpu_has_pse || pcpu_need_numa())) |
262 | return -EINVAL; | 342 | return -EINVAL; |
263 | 343 | ||
264 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, | 344 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, |
@@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) | |||
308 | void *ptr; | 388 | void *ptr; |
309 | 389 | ||
310 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); | 390 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); |
311 | if (!ptr) | 391 | if (!ptr) { |
392 | pr_warning("PERCPU: failed to allocate " | ||
393 | "4k page for cpu%u\n", cpu); | ||
312 | goto enomem; | 394 | goto enomem; |
395 | } | ||
313 | 396 | ||
314 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); | 397 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); |
315 | pcpu4k_pages[j++] = virt_to_page(ptr); | 398 | pcpu4k_pages[j++] = virt_to_page(ptr); |
@@ -333,6 +416,16 @@ out_free_ar: | |||
333 | return ret; | 416 | return ret; |
334 | } | 417 | } |
335 | 418 | ||
419 | /* for explicit first chunk allocator selection */ | ||
420 | static char pcpu_chosen_alloc[16] __initdata; | ||
421 | |||
422 | static int __init percpu_alloc_setup(char *str) | ||
423 | { | ||
424 | strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); | ||
425 | return 0; | ||
426 | } | ||
427 | early_param("percpu_alloc", percpu_alloc_setup); | ||
428 | |||
336 | static inline void setup_percpu_segment(int cpu) | 429 | static inline void setup_percpu_segment(int cpu) |
337 | { | 430 | { |
338 | #ifdef CONFIG_X86_32 | 431 | #ifdef CONFIG_X86_32 |
@@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu) | |||
346 | #endif | 439 | #endif |
347 | } | 440 | } |
348 | 441 | ||
349 | /* | ||
350 | * Great future plan: | ||
351 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | ||
352 | * Always point %gs to its beginning | ||
353 | */ | ||
354 | void __init setup_per_cpu_areas(void) | 442 | void __init setup_per_cpu_areas(void) |
355 | { | 443 | { |
356 | size_t static_size = __per_cpu_end - __per_cpu_start; | 444 | size_t static_size = __per_cpu_end - __per_cpu_start; |
@@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void) | |||
367 | * of large page mappings. Please read comments on top of | 455 | * of large page mappings. Please read comments on top of |
368 | * each allocator for details. | 456 | * each allocator for details. |
369 | */ | 457 | */ |
370 | ret = setup_pcpu_remap(static_size); | 458 | ret = -EINVAL; |
371 | if (ret < 0) | 459 | if (strlen(pcpu_chosen_alloc)) { |
372 | ret = setup_pcpu_embed(static_size); | 460 | if (strcmp(pcpu_chosen_alloc, "4k")) { |
461 | if (!strcmp(pcpu_chosen_alloc, "lpage")) | ||
462 | ret = setup_pcpu_lpage(static_size, true); | ||
463 | else if (!strcmp(pcpu_chosen_alloc, "embed")) | ||
464 | ret = setup_pcpu_embed(static_size, true); | ||
465 | else | ||
466 | pr_warning("PERCPU: unknown allocator %s " | ||
467 | "specified\n", pcpu_chosen_alloc); | ||
468 | if (ret < 0) | ||
469 | pr_warning("PERCPU: %s allocator failed (%zd), " | ||
470 | "falling back to 4k\n", | ||
471 | pcpu_chosen_alloc, ret); | ||
472 | } | ||
473 | } else { | ||
474 | ret = setup_pcpu_lpage(static_size, false); | ||
475 | if (ret < 0) | ||
476 | ret = setup_pcpu_embed(static_size, false); | ||
477 | } | ||
373 | if (ret < 0) | 478 | if (ret < 0) |
374 | ret = setup_pcpu_4k(static_size); | 479 | ret = setup_pcpu_4k(static_size); |
375 | if (ret < 0) | 480 | if (ret < 0) |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 124d40c575df..8ccabb8a2f6a 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode) | |||
711 | unsigned long pa; | 711 | unsigned long pa; |
712 | unsigned long m; | 712 | unsigned long m; |
713 | unsigned long n; | 713 | unsigned long n; |
714 | unsigned long mmr_image; | ||
715 | struct bau_desc *adp; | 714 | struct bau_desc *adp; |
716 | struct bau_desc *ad2; | 715 | struct bau_desc *ad2; |
717 | 716 | ||
@@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode) | |||
727 | n = pa >> uv_nshift; | 726 | n = pa >> uv_nshift; |
728 | m = pa & uv_mmask; | 727 | m = pa & uv_mmask; |
729 | 728 | ||
730 | mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); | 729 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
731 | if (mmr_image) { | 730 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); |
732 | uv_write_global_mmr64(pnode, (unsigned long) | ||
733 | UVH_LB_BAU_SB_DESCRIPTOR_BASE, | ||
734 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | ||
735 | } | ||
736 | 731 | ||
737 | /* | 732 | /* |
738 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 733 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5f935f0d5861..5204332f475d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <asm/traps.h> | 54 | #include <asm/traps.h> |
55 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
56 | #include <asm/i387.h> | 56 | #include <asm/i387.h> |
57 | #include <asm/mce.h> | ||
57 | 58 | ||
58 | #include <asm/mach_traps.h> | 59 | #include <asm/mach_traps.h> |
59 | 60 | ||
@@ -65,8 +66,6 @@ | |||
65 | #include <asm/setup.h> | 66 | #include <asm/setup.h> |
66 | #include <asm/traps.h> | 67 | #include <asm/traps.h> |
67 | 68 | ||
68 | #include "cpu/mcheck/mce.h" | ||
69 | |||
70 | asmlinkage int system_call(void); | 69 | asmlinkage int system_call(void); |
71 | 70 | ||
72 | /* Do we ignore FPU interrupts ? */ | 71 | /* Do we ignore FPU interrupts ? */ |
@@ -347,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
347 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); | 346 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); |
348 | show_registers(regs); | 347 | show_registers(regs); |
349 | 348 | ||
349 | if (panic_on_io_nmi) | ||
350 | panic("NMI IOCK error: Not continuing"); | ||
351 | |||
350 | /* Re-enable the IOCK line, wait for a few seconds */ | 352 | /* Re-enable the IOCK line, wait for a few seconds */ |
351 | reason = (reason & 0xf) | 8; | 353 | reason = (reason & 0xf) | 8; |
352 | outb(reason, 0x61); | 354 | outb(reason, 0x61); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b0597ad02c93..6e1a368d21d4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -590,22 +590,26 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); | |||
590 | */ | 590 | */ |
591 | 591 | ||
592 | DEFINE_PER_CPU(unsigned long, cyc2ns); | 592 | DEFINE_PER_CPU(unsigned long, cyc2ns); |
593 | DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
593 | 594 | ||
594 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | 595 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) |
595 | { | 596 | { |
596 | unsigned long long tsc_now, ns_now; | 597 | unsigned long long tsc_now, ns_now, *offset; |
597 | unsigned long flags, *scale; | 598 | unsigned long flags, *scale; |
598 | 599 | ||
599 | local_irq_save(flags); | 600 | local_irq_save(flags); |
600 | sched_clock_idle_sleep_event(); | 601 | sched_clock_idle_sleep_event(); |
601 | 602 | ||
602 | scale = &per_cpu(cyc2ns, cpu); | 603 | scale = &per_cpu(cyc2ns, cpu); |
604 | offset = &per_cpu(cyc2ns_offset, cpu); | ||
603 | 605 | ||
604 | rdtscll(tsc_now); | 606 | rdtscll(tsc_now); |
605 | ns_now = __cycles_2_ns(tsc_now); | 607 | ns_now = __cycles_2_ns(tsc_now); |
606 | 608 | ||
607 | if (cpu_khz) | 609 | if (cpu_khz) { |
608 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | 610 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; |
611 | *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); | ||
612 | } | ||
609 | 613 | ||
610 | sched_clock_idle_wakeup_event(0); | 614 | sched_clock_idle_wakeup_event(0); |
611 | local_irq_restore(flags); | 615 | local_irq_restore(flags); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5c3d6e81a7dc..7030b5f911bf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2157,7 +2157,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2157 | else | 2157 | else |
2158 | /* 32 bits PSE 4MB page */ | 2158 | /* 32 bits PSE 4MB page */ |
2159 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2159 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
2160 | context->rsvd_bits_mask[1][0] = ~0ull; | 2160 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2161 | break; | 2161 | break; |
2162 | case PT32E_ROOT_LEVEL: | 2162 | case PT32E_ROOT_LEVEL: |
2163 | context->rsvd_bits_mask[0][2] = | 2163 | context->rsvd_bits_mask[0][2] = |
@@ -2170,7 +2170,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2170 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2170 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2171 | rsvd_bits(maxphyaddr, 62) | | 2171 | rsvd_bits(maxphyaddr, 62) | |
2172 | rsvd_bits(13, 20); /* large page */ | 2172 | rsvd_bits(13, 20); /* large page */ |
2173 | context->rsvd_bits_mask[1][0] = ~0ull; | 2173 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2174 | break; | 2174 | break; |
2175 | case PT64_ROOT_LEVEL: | 2175 | case PT64_ROOT_LEVEL: |
2176 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2176 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
@@ -2186,7 +2186,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2186 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2186 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2187 | rsvd_bits(maxphyaddr, 51) | | 2187 | rsvd_bits(maxphyaddr, 51) | |
2188 | rsvd_bits(13, 20); /* large page */ | 2188 | rsvd_bits(13, 20); /* large page */ |
2189 | context->rsvd_bits_mask[1][0] = ~0ull; | 2189 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2190 | break; | 2190 | break; |
2191 | } | 2191 | } |
2192 | } | 2192 | } |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 258e4591e1ca..67785f635399 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -281,7 +281,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
281 | { | 281 | { |
282 | unsigned access = gw->pt_access; | 282 | unsigned access = gw->pt_access; |
283 | struct kvm_mmu_page *shadow_page; | 283 | struct kvm_mmu_page *shadow_page; |
284 | u64 spte, *sptep; | 284 | u64 spte, *sptep = NULL; |
285 | int direct; | 285 | int direct; |
286 | gfn_t table_gfn; | 286 | gfn_t table_gfn; |
287 | int r; | 287 | int r; |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e770bf349ec4..356a0ce85c68 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -3012,6 +3012,12 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3012 | return 1; | 3012 | return 1; |
3013 | } | 3013 | } |
3014 | 3014 | ||
3015 | static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
3016 | { | ||
3017 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3018 | return 1; | ||
3019 | } | ||
3020 | |||
3015 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3021 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
3016 | { | 3022 | { |
3017 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3023 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -3198,6 +3204,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3198 | [EXIT_REASON_HLT] = handle_halt, | 3204 | [EXIT_REASON_HLT] = handle_halt, |
3199 | [EXIT_REASON_INVLPG] = handle_invlpg, | 3205 | [EXIT_REASON_INVLPG] = handle_invlpg, |
3200 | [EXIT_REASON_VMCALL] = handle_vmcall, | 3206 | [EXIT_REASON_VMCALL] = handle_vmcall, |
3207 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, | ||
3208 | [EXIT_REASON_VMLAUNCH] = handle_vmx_insn, | ||
3209 | [EXIT_REASON_VMPTRLD] = handle_vmx_insn, | ||
3210 | [EXIT_REASON_VMPTRST] = handle_vmx_insn, | ||
3211 | [EXIT_REASON_VMREAD] = handle_vmx_insn, | ||
3212 | [EXIT_REASON_VMRESUME] = handle_vmx_insn, | ||
3213 | [EXIT_REASON_VMWRITE] = handle_vmx_insn, | ||
3214 | [EXIT_REASON_VMOFF] = handle_vmx_insn, | ||
3215 | [EXIT_REASON_VMON] = handle_vmx_insn, | ||
3201 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 3216 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
3202 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 3217 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
3203 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3218 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 249540f98513..fe5474aec41a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -898,6 +898,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
898 | case MSR_VM_HSAVE_PA: | 898 | case MSR_VM_HSAVE_PA: |
899 | case MSR_P6_EVNTSEL0: | 899 | case MSR_P6_EVNTSEL0: |
900 | case MSR_P6_EVNTSEL1: | 900 | case MSR_P6_EVNTSEL1: |
901 | case MSR_K7_EVNTSEL0: | ||
901 | data = 0; | 902 | data = 0; |
902 | break; | 903 | break; |
903 | case MSR_MTRRcap: | 904 | case MSR_MTRRcap: |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index c1b6c232e02b..616de4628d60 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -1361,7 +1361,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1361 | return 0; | 1361 | return 0; |
1362 | } | 1362 | } |
1363 | 1363 | ||
1364 | void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1364 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
1365 | { | 1365 | { |
1366 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); | 1366 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); |
1367 | /* | 1367 | /* |
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index f4568605d7d5..ff485d361182 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c | |||
@@ -55,8 +55,10 @@ static void delay_tsc(unsigned long loops) | |||
55 | 55 | ||
56 | preempt_disable(); | 56 | preempt_disable(); |
57 | cpu = smp_processor_id(); | 57 | cpu = smp_processor_id(); |
58 | rdtsc_barrier(); | ||
58 | rdtscl(bclock); | 59 | rdtscl(bclock); |
59 | for (;;) { | 60 | for (;;) { |
61 | rdtsc_barrier(); | ||
60 | rdtscl(now); | 62 | rdtscl(now); |
61 | if ((now - bclock) >= loops) | 63 | if ((now - bclock) >= loops) |
62 | break; | 64 | break; |
@@ -78,6 +80,7 @@ static void delay_tsc(unsigned long loops) | |||
78 | if (unlikely(cpu != smp_processor_id())) { | 80 | if (unlikely(cpu != smp_processor_id())) { |
79 | loops -= (now - bclock); | 81 | loops -= (now - bclock); |
80 | cpu = smp_processor_id(); | 82 | cpu = smp_processor_id(); |
83 | rdtsc_barrier(); | ||
81 | rdtscl(bclock); | 84 | rdtscl(bclock); |
82 | } | 85 | } |
83 | } | 86 | } |
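The delay.c change above issues rdtsc_barrier() immediately before every RDTSC read, so the timestamp cannot be fetched speculatively ahead of program order and make the measured interval look shorter than it really is; the barrier is also re-issued when a CPU migration forces the base clock to be re-read. A rough userspace analogue, assuming an x86 compiler that provides the __rdtsc()/_mm_lfence() intrinsics, looks like this:

```c
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>		/* __rdtsc(), _mm_lfence() */

/* Spin for roughly 'loops' TSC ticks, fencing before every counter read. */
static void delay_tsc_like(uint64_t loops)
{
	uint64_t bclock, now;

	_mm_lfence();		/* order the base read against earlier instructions */
	bclock = __rdtsc();
	for (;;) {
		_mm_lfence();	/* keep each sample from being read early */
		now = __rdtsc();
		if (now - bclock >= loops)
			break;
	}
}

int main(void)
{
	delay_tsc_like(10 * 1000 * 1000);	/* ~10M ticks, purely illustrative */
	puts("spun");
	return 0;
}
```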
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index ec13cb5f17ed..b7c2849ffb66 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(__strnlen_user); | |||
127 | 127 | ||
128 | long strnlen_user(const char __user *s, long n) | 128 | long strnlen_user(const char __user *s, long n) |
129 | { | 129 | { |
130 | if (!access_ok(VERIFY_READ, s, n)) | 130 | if (!access_ok(VERIFY_READ, s, 1)) |
131 | return 0; | 131 | return 0; |
132 | return __strnlen_user(s, n); | 132 | return __strnlen_user(s, n); |
133 | } | 133 | } |
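The usercopy_64.c change narrows the access_ok() check in strnlen_user() from the full n bytes to a single byte, presumably because __strnlen_user() already stops at the terminating NUL or at the first faulting byte, so a full-range check only serves to reject valid short strings whenever the caller's limit overshoots the remaining user address range. The effect of the two checks can be seen with a simplified, made-up range test (the boundary constant and addresses below are illustrative, not the kernel's):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define USER_LIMIT 0x00007ffffffff000ULL	/* illustrative top of user space */

/* Simplified spirit of access_ok(): the range must not wrap and must stay
 * entirely below the user/kernel boundary. */
static bool range_ok(uint64_t addr, uint64_t size)
{
	return size <= USER_LIMIT && addr <= USER_LIMIT - size;
}

int main(void)
{
	uint64_t s = 0x00007ffffffe0000ULL;	/* hypothetical user string pointer */
	uint64_t n = 1ULL << 40;		/* huge caller-supplied length limit */

	printf("check full n bytes : %d\n", range_ok(s, n));	/* 0: spuriously rejected */
	printf("check first byte   : %d\n", range_ok(s, 1));	/* 1: let the copy stop at a fault */
	return 0;
}
```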
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index baa0e86adfbc..78a5fff857be 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -952,8 +952,6 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
952 | tsk = current; | 952 | tsk = current; |
953 | mm = tsk->mm; | 953 | mm = tsk->mm; |
954 | 954 | ||
955 | prefetchw(&mm->mmap_sem); | ||
956 | |||
957 | /* Get the faulting address: */ | 955 | /* Get the faulting address: */ |
958 | address = read_cr2(); | 956 | address = read_cr2(); |
959 | 957 | ||
@@ -963,6 +961,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
963 | */ | 961 | */ |
964 | if (kmemcheck_active(regs)) | 962 | if (kmemcheck_active(regs)) |
965 | kmemcheck_hide(regs); | 963 | kmemcheck_hide(regs); |
964 | prefetchw(&mm->mmap_sem); | ||
966 | 965 | ||
967 | if (unlikely(kmmio_fault(regs, address))) | 966 | if (unlikely(kmmio_fault(regs, address))) |
968 | return; | 967 | return; |
@@ -1114,7 +1113,7 @@ good_area: | |||
1114 | * make sure we exit gracefully rather than endlessly redo | 1113 | * make sure we exit gracefully rather than endlessly redo |
1115 | * the fault: | 1114 | * the fault: |
1116 | */ | 1115 | */ |
1117 | fault = handle_mm_fault(mm, vma, address, write); | 1116 | fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); |
1118 | 1117 | ||
1119 | if (unlikely(fault & VM_FAULT_ERROR)) { | 1118 | if (unlikely(fault & VM_FAULT_ERROR)) { |
1120 | mm_fault_error(regs, error_code, address, fault); | 1119 | mm_fault_error(regs, error_code, address, fault); |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 6340cef6798a..71da1bca13cb 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -14,7 +14,7 @@ | |||
14 | static inline pte_t gup_get_pte(pte_t *ptep) | 14 | static inline pte_t gup_get_pte(pte_t *ptep) |
15 | { | 15 | { |
16 | #ifndef CONFIG_X86_PAE | 16 | #ifndef CONFIG_X86_PAE |
17 | return *ptep; | 17 | return ACCESS_ONCE(*ptep); |
18 | #else | 18 | #else |
19 | /* | 19 | /* |
20 | * With get_user_pages_fast, we walk down the pagetables without taking | 20 | * With get_user_pages_fast, we walk down the pagetables without taking |
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
219 | return 1; | 219 | return 1; |
220 | } | 220 | } |
221 | 221 | ||
222 | /* | ||
223 | * Like get_user_pages_fast() except its IRQ-safe in that it won't fall | ||
224 | * back to the regular GUP. | ||
225 | */ | ||
226 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
227 | struct page **pages) | ||
228 | { | ||
229 | struct mm_struct *mm = current->mm; | ||
230 | unsigned long addr, len, end; | ||
231 | unsigned long next; | ||
232 | unsigned long flags; | ||
233 | pgd_t *pgdp; | ||
234 | int nr = 0; | ||
235 | |||
236 | start &= PAGE_MASK; | ||
237 | addr = start; | ||
238 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
239 | end = start + len; | ||
240 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
241 | (void __user *)start, len))) | ||
242 | return 0; | ||
243 | |||
244 | /* | ||
245 | * XXX: batch / limit 'nr', to avoid large irq off latency | ||
246 | * needs some instrumenting to determine the common sizes used by | ||
247 | * important workloads (eg. DB2), and whether limiting the batch size | ||
248 | * will decrease performance. | ||
249 | * | ||
250 | * It seems like we're in the clear for the moment. Direct-IO is | ||
251 | * the main guy that batches up lots of get_user_pages, and even | ||
252 | * they are limited to 64-at-a-time which is not so many. | ||
253 | */ | ||
254 | /* | ||
255 | * This doesn't prevent pagetable teardown, but does prevent | ||
256 | * the pagetables and pages from being freed on x86. | ||
257 | * | ||
258 | * So long as we atomically load page table pointers versus teardown | ||
259 | * (which we do on x86, with the above PAE exception), we can follow the | ||
260 | * address down to the page and take a ref on it. | ||
261 | */ | ||
262 | local_irq_save(flags); | ||
263 | pgdp = pgd_offset(mm, addr); | ||
264 | do { | ||
265 | pgd_t pgd = *pgdp; | ||
266 | |||
267 | next = pgd_addr_end(addr, end); | ||
268 | if (pgd_none(pgd)) | ||
269 | break; | ||
270 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
271 | break; | ||
272 | } while (pgdp++, addr = next, addr != end); | ||
273 | local_irq_restore(flags); | ||
274 | |||
275 | return nr; | ||
276 | } | ||
277 | |||
222 | /** | 278 | /** |
223 | * get_user_pages_fast() - pin user pages in memory | 279 | * get_user_pages_fast() - pin user pages in memory |
224 | * @start: starting user address | 280 | * @start: starting user address |
@@ -247,11 +303,16 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
247 | start &= PAGE_MASK; | 303 | start &= PAGE_MASK; |
248 | addr = start; | 304 | addr = start; |
249 | len = (unsigned long) nr_pages << PAGE_SHIFT; | 305 | len = (unsigned long) nr_pages << PAGE_SHIFT; |
306 | |||
250 | end = start + len; | 307 | end = start + len; |
251 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | 308 | if (end < start) |
252 | (void __user *)start, len))) | ||
253 | goto slow_irqon; | 309 | goto slow_irqon; |
254 | 310 | ||
311 | #ifdef CONFIG_X86_64 | ||
312 | if (end >> __VIRTUAL_MASK_SHIFT) | ||
313 | goto slow_irqon; | ||
314 | #endif | ||
315 | |||
255 | /* | 316 | /* |
256 | * XXX: batch / limit 'nr', to avoid large irq off latency | 317 | * XXX: batch / limit 'nr', to avoid large irq off latency |
257 | * needs some instrumenting to determine the common sizes used by | 318 | * needs some instrumenting to determine the common sizes used by |
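The new __get_user_pages_fast() added above walks the page tables with interrupts disabled instead of falling back to the sleeping slow path; on x86 the IRQ-off region is what keeps the page-table pages from being freed underneath the walk, which makes the helper usable from atomic contexts such as NMI or IRQ handlers (its return value is simply how many pages were pinned, possibly zero). A hedged, illustrative sketch of a caller in such a context, using hypothetical function and variable names, might look like:

```c
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/errno.h>

/*
 * Illustrative only: pin one user page from an atomic context and read a
 * byte out of it.  A return of 0 from __get_user_pages_fast() just means
 * "could not pin right now"; there is no slow-path fallback here.
 */
static int peek_user_byte_atomic(unsigned long uaddr, u8 *out)
{
	struct page *page;
	void *kaddr;

	if (__get_user_pages_fast(uaddr & PAGE_MASK, 1, 0, &page) != 1)
		return -EFAULT;

	kaddr = kmap_atomic(page, KM_IRQ0);
	*out = *((u8 *)kaddr + (uaddr & ~PAGE_MASK));
	kunmap_atomic(kaddr, KM_IRQ0);

	put_page(page);
	return 0;
}
```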
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f53b57e4086f..47ce9a2ce5e7 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -177,20 +177,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, | |||
177 | return nr_range; | 177 | return nr_range; |
178 | } | 178 | } |
179 | 179 | ||
180 | #ifdef CONFIG_X86_64 | ||
181 | static void __init init_gbpages(void) | ||
182 | { | ||
183 | if (direct_gbpages && cpu_has_gbpages) | ||
184 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
185 | else | ||
186 | direct_gbpages = 0; | ||
187 | } | ||
188 | #else | ||
189 | static inline void init_gbpages(void) | ||
190 | { | ||
191 | } | ||
192 | #endif | ||
193 | |||
194 | /* | 180 | /* |
195 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | 181 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. |
196 | * This runs before bootmem is initialized and gets pages directly from | 182 | * This runs before bootmem is initialized and gets pages directly from |
@@ -210,9 +196,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
210 | 196 | ||
211 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); | 197 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); |
212 | 198 | ||
213 | if (!after_bootmem) | ||
214 | init_gbpages(); | ||
215 | |||
216 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | 199 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
217 | /* | 200 | /* |
218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 201 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9c543290a813..b177652251a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -527,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
527 | return phys_pud_init(pud, addr, end, page_size_mask); | 527 | return phys_pud_init(pud, addr, end, page_size_mask); |
528 | } | 528 | } |
529 | 529 | ||
530 | unsigned long __init | 530 | unsigned long __meminit |
531 | kernel_physical_mapping_init(unsigned long start, | 531 | kernel_physical_mapping_init(unsigned long start, |
532 | unsigned long end, | 532 | unsigned long end, |
533 | unsigned long page_size_mask) | 533 | unsigned long page_size_mask) |
@@ -598,6 +598,8 @@ void __init paging_init(void) | |||
598 | 598 | ||
599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | 599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
600 | sparse_init(); | 600 | sparse_init(); |
601 | /* clear the default setting with node 0 */ | ||
602 | nodes_clear(node_states[N_NORMAL_MEMORY]); | ||
601 | free_area_init_nodes(max_zone_pfns); | 603 | free_area_init_nodes(max_zone_pfns); |
602 | } | 604 | } |
603 | 605 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3cfe9ced8a4c..1b734d7a8966 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
13 | #include <linux/debugfs.h> | 13 | #include <linux/debugfs.h> |
14 | #include <linux/pfn.h> | ||
14 | 15 | ||
15 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
16 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
@@ -681,8 +682,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); | |||
681 | static int cpa_process_alias(struct cpa_data *cpa) | 682 | static int cpa_process_alias(struct cpa_data *cpa) |
682 | { | 683 | { |
683 | struct cpa_data alias_cpa; | 684 | struct cpa_data alias_cpa; |
684 | int ret = 0; | 685 | unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); |
685 | unsigned long temp_cpa_vaddr, vaddr; | 686 | unsigned long vaddr, remapped; |
687 | int ret; | ||
686 | 688 | ||
687 | if (cpa->pfn >= max_pfn_mapped) | 689 | if (cpa->pfn >= max_pfn_mapped) |
688 | return 0; | 690 | return 0; |
@@ -706,42 +708,55 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
706 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { | 708 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { |
707 | 709 | ||
708 | alias_cpa = *cpa; | 710 | alias_cpa = *cpa; |
709 | temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | 711 | alias_cpa.vaddr = &laddr; |
710 | alias_cpa.vaddr = &temp_cpa_vaddr; | ||
711 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | 712 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); |
712 | 713 | ||
713 | |||
714 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | 714 | ret = __change_page_attr_set_clr(&alias_cpa, 0); |
715 | if (ret) | ||
716 | return ret; | ||
715 | } | 717 | } |
716 | 718 | ||
717 | #ifdef CONFIG_X86_64 | 719 | #ifdef CONFIG_X86_64 |
718 | if (ret) | ||
719 | return ret; | ||
720 | /* | 720 | /* |
721 | * No need to redo, when the primary call touched the high | 721 | * If the primary call didn't touch the high mapping already |
722 | * mapping already: | 722 | * and the physical address is inside the kernel map, we need |
723 | */ | ||
724 | if (within(vaddr, (unsigned long) _text, _brk_end)) | ||
725 | return 0; | ||
726 | |||
727 | /* | ||
728 | * If the physical address is inside the kernel map, we need | ||
729 | * to touch the high mapped kernel as well: | 723 | * to touch the high mapped kernel as well: |
730 | */ | 724 | */ |
731 | if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) | 725 | if (!within(vaddr, (unsigned long)_text, _brk_end) && |
732 | return 0; | 726 | within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { |
727 | unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + | ||
728 | __START_KERNEL_map - phys_base; | ||
729 | alias_cpa = *cpa; | ||
730 | alias_cpa.vaddr = &temp_cpa_vaddr; | ||
731 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | ||
733 | 732 | ||
734 | alias_cpa = *cpa; | 733 | /* |
735 | temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; | 734 | * The high mapping range is imprecise, so ignore the |
736 | alias_cpa.vaddr = &temp_cpa_vaddr; | 735 | * return value. |
737 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | 736 | */ |
737 | __change_page_attr_set_clr(&alias_cpa, 0); | ||
738 | } | ||
739 | #endif | ||
738 | 740 | ||
739 | /* | 741 | /* |
740 | * The high mapping range is imprecise, so ignore the return value. | 742 | * If the PMD page was partially used for per-cpu remapping, |
743 | * the recycled area needs to be split and modified. Because | ||
744 | * the area is always a proper subset of a PMD page | ||
745 | * cpa->numpages is guaranteed to be 1 for these areas, so | ||
746 | * there's no need to loop over and check for further remaps. | ||
741 | */ | 747 | */ |
742 | __change_page_attr_set_clr(&alias_cpa, 0); | 748 | remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); |
743 | #endif | 749 | if (remapped) { |
744 | return ret; | 750 | WARN_ON(cpa->numpages > 1); |
751 | alias_cpa = *cpa; | ||
752 | alias_cpa.vaddr = &remapped; | ||
753 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | ||
754 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | ||
755 | if (ret) | ||
756 | return ret; | ||
757 | } | ||
758 | |||
759 | return 0; | ||
745 | } | 760 | } |
746 | 761 | ||
747 | static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | 762 | static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) |
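The reworked cpa_process_alias() above makes the attribute change follow every virtual alias of the affected physical frames: the low direct mapping, the high kernel-text mapping (computed from the pfn relative to phys_base), and any per-cpu large-page remapped area reported by pcpu_lpage_remapped(). Keeping aliases in sync matters because mapping the same physical page with conflicting cache attributes is undefined on x86. The address arithmetic for the two common aliases can be reproduced in a small standalone sketch (the base constants and the pfn are illustrative):

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT		12
#define PAGE_OFFSET		0xffff880000000000ULL	/* direct-map base (illustrative) */
#define START_KERNEL_MAP	0xffffffff80000000ULL	/* kernel-text map base (illustrative) */

int main(void)
{
	uint64_t phys_base = 0x1000000ULL;	/* assume the kernel loaded at 16MiB */
	uint64_t pfn = 0x1a00ULL;		/* hypothetical frame inside the kernel image */

	/* The same frame is reachable through (at least) two virtual aliases: */
	uint64_t lowmap_alias  = PAGE_OFFSET + (pfn << PAGE_SHIFT);
	uint64_t highmap_alias = (pfn << PAGE_SHIFT) + START_KERNEL_MAP - phys_base;

	printf("direct-map alias : %#llx\n", (unsigned long long)lowmap_alias);
	printf("kernel-map alias : %#llx\n", (unsigned long long)highmap_alias);
	return 0;
}
```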
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index c0ecf250fe51..b26626dc517c 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -38,15 +38,26 @@ count_resource(struct acpi_resource *acpi_res, void *data) | |||
38 | struct acpi_resource_address64 addr; | 38 | struct acpi_resource_address64 addr; |
39 | acpi_status status; | 39 | acpi_status status; |
40 | 40 | ||
41 | if (info->res_num >= PCI_BUS_NUM_RESOURCES) | ||
42 | return AE_OK; | ||
43 | |||
44 | status = resource_to_addr(acpi_res, &addr); | 41 | status = resource_to_addr(acpi_res, &addr); |
45 | if (ACPI_SUCCESS(status)) | 42 | if (ACPI_SUCCESS(status)) |
46 | info->res_num++; | 43 | info->res_num++; |
47 | return AE_OK; | 44 | return AE_OK; |
48 | } | 45 | } |
49 | 46 | ||
47 | static int | ||
48 | bus_has_transparent_bridge(struct pci_bus *bus) | ||
49 | { | ||
50 | struct pci_dev *dev; | ||
51 | |||
52 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
53 | u16 class = dev->class >> 8; | ||
54 | |||
55 | if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) | ||
56 | return true; | ||
57 | } | ||
58 | return false; | ||
59 | } | ||
60 | |||
50 | static acpi_status | 61 | static acpi_status |
51 | setup_resource(struct acpi_resource *acpi_res, void *data) | 62 | setup_resource(struct acpi_resource *acpi_res, void *data) |
52 | { | 63 | { |
@@ -56,9 +67,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
56 | acpi_status status; | 67 | acpi_status status; |
57 | unsigned long flags; | 68 | unsigned long flags; |
58 | struct resource *root; | 69 | struct resource *root; |
59 | 70 | int max_root_bus_resources = PCI_BUS_NUM_RESOURCES; | |
60 | if (info->res_num >= PCI_BUS_NUM_RESOURCES) | ||
61 | return AE_OK; | ||
62 | 71 | ||
63 | status = resource_to_addr(acpi_res, &addr); | 72 | status = resource_to_addr(acpi_res, &addr); |
64 | if (!ACPI_SUCCESS(status)) | 73 | if (!ACPI_SUCCESS(status)) |
@@ -82,6 +91,18 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
82 | res->end = res->start + addr.address_length - 1; | 91 | res->end = res->start + addr.address_length - 1; |
83 | res->child = NULL; | 92 | res->child = NULL; |
84 | 93 | ||
94 | if (bus_has_transparent_bridge(info->bus)) | ||
95 | max_root_bus_resources -= 3; | ||
96 | if (info->res_num >= max_root_bus_resources) { | ||
97 | printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " | ||
98 | "from %s for %s due to _CRS returning more than " | ||
99 | "%d resource descriptors\n", (unsigned long) res->start, | ||
100 | (unsigned long) res->end, root->name, info->name, | ||
101 | max_root_bus_resources); | ||
102 | info->res_num++; | ||
103 | return AE_OK; | ||
104 | } | ||
105 | |||
85 | if (insert_resource(root, res)) { | 106 | if (insert_resource(root, res)) { |
86 | printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " | 107 | printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " |
87 | "from %s for %s\n", (unsigned long) res->start, | 108 | "from %s for %s\n", (unsigned long) res->start, |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8766b0e216c5..712443ec6d43 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -523,6 +523,69 @@ reject: | |||
523 | 523 | ||
524 | static int __initdata known_bridge; | 524 | static int __initdata known_bridge; |
525 | 525 | ||
526 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
527 | |||
528 | /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ | ||
529 | struct acpi_mcfg_allocation *pci_mmcfg_config; | ||
530 | int pci_mmcfg_config_num; | ||
531 | |||
532 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
533 | { | ||
534 | if (!strcmp(mcfg->header.oem_id, "SGI")) | ||
535 | acpi_mcfg_64bit_base_addr = TRUE; | ||
536 | |||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | static int __init pci_parse_mcfg(struct acpi_table_header *header) | ||
541 | { | ||
542 | struct acpi_table_mcfg *mcfg; | ||
543 | unsigned long i; | ||
544 | int config_size; | ||
545 | |||
546 | if (!header) | ||
547 | return -EINVAL; | ||
548 | |||
549 | mcfg = (struct acpi_table_mcfg *)header; | ||
550 | |||
551 | /* how many config structures do we have */ | ||
552 | pci_mmcfg_config_num = 0; | ||
553 | i = header->length - sizeof(struct acpi_table_mcfg); | ||
554 | while (i >= sizeof(struct acpi_mcfg_allocation)) { | ||
555 | ++pci_mmcfg_config_num; | ||
556 | i -= sizeof(struct acpi_mcfg_allocation); | ||
557 | }; | ||
558 | if (pci_mmcfg_config_num == 0) { | ||
559 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); | ||
560 | return -ENODEV; | ||
561 | } | ||
562 | |||
563 | config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); | ||
564 | pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); | ||
565 | if (!pci_mmcfg_config) { | ||
566 | printk(KERN_WARNING PREFIX | ||
567 | "No memory for MCFG config tables\n"); | ||
568 | return -ENOMEM; | ||
569 | } | ||
570 | |||
571 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | ||
572 | |||
573 | acpi_mcfg_oem_check(mcfg); | ||
574 | |||
575 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | ||
576 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && | ||
577 | !acpi_mcfg_64bit_base_addr) { | ||
578 | printk(KERN_ERR PREFIX | ||
579 | "MMCONFIG not in low 4GB of memory\n"); | ||
580 | kfree(pci_mmcfg_config); | ||
581 | pci_mmcfg_config_num = 0; | ||
582 | return -ENODEV; | ||
583 | } | ||
584 | } | ||
585 | |||
586 | return 0; | ||
587 | } | ||
588 | |||
526 | static void __init __pci_mmcfg_init(int early) | 589 | static void __init __pci_mmcfg_init(int early) |
527 | { | 590 | { |
528 | /* MMCONFIG disabled */ | 591 | /* MMCONFIG disabled */ |
@@ -543,7 +606,7 @@ static void __init __pci_mmcfg_init(int early) | |||
543 | } | 606 | } |
544 | 607 | ||
545 | if (!known_bridge) | 608 | if (!known_bridge) |
546 | acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); | 609 | acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); |
547 | 610 | ||
548 | pci_mmcfg_reject_broken(early); | 611 | pci_mmcfg_reject_broken(early); |
549 | 612 | ||
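pci_parse_mcfg() above sizes the MMCONFIG allocation array purely from the ACPI table length: everything after the fixed MCFG header is a run of acpi_mcfg_allocation records, so it counts how many whole records fit, copies them out with one kmalloc()/memcpy(), and then rejects any above-4GB base address unless the OEM (here SGI) is known to use 64-bit apertures. The entry-count arithmetic, with byte sizes stated as assumptions rather than taken from ACPICA headers, is just:

```c
#include <stdio.h>

int main(void)
{
	/* Assumed sizes: a 44-byte MCFG header and 16-byte allocation records. */
	unsigned long table_len = 44 + 2 * 16;	/* total length as reported by firmware */
	unsigned long mcfg_hdr  = 44;
	unsigned long alloc_sz  = 16;

	unsigned long entries = 0;
	unsigned long left = table_len - mcfg_hdr;

	while (left >= alloc_sz) {		/* same counting loop as pci_parse_mcfg() */
		entries++;
		left -= alloc_sz;
	}

	printf("MCFG allocation entries: %lu\n", entries);	/* 2 */
	return 0;
}
```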
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d277ef1eea51..b3d20b9cac63 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -244,7 +244,7 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
244 | do_fpu_end(); | 244 | do_fpu_end(); |
245 | mtrr_ap_init(); | 245 | mtrr_ap_init(); |
246 | 246 | ||
247 | #ifdef CONFIG_X86_32 | 247 | #ifdef CONFIG_X86_OLD_MCE |
248 | mcheck_init(&boot_cpu_data); | 248 | mcheck_init(&boot_cpu_data); |
249 | #endif | 249 | #endif |
250 | } | 250 | } |