about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2009-07-04 05:00:38 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-07-04 05:00:42 -0400
commit: d7e57676e3ed7ab9b2c7c4bcb7873e51eacbdb84 (patch)
tree: f7433f38cd407a0c35a8cbf2b7e3fd756087bce7 /arch/x86
parent: feaa0457ec8351cae855edc9a3052ac49322538e (diff)
parent: 746a99a5af60ee676afa2ba469ccd1373493c7e7 (diff)
Merge branch 'linus' into x86/cleanups

Merge reason: We were on an older pre-rc1 base, move to almost-rc2.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig16
-rw-r--r--arch/x86/boot/bioscall.S2
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S5
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c4
-rw-r--r--arch/x86/crypto/fpu.c4
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/amd_iommu.h2
-rw-r--r--arch/x86/include/asm/atomic_32.h3
-rw-r--r--arch/x86/include/asm/boot.h6
-rw-r--r--arch/x86/include/asm/desc.h26
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/mce.h63
-rw-r--r--arch/x86/include/asm/msr.h7
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/include/asm/pci_x86.h3
-rw-r--r--arch/x86/include/asm/percpu.h10
-rw-r--r--arch/x86/include/asm/perf_counter.h8
-rw-r--r--arch/x86/include/asm/pgtable_32.h8
-rw-r--r--arch/x86/include/asm/pgtable_64.h5
-rw-r--r--arch/x86/include/asm/proto.h11
-rw-r--r--arch/x86/include/asm/therm_throt.h9
-rw-r--r--arch/x86/include/asm/timer.h6
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c80
-rw-r--r--arch/x86/kernel/acpi/cstate.c16
-rw-r--r--arch/x86/kernel/acpi/processor.c13
-rw-r--r--arch/x86/kernel/amd_iommu.c16
-rw-r--r--arch/x86/kernel/amd_iommu_init.c26
-rw-r--r--arch/x86/kernel/apic/io_apic.c15
-rw-r--r--arch/x86/kernel/apic/probe_32.c11
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile9
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c237
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.h38
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd_64.c)0
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c250
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c248
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c106
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c3
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c158
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c12
-rw-r--r--arch/x86/kernel/crash.c6
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/e820.c16
-rw-r--r--arch/x86/kernel/efi.c31
-rw-r--r--arch/x86/kernel/entry_32.S66
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/ftrace.c6
-rw-r--r--arch/x86/kernel/head_32.S1
-rw-r--r--arch/x86/kernel/head_64.S1
-rw-r--r--arch/x86/kernel/hpet.c3
-rw-r--r--arch/x86/kernel/pci-dma.c8
-rw-r--r--arch/x86/kernel/pci-swiotlb.c3
-rw-r--r--arch/x86/kernel/setup.c16
-rw-r--r--arch/x86/kernel/setup_percpu.c219
-rw-r--r--arch/x86/kernel/tlb_uv.c9
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/tsc.c8
-rw-r--r--arch/x86/kvm/mmu.c6
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/vmx.c15
-rw-r--r--arch/x86/kvm/x86.c1
-rw-r--r--arch/x86/kvm/x86_emulate.c2
-rw-r--r--arch/x86/lib/delay.c3
-rw-r--r--arch/x86/lib/usercopy_64.c2
-rw-r--r--arch/x86/mm/fault.c5
-rw-r--r--arch/x86/mm/gup.c67
-rw-r--r--arch/x86/mm/init.c17
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/mm/pageattr.c65
-rw-r--r--arch/x86/pci/acpi.c33
-rw-r--r--arch/x86/pci/mmconfig-shared.c65
-rw-r--r--arch/x86/power/cpu.c2
81 files changed, 1286 insertions, 929 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 73c0bda73fcd..c07f72205909 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
34 select HAVE_DYNAMIC_FTRACE 34 select HAVE_DYNAMIC_FTRACE
35 select HAVE_FUNCTION_TRACER 35 select HAVE_FUNCTION_TRACER
36 select HAVE_FUNCTION_GRAPH_TRACER 36 select HAVE_FUNCTION_GRAPH_TRACER
37 select HAVE_FUNCTION_GRAPH_FP_TEST
37 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 38 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
38 select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE 39 select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
39 select HAVE_FTRACE_SYSCALLS 40 select HAVE_FTRACE_SYSCALLS
@@ -1912,25 +1913,14 @@ config DMAR_DEFAULT_ON
1912 recommended you say N here while the DMAR code remains 1913 recommended you say N here while the DMAR code remains
1913 experimental. 1914 experimental.
1914 1915
1915config DMAR_GFX_WA
1916 def_bool y
1917 prompt "Support for Graphics workaround"
1918 depends on DMAR
1919 ---help---
1920 Current Graphics drivers tend to use physical address
1921 for DMA and avoid using DMA APIs. Setting this config
1922 option permits the IOMMU driver to set a unity map for
1923 all the OS-visible memory. Hence the driver can continue
1924 to use physical addresses for DMA.
1925
1926config DMAR_FLOPPY_WA 1916config DMAR_FLOPPY_WA
1927 def_bool y 1917 def_bool y
1928 depends on DMAR 1918 depends on DMAR
1929 ---help--- 1919 ---help---
1930 Floppy disk drivers are know to bypass DMA API calls 1920 Floppy disk drivers are known to bypass DMA API calls
1931 thereby failing to work when IOMMU is enabled. This 1921 thereby failing to work when IOMMU is enabled. This
1932 workaround will setup a 1:1 mapping for the first 1922 workaround will setup a 1:1 mapping for the first
1933 16M to make floppy (an ISA device) work. 1923 16MiB to make floppy (an ISA device) work.
1934 1924
1935config INTR_REMAP 1925config INTR_REMAP
1936 bool "Support for Interrupt Remapping (EXPERIMENTAL)" 1926 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 507793739ea5..1dfbf64e52a2 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -13,7 +13,7 @@
13 * touching registers they shouldn't be. 13 * touching registers they shouldn't be.
14 */ 14 */
15 15
16 .code16 16 .code16gcc
17 .text 17 .text
18 .globl intcall 18 .globl intcall
19 .type intcall, @function 19 .type intcall, @function
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index caba99601703..eb0566e83319 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -845,7 +845,7 @@ ENTRY(aesni_cbc_enc)
845 */ 845 */
846ENTRY(aesni_cbc_dec) 846ENTRY(aesni_cbc_dec)
847 cmp $16, LEN 847 cmp $16, LEN
848 jb .Lcbc_dec_ret 848 jb .Lcbc_dec_just_ret
849 mov 480(KEYP), KLEN 849 mov 480(KEYP), KLEN
850 add $240, KEYP 850 add $240, KEYP
851 movups (IVP), IV 851 movups (IVP), IV
@@ -891,6 +891,7 @@ ENTRY(aesni_cbc_dec)
891 add $16, OUTP 891 add $16, OUTP
892 cmp $16, LEN 892 cmp $16, LEN
893 jge .Lcbc_dec_loop1 893 jge .Lcbc_dec_loop1
894 movups IV, (IVP)
895.Lcbc_dec_ret: 894.Lcbc_dec_ret:
895 movups IV, (IVP)
896.Lcbc_dec_just_ret:
896 ret 897 ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 4e663398f77f..c580c5ec1cad 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -198,6 +198,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
198 198
199 blkcipher_walk_init(&walk, dst, src, nbytes); 199 blkcipher_walk_init(&walk, dst, src, nbytes);
200 err = blkcipher_walk_virt(desc, &walk); 200 err = blkcipher_walk_virt(desc, &walk);
201 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
201 202
202 kernel_fpu_begin(); 203 kernel_fpu_begin();
203 while ((nbytes = walk.nbytes)) { 204 while ((nbytes = walk.nbytes)) {
@@ -221,6 +222,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
221 222
222 blkcipher_walk_init(&walk, dst, src, nbytes); 223 blkcipher_walk_init(&walk, dst, src, nbytes);
223 err = blkcipher_walk_virt(desc, &walk); 224 err = blkcipher_walk_virt(desc, &walk);
225 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
224 226
225 kernel_fpu_begin(); 227 kernel_fpu_begin();
226 while ((nbytes = walk.nbytes)) { 228 while ((nbytes = walk.nbytes)) {
@@ -266,6 +268,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
266 268
267 blkcipher_walk_init(&walk, dst, src, nbytes); 269 blkcipher_walk_init(&walk, dst, src, nbytes);
268 err = blkcipher_walk_virt(desc, &walk); 270 err = blkcipher_walk_virt(desc, &walk);
271 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
269 272
270 kernel_fpu_begin(); 273 kernel_fpu_begin();
271 while ((nbytes = walk.nbytes)) { 274 while ((nbytes = walk.nbytes)) {
@@ -289,6 +292,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
289 292
290 blkcipher_walk_init(&walk, dst, src, nbytes); 293 blkcipher_walk_init(&walk, dst, src, nbytes);
291 err = blkcipher_walk_virt(desc, &walk); 294 err = blkcipher_walk_virt(desc, &walk);
295 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
292 296
293 kernel_fpu_begin(); 297 kernel_fpu_begin();
294 while ((nbytes = walk.nbytes)) { 298 while ((nbytes = walk.nbytes)) {
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 5f9781a3815f..daef6cd2b45d 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -48,7 +48,7 @@ static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
48 struct blkcipher_desc desc = { 48 struct blkcipher_desc desc = {
49 .tfm = child, 49 .tfm = child,
50 .info = desc_in->info, 50 .info = desc_in->info,
51 .flags = desc_in->flags, 51 .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
52 }; 52 };
53 53
54 kernel_fpu_begin(); 54 kernel_fpu_begin();
@@ -67,7 +67,7 @@ static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
67 struct blkcipher_desc desc = { 67 struct blkcipher_desc desc = {
68 .tfm = child, 68 .tfm = child,
69 .info = desc_in->info, 69 .info = desc_in->info,
70 .flags = desc_in->flags, 70 .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
71 }; 71 };
72 72
73 kernel_fpu_begin(); 73 kernel_fpu_begin();
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 4518dc500903..20d1465a2ab0 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -144,6 +144,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
144 144
145#else /* !CONFIG_ACPI */ 145#else /* !CONFIG_ACPI */
146 146
147#define acpi_disabled 1
147#define acpi_lapic 0 148#define acpi_lapic 0
148#define acpi_ioapic 0 149#define acpi_ioapic 0
149static inline void acpi_noirq_set(void) { } 150static inline void acpi_noirq_set(void) { }
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 262e02820049..bdf96f119f06 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -29,9 +29,11 @@ extern void amd_iommu_detect(void);
29extern irqreturn_t amd_iommu_int_handler(int irq, void *data); 29extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
30extern void amd_iommu_flush_all_domains(void); 30extern void amd_iommu_flush_all_domains(void);
31extern void amd_iommu_flush_all_devices(void); 31extern void amd_iommu_flush_all_devices(void);
32extern void amd_iommu_shutdown(void);
32#else 33#else
33static inline int amd_iommu_init(void) { return -ENODEV; } 34static inline int amd_iommu_init(void) { return -ENODEV; }
34static inline void amd_iommu_detect(void) { } 35static inline void amd_iommu_detect(void) { }
36static inline void amd_iommu_shutdown(void) { }
35#endif 37#endif
36 38
37#endif /* _ASM_X86_AMD_IOMMU_H */ 39#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 8cb9c814e120..2503d4e64c2a 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -257,7 +257,7 @@ typedef struct {
257 257
258/** 258/**
259 * atomic64_read - read atomic64 variable 259 * atomic64_read - read atomic64 variable
260 * @v: pointer of type atomic64_t 260 * @ptr: pointer of type atomic64_t
261 * 261 *
262 * Atomically reads the value of @v. 262 * Atomically reads the value of @v.
263 * Doesn't imply a read memory barrier. 263 * Doesn't imply a read memory barrier.
@@ -294,7 +294,6 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val,
294 * atomic64_xchg - xchg atomic64 variable 294 * atomic64_xchg - xchg atomic64 variable
295 * @ptr: pointer to type atomic64_t 295 * @ptr: pointer to type atomic64_t
296 * @new_val: value to assign 296 * @new_val: value to assign
297 * @old_val: old value that was there
298 * 297 *
299 * Atomically xchgs the value of @ptr to @new_val and returns 298 * Atomically xchgs the value of @ptr to @new_val and returns
300 * the old value. 299 * the old value.
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 418e632d4a80..7a1065958ba9 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -8,7 +8,7 @@
8 8
9#ifdef __KERNEL__ 9#ifdef __KERNEL__
10 10
11#include <asm/page_types.h> 11#include <asm/pgtable_types.h>
12 12
13/* Physical address where kernel should be loaded. */ 13/* Physical address where kernel should be loaded. */
14#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ 14#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
@@ -16,10 +16,10 @@
16 & ~(CONFIG_PHYSICAL_ALIGN - 1)) 16 & ~(CONFIG_PHYSICAL_ALIGN - 1))
17 17
18/* Minimum kernel alignment, as a power of two */ 18/* Minimum kernel alignment, as a power of two */
19#ifdef CONFIG_x86_64 19#ifdef CONFIG_X86_64
20#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT 20#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
21#else 21#else
22#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) 22#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER)
23#endif 23#endif
24#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) 24#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
25 25
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index c45f415ce315..c993e9e0fed4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -1,7 +1,6 @@
1#ifndef _ASM_X86_DESC_H 1#ifndef _ASM_X86_DESC_H
2#define _ASM_X86_DESC_H 2#define _ASM_X86_DESC_H
3 3
4#ifndef __ASSEMBLY__
5#include <asm/desc_defs.h> 4#include <asm/desc_defs.h>
6#include <asm/ldt.h> 5#include <asm/ldt.h>
7#include <asm/mmu.h> 6#include <asm/mmu.h>
@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
380 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); 379 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
381} 380}
382 381
383#else
384/*
385 * GET_DESC_BASE reads the descriptor base of the specified segment.
386 *
387 * Args:
388 * idx - descriptor index
389 * gdt - GDT pointer
390 * base - 32bit register to which the base will be written
391 * lo_w - lo word of the "base" register
392 * lo_b - lo byte of the "base" register
393 * hi_b - hi byte of the low word of the "base" register
394 *
395 * Example:
396 * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
397 * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
398 */
399#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
400 movb idx * 8 + 4(gdt), lo_b; \
401 movb idx * 8 + 7(gdt), hi_b; \
402 shll $16, base; \
403 movw idx * 8 + 2(gdt), lo_w;
404
405
406#endif /* __ASSEMBLY__ */
407
408#endif /* _ASM_X86_DESC_H */ 382#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index af326a2975b5..fd6d21bbee6c 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,6 +6,7 @@ extern void no_iommu_init(void);
6extern struct dma_map_ops nommu_dma_ops; 6extern struct dma_map_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 7extern int force_iommu, no_iommu;
8extern int iommu_detected; 8extern int iommu_detected;
9extern int iommu_pass_through;
9 10
10/* 10 seconds */ 11/* 10 seconds */
11#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) 12#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 540a466e50f5..5cdd8d100ec9 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,15 +102,39 @@ struct mce_log {
102 102
103#ifdef __KERNEL__ 103#ifdef __KERNEL__
104 104
105#include <linux/percpu.h>
106#include <linux/init.h>
107#include <asm/atomic.h>
108
105extern int mce_disabled; 109extern int mce_disabled;
110extern int mce_p5_enabled;
106 111
107#include <asm/atomic.h> 112#ifdef CONFIG_X86_MCE
108#include <linux/percpu.h> 113void mcheck_init(struct cpuinfo_x86 *c);
114#else
115static inline void mcheck_init(struct cpuinfo_x86 *c) {}
116#endif
117
118#ifdef CONFIG_X86_OLD_MCE
119extern int nr_mce_banks;
120void amd_mcheck_init(struct cpuinfo_x86 *c);
121void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
122void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
123#endif
124
125#ifdef CONFIG_X86_ANCIENT_MCE
126void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
127void winchip_mcheck_init(struct cpuinfo_x86 *c);
128static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
129#else
130static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
131static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
132static inline void enable_p5_mce(void) {}
133#endif
109 134
110void mce_setup(struct mce *m); 135void mce_setup(struct mce *m);
111void mce_log(struct mce *m); 136void mce_log(struct mce *m);
112DECLARE_PER_CPU(struct sys_device, mce_dev); 137DECLARE_PER_CPU(struct sys_device, mce_dev);
113extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
114 138
115/* 139/*
116 * To support more than 128 would need to escape the predefined 140 * To support more than 128 would need to escape the predefined
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
145DECLARE_PER_CPU(unsigned, mce_exception_count); 169DECLARE_PER_CPU(unsigned, mce_exception_count);
146DECLARE_PER_CPU(unsigned, mce_poll_count); 170DECLARE_PER_CPU(unsigned, mce_poll_count);
147 171
148void mce_log_therm_throt_event(__u64 status);
149
150extern atomic_t mce_entry; 172extern atomic_t mce_entry;
151 173
152void do_machine_check(struct pt_regs *, long);
153
154typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); 174typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
155DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); 175DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
156 176
@@ -167,13 +187,32 @@ void mce_notify_process(void);
167DECLARE_PER_CPU(struct mce, injectm); 187DECLARE_PER_CPU(struct mce, injectm);
168extern struct file_operations mce_chrdev_ops; 188extern struct file_operations mce_chrdev_ops;
169 189
170#ifdef CONFIG_X86_MCE 190/*
171void mcheck_init(struct cpuinfo_x86 *c); 191 * Exception handler
172#else 192 */
173#define mcheck_init(c) do { } while (0) 193
174#endif 194/* Call the installed machine check handler for this CPU setup. */
195extern void (*machine_check_vector)(struct pt_regs *, long error_code);
196void do_machine_check(struct pt_regs *, long);
197
198/*
199 * Threshold handler
200 */
175 201
176extern void (*mce_threshold_vector)(void); 202extern void (*mce_threshold_vector)(void);
203extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
204
205/*
206 * Thermal handler
207 */
208
209void intel_init_thermal(struct cpuinfo_x86 *c);
210
211#ifdef CONFIG_X86_NEW_MCE
212void mce_log_therm_throt_event(__u64 status);
213#else
214static inline void mce_log_therm_throt_event(__u64 status) {}
215#endif
177 216
178#endif /* __KERNEL__ */ 217#endif /* __KERNEL__ */
179#endif /* _ASM_X86_MCE_H */ 218#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 22603764e7db..48ad9d29484a 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -3,13 +3,10 @@
3 3
4#include <asm/msr-index.h> 4#include <asm/msr-index.h>
5 5
6#ifndef __ASSEMBLY__
7# include <linux/types.h>
8#endif
9
10#ifdef __KERNEL__ 6#ifdef __KERNEL__
11#ifndef __ASSEMBLY__ 7#ifndef __ASSEMBLY__
12 8
9#include <linux/types.h>
13#include <asm/asm.h> 10#include <asm/asm.h>
14#include <asm/errno.h> 11#include <asm/errno.h>
15#include <asm/cpumask.h> 12#include <asm/cpumask.h>
@@ -264,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
264#endif /* CONFIG_SMP */ 261#endif /* CONFIG_SMP */
265#endif /* __ASSEMBLY__ */ 262#endif /* __ASSEMBLY__ */
266#endif /* __KERNEL__ */ 263#endif /* __KERNEL__ */
267
268
269#endif /* _ASM_X86_MSR_H */ 264#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8d382d3abf38..7639dbf5d223 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -41,7 +41,7 @@
41 41
42/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ 42/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
43#define __PHYSICAL_MASK_SHIFT 46 43#define __PHYSICAL_MASK_SHIFT 46
44#define __VIRTUAL_MASK_SHIFT 48 44#define __VIRTUAL_MASK_SHIFT 47
45 45
46/* 46/*
47 * Kernel image size is limited to 512 MB (see level2_kernel_pgt in 47 * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index b51a1e8b0baf..1ff685ca221c 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -91,7 +91,7 @@ extern void pci_iommu_alloc(void);
91 91
92#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) 92#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
93 93
94#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) 94#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG)
95 95
96#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ 96#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
97 dma_addr_t ADDR_NAME; 97 dma_addr_t ADDR_NAME;
@@ -130,6 +130,7 @@ extern void pci_iommu_alloc(void);
130 130
131/* generic pci stuff */ 131/* generic pci stuff */
132#include <asm-generic/pci.h> 132#include <asm-generic/pci.h>
133#define PCIBIOS_MAX_MEM_32 0xffffffff
133 134
134#ifdef CONFIG_NUMA 135#ifdef CONFIG_NUMA
135/* Returns the node based on pci bus */ 136/* Returns the node based on pci bus */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index e60fd3e14bdf..b399988eee3a 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -121,6 +121,9 @@ extern int __init pcibios_init(void);
121extern int __init pci_mmcfg_arch_init(void); 121extern int __init pci_mmcfg_arch_init(void);
122extern void __init pci_mmcfg_arch_free(void); 122extern void __init pci_mmcfg_arch_free(void);
123 123
124extern struct acpi_mcfg_allocation *pci_mmcfg_config;
125extern int pci_mmcfg_config_num;
126
124/* 127/*
125 * AMD Fam10h CPUs are buggy, and cannot access MMIO config space 128 * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
126 * on their northbrige except through the * %eax register. As such, you MUST 129 * on their northbrige except through the * %eax register. As such, you MUST
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 02ecb30982a3..103f1ddb0d85 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -42,6 +42,7 @@
42 42
43#else /* ...!ASSEMBLY */ 43#else /* ...!ASSEMBLY */
44 44
45#include <linux/kernel.h>
45#include <linux/stringify.h> 46#include <linux/stringify.h>
46 47
47#ifdef CONFIG_SMP 48#ifdef CONFIG_SMP
@@ -155,6 +156,15 @@ do { \
155/* We can use this directly for local CPU (faster). */ 156/* We can use this directly for local CPU (faster). */
156DECLARE_PER_CPU(unsigned long, this_cpu_off); 157DECLARE_PER_CPU(unsigned long, this_cpu_off);
157 158
159#ifdef CONFIG_NEED_MULTIPLE_NODES
160void *pcpu_lpage_remapped(void *kaddr);
161#else
162static inline void *pcpu_lpage_remapped(void *kaddr)
163{
164 return NULL;
165}
166#endif
167
158#endif /* !__ASSEMBLY__ */ 168#endif /* !__ASSEMBLY__ */
159 169
160#ifdef CONFIG_SMP 170#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b3..fa64e401589d 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,14 +84,12 @@ union cpuid10_edx {
84#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b 84#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
85#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) 85#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
86 86
87extern void set_perf_counter_pending(void);
88
89#define clear_perf_counter_pending() do { } while (0)
90#define test_perf_counter_pending() (0)
91
92#ifdef CONFIG_PERF_COUNTERS 87#ifdef CONFIG_PERF_COUNTERS
93extern void init_hw_perf_counters(void); 88extern void init_hw_perf_counters(void);
94extern void perf_counters_lapic_init(void); 89extern void perf_counters_lapic_init(void);
90
91#define PERF_COUNTER_INDEX_OFFSET 0
92
95#else 93#else
96static inline void init_hw_perf_counters(void) { } 94static inline void init_hw_perf_counters(void) { }
97static inline void perf_counters_lapic_init(void) { } 95static inline void perf_counters_lapic_init(void) { }
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 31bd120cf2a2..01fd9461d323 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
49#endif 49#endif
50 50
51#if defined(CONFIG_HIGHPTE) 51#if defined(CONFIG_HIGHPTE)
52#define __KM_PTE \
53 (in_nmi() ? KM_NMI_PTE : \
54 in_irq() ? KM_IRQ_PTE : \
55 KM_PTE0)
52#define pte_offset_map(dir, address) \ 56#define pte_offset_map(dir, address) \
53 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ 57 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
54 pte_index((address))) 58 pte_index((address)))
55#define pte_offset_map_nested(dir, address) \ 59#define pte_offset_map_nested(dir, address) \
56 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ 60 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
57 pte_index((address))) 61 pte_index((address)))
58#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) 62#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
59#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) 63#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
60#else 64#else
61#define pte_offset_map(dir, address) \ 65#define pte_offset_map(dir, address) \
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index abde308fdb0f..c57a30117149 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -165,10 +165,7 @@ extern void cleanup_highmap(void);
165 165
166/* fs/proc/kcore.c */ 166/* fs/proc/kcore.c */
167#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) 167#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
168#define kc_offset_to_vaddr(o) \ 168#define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK)
169 (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \
170 ? ((o) | ~__VIRTUAL_MASK) \
171 : (o))
172 169
173#define __HAVE_ARCH_PTE_SAME 170#define __HAVE_ARCH_PTE_SAME
174#endif /* !__ASSEMBLY__ */ 171#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 49fb3ecf3bb3..621f56d73121 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -22,7 +22,14 @@ extern int reboot_force;
22 22
23long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); 23long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
24 24
25#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) 25/*
26#define round_down(x, y) ((x) & ~((y) - 1)) 26 * This looks more complex than it should be. But we need to
27 * get the type for the ~ right in round_down (it needs to be
28 * as wide as the result!), and we want to evaluate the macro
29 * arguments just once each.
30 */
31#define __round_mask(x,y) ((__typeof__(x))((y)-1))
32#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
33#define round_down(x,y) ((x) & ~__round_mask(x,y))
27 34
28#endif /* _ASM_X86_PROTO_H */ 35#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h
deleted file mode 100644
index c62349ee7860..000000000000
--- a/arch/x86/include/asm/therm_throt.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _ASM_X86_THERM_THROT_H
2#define _ASM_X86_THERM_THROT_H
3
4#include <asm/atomic.h>
5
6extern atomic_t therm_throt_en;
7int therm_throt_process(int curr);
8
9#endif /* _ASM_X86_THERM_THROT_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index bd37ed444a21..20ca9c4d4686 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -45,12 +45,16 @@ extern int no_timer_check;
45 */ 45 */
46 46
47DECLARE_PER_CPU(unsigned long, cyc2ns); 47DECLARE_PER_CPU(unsigned long, cyc2ns);
48DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
48 49
49#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ 50#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
50 51
51static inline unsigned long long __cycles_2_ns(unsigned long long cyc) 52static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
52{ 53{
53 return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; 54 int cpu = smp_processor_id();
55 unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
56 ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR;
57 return ns;
54} 58}
55 59
56static inline unsigned long long cycles_2_ns(unsigned long long cyc) 60static inline unsigned long long cycles_2_ns(unsigned long long cyc)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index b685ece89d5c..20e6a795e160 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,7 @@
25#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) 25#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
26 26
27#define KERNEL_DS MAKE_MM_SEG(-1UL) 27#define KERNEL_DS MAKE_MM_SEG(-1UL)
28#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) 28#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
29 29
30#define get_ds() (KERNEL_DS) 30#define get_ds() (KERNEL_DS)
31#define get_fs() (current_thread_info()->addr_limit) 31#define get_fs() (current_thread_info()->addr_limit)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 631086159c53..6b8ca3a0285d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -44,11 +44,7 @@
44 44
45static int __initdata acpi_force = 0; 45static int __initdata acpi_force = 0;
46u32 acpi_rsdt_forced; 46u32 acpi_rsdt_forced;
47#ifdef CONFIG_ACPI 47int acpi_disabled;
48int acpi_disabled = 0;
49#else
50int acpi_disabled = 1;
51#endif
52EXPORT_SYMBOL(acpi_disabled); 48EXPORT_SYMBOL(acpi_disabled);
53 49
54#ifdef CONFIG_X86_64 50#ifdef CONFIG_X86_64
@@ -122,72 +118,6 @@ void __init __acpi_unmap_table(char *map, unsigned long size)
122 early_iounmap(map, size); 118 early_iounmap(map, size);
123} 119}
124 120
125#ifdef CONFIG_PCI_MMCONFIG
126
127static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
128
129/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
130struct acpi_mcfg_allocation *pci_mmcfg_config;
131int pci_mmcfg_config_num;
132
133static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
134{
135 if (!strcmp(mcfg->header.oem_id, "SGI"))
136 acpi_mcfg_64bit_base_addr = TRUE;
137
138 return 0;
139}
140
141int __init acpi_parse_mcfg(struct acpi_table_header *header)
142{
143 struct acpi_table_mcfg *mcfg;
144 unsigned long i;
145 int config_size;
146
147 if (!header)
148 return -EINVAL;
149
150 mcfg = (struct acpi_table_mcfg *)header;
151
152 /* how many config structures do we have */
153 pci_mmcfg_config_num = 0;
154 i = header->length - sizeof(struct acpi_table_mcfg);
155 while (i >= sizeof(struct acpi_mcfg_allocation)) {
156 ++pci_mmcfg_config_num;
157 i -= sizeof(struct acpi_mcfg_allocation);
158 };
159 if (pci_mmcfg_config_num == 0) {
160 printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
161 return -ENODEV;
162 }
163
164 config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
165 pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
166 if (!pci_mmcfg_config) {
167 printk(KERN_WARNING PREFIX
168 "No memory for MCFG config tables\n");
169 return -ENOMEM;
170 }
171
172 memcpy(pci_mmcfg_config, &mcfg[1], config_size);
173
174 acpi_mcfg_oem_check(mcfg);
175
176 for (i = 0; i < pci_mmcfg_config_num; ++i) {
177 if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
178 !acpi_mcfg_64bit_base_addr) {
179 printk(KERN_ERR PREFIX
180 "MMCONFIG not in low 4GB of memory\n");
181 kfree(pci_mmcfg_config);
182 pci_mmcfg_config_num = 0;
183 return -ENODEV;
184 }
185 }
186
187 return 0;
188}
189#endif /* CONFIG_PCI_MMCONFIG */
190
191#ifdef CONFIG_X86_LOCAL_APIC 121#ifdef CONFIG_X86_LOCAL_APIC
192static int __init acpi_parse_madt(struct acpi_table_header *table) 122static int __init acpi_parse_madt(struct acpi_table_header *table)
193{ 123{
@@ -1519,14 +1449,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1519 }, 1449 },
1520 { 1450 {
1521 .callback = force_acpi_ht, 1451 .callback = force_acpi_ht,
1522 .ident = "ASUS P4B266",
1523 .matches = {
1524 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
1525 DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
1526 },
1527 },
1528 {
1529 .callback = force_acpi_ht,
1530 .ident = "ASUS P2B-DS", 1452 .ident = "ASUS P2B-DS",
1531 .matches = { 1453 .matches = {
1532 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), 1454 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index bbbe4bbb6f34..8c44c232efcb 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -34,12 +34,22 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
34 flags->bm_check = 1; 34 flags->bm_check = 1;
35 else if (c->x86_vendor == X86_VENDOR_INTEL) { 35 else if (c->x86_vendor == X86_VENDOR_INTEL) {
36 /* 36 /*
37 * Today all CPUs that support C3 share cache. 37 * Today all MP CPUs that support C3 share cache.
38 * TBD: This needs to look at cache shared map, once 38 * And caches should not be flushed by software while
39 * multi-core detection patch makes to the base. 39 * entering C3 type state.
40 */ 40 */
41 flags->bm_check = 1; 41 flags->bm_check = 1;
42 } 42 }
43
44 /*
45 * On all recent Intel platforms, ARB_DISABLE is a nop.
46 * So, set bm_control to zero to indicate that ARB_DISABLE
47 * is not required while entering C3 type state on
48 * P4, Core and beyond CPUs
49 */
50 if (c->x86_vendor == X86_VENDOR_INTEL &&
51 (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)))
52 flags->bm_control = 0;
43} 53}
44EXPORT_SYMBOL(acpi_processor_power_init_bm_check); 54EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
45 55
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index 7c074eec39fb..d296f4a195c9 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -72,6 +72,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
72 return; 72 return;
73} 73}
74 74
75
75/* Initialize _PDC data based on the CPU vendor */ 76/* Initialize _PDC data based on the CPU vendor */
76void arch_acpi_processor_init_pdc(struct acpi_processor *pr) 77void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
77{ 78{
@@ -85,3 +86,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
85} 86}
86 87
87EXPORT_SYMBOL(arch_acpi_processor_init_pdc); 88EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
89
90void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
91{
92 if (pr->pdc) {
93 kfree(pr->pdc->pointer->buffer.pointer);
94 kfree(pr->pdc->pointer);
95 kfree(pr->pdc);
96 pr->pdc = NULL;
97 }
98}
99
100EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 1c60554537c3..9372f0406ad4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -434,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
434 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 434 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
435} 435}
436 436
437/* Flush the whole IO/TLB for a given protection domain - including PDE */
438static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
439{
440 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
441
442 INC_STATS_COUNTER(domain_flush_single);
443
444 iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
445}
446
437/* 447/*
438 * This function is used to flush the IO/TLB for a given protection domain 448 * This function is used to flush the IO/TLB for a given protection domain
439 * on every IOMMU in the system 449 * on every IOMMU in the system
@@ -1078,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu,
1078 amd_iommu_pd_table[devid] = domain; 1088 amd_iommu_pd_table[devid] = domain;
1079 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1089 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1080 1090
1091 /*
1092 * We might boot into a crash-kernel here. The crashed kernel
1093 * left the caches in the IOMMU dirty. So we have to flush
1094 * here to evict all dirty stuff.
1095 */
1081 iommu_queue_inv_dev_entry(iommu, devid); 1096 iommu_queue_inv_dev_entry(iommu, devid);
1097 iommu_flush_tlb_pde(iommu, domain->id);
1082} 1098}
1083 1099
1084/* 1100/*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 238989ec077d..10b2accd12ea 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -260,6 +260,14 @@ static void iommu_enable(struct amd_iommu *iommu)
260 260
261static void iommu_disable(struct amd_iommu *iommu) 261static void iommu_disable(struct amd_iommu *iommu)
262{ 262{
263 /* Disable command buffer */
264 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
265
266 /* Disable event logging and event interrupts */
267 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
268 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
269
270 /* Disable IOMMU hardware itself */
263 iommu_feature_disable(iommu, CONTROL_IOMMU_EN); 271 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
264} 272}
265 273
@@ -478,6 +486,10 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu)
478 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, 486 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
479 &entry, sizeof(entry)); 487 &entry, sizeof(entry));
480 488
489 /* set head and tail to zero manually */
490 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
491 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
492
481 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); 493 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
482} 494}
483 495
@@ -1042,6 +1054,7 @@ static void enable_iommus(void)
1042 struct amd_iommu *iommu; 1054 struct amd_iommu *iommu;
1043 1055
1044 for_each_iommu(iommu) { 1056 for_each_iommu(iommu) {
1057 iommu_disable(iommu);
1045 iommu_set_device_table(iommu); 1058 iommu_set_device_table(iommu);
1046 iommu_enable_command_buffer(iommu); 1059 iommu_enable_command_buffer(iommu);
1047 iommu_enable_event_buffer(iommu); 1060 iommu_enable_event_buffer(iommu);
@@ -1066,12 +1079,6 @@ static void disable_iommus(void)
1066 1079
1067static int amd_iommu_resume(struct sys_device *dev) 1080static int amd_iommu_resume(struct sys_device *dev)
1068{ 1081{
1069 /*
1070 * Disable IOMMUs before reprogramming the hardware registers.
1071 * IOMMU is still enabled from the resume kernel.
1072 */
1073 disable_iommus();
1074
1075 /* re-load the hardware */ 1082 /* re-load the hardware */
1076 enable_iommus(); 1083 enable_iommus();
1077 1084
@@ -1079,8 +1086,8 @@ static int amd_iommu_resume(struct sys_device *dev)
1079 * we have to flush after the IOMMUs are enabled because a 1086 * we have to flush after the IOMMUs are enabled because a
1080 * disabled IOMMU will never execute the commands we send 1087 * disabled IOMMU will never execute the commands we send
1081 */ 1088 */
1082 amd_iommu_flush_all_domains();
1083 amd_iommu_flush_all_devices(); 1089 amd_iommu_flush_all_devices();
1090 amd_iommu_flush_all_domains();
1084 1091
1085 return 0; 1092 return 0;
1086} 1093}
@@ -1273,6 +1280,11 @@ free:
1273 goto out; 1280 goto out;
1274} 1281}
1275 1282
1283void amd_iommu_shutdown(void)
1284{
1285 disable_iommus();
1286}
1287
1276/**************************************************************************** 1288/****************************************************************************
1277 * 1289 *
1278 * Early detect code. This code runs at IOMMU detection time in the DMA 1290 * Early detect code. This code runs at IOMMU detection time in the DMA
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ef8d9290c7ea..4d0216fcb36c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -462,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
462static void 462static void
463__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 463__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
464{ 464{
465 union entry_union eu; 465 union entry_union eu = {{0, 0}};
466
466 eu.entry = e; 467 eu.entry = e;
467 io_apic_write(apic, 0x11 + 2*pin, eu.w2); 468 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
468 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 469 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -1413,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq,
1413 irte.vector = vector; 1414 irte.vector = vector;
1414 irte.dest_id = IRTE_DEST(destination); 1415 irte.dest_id = IRTE_DEST(destination);
1415 1416
1417 /* Set source-id of interrupt request */
1418 set_ioapic_sid(&irte, apic_id);
1419
1416 modify_irte(irq, &irte); 1420 modify_irte(irq, &irte);
1417 1421
1418 ir_entry->index2 = (index >> 15) & 0x1; 1422 ir_entry->index2 = (index >> 15) & 0x1;
@@ -2003,7 +2007,9 @@ void disable_IO_APIC(void)
2003 /* 2007 /*
2004 * Use virtual wire A mode when interrupt remapping is enabled. 2008 * Use virtual wire A mode when interrupt remapping is enabled.
2005 */ 2009 */
2006 disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); 2010 if (cpu_has_apic)
2011 disconnect_bsp_APIC(!intr_remapping_enabled &&
2012 ioapic_i8259.pin != -1);
2007} 2013}
2008 2014
2009#ifdef CONFIG_X86_32 2015#ifdef CONFIG_X86_32
@@ -3287,6 +3293,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3287 irte.vector = cfg->vector; 3293 irte.vector = cfg->vector;
3288 irte.dest_id = IRTE_DEST(dest); 3294 irte.dest_id = IRTE_DEST(dest);
3289 3295
3296 /* Set source-id of interrupt request */
3297 set_msi_sid(&irte, pdev);
3298
3290 modify_irte(irq, &irte); 3299 modify_irte(irq, &irte);
3291 3300
3292 msg->address_hi = MSI_ADDR_BASE_HI; 3301 msg->address_hi = MSI_ADDR_BASE_HI;
@@ -3567,7 +3576,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3567 3576
3568#endif /* CONFIG_SMP */ 3577#endif /* CONFIG_SMP */
3569 3578
3570struct irq_chip dmar_msi_type = { 3579static struct irq_chip dmar_msi_type = {
3571 .name = "DMAR_MSI", 3580 .name = "DMAR_MSI",
3572 .unmask = dmar_msi_unmask, 3581 .unmask = dmar_msi_unmask,
3573 .mask = dmar_msi_mask, 3582 .mask = dmar_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 440a8bccd91a..0c0182cc947d 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -20,23 +20,12 @@
20#include <asm/apic.h> 20#include <asm/apic.h>
21#include <asm/setup.h> 21#include <asm/setup.h>
22 22
23#include <linux/threads.h>
24#include <linux/cpumask.h>
25#include <asm/mpspec.h>
26#include <asm/fixmap.h>
27#include <asm/apicdef.h>
28#include <linux/kernel.h>
29#include <linux/string.h>
30#include <linux/smp.h> 23#include <linux/smp.h>
31#include <linux/init.h>
32#include <asm/ipi.h> 24#include <asm/ipi.h>
33 25
34#include <linux/smp.h>
35#include <linux/init.h>
36#include <linux/interrupt.h> 26#include <linux/interrupt.h>
37#include <asm/acpi.h> 27#include <asm/acpi.h>
38#include <asm/e820.h> 28#include <asm/e820.h>
39#include <asm/setup.h>
40 29
41#ifdef CONFIG_HOTPLUG_CPU 30#ifdef CONFIG_HOTPLUG_CPU
42#define DEFAULT_SEND_IPI (1) 31#define DEFAULT_SEND_IPI (1)
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 344eee4ac0a4..eafdfbd1ea95 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -44,7 +44,6 @@
44#include <asm/ipi.h> 44#include <asm/ipi.h>
45#include <linux/kernel.h> 45#include <linux/kernel.h>
46#include <linux/string.h> 46#include <linux/string.h>
47#include <linux/init.h>
48#include <linux/gfp.h> 47#include <linux/gfp.h>
49#include <linux/smp.h> 48#include <linux/smp.h>
50 49
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e5b27d8f1b47..28e5f5956042 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
258{ 258{
259#ifdef CONFIG_X86_HT 259#ifdef CONFIG_X86_HT
260 unsigned bits; 260 unsigned bits;
261 int cpu = smp_processor_id();
261 262
262 bits = c->x86_coreid_bits; 263 bits = c->x86_coreid_bits;
263
264 /* Low order bits define the core id (index of core in socket) */ 264 /* Low order bits define the core id (index of core in socket) */
265 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); 265 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
266 /* Convert the initial APIC ID into the socket ID */ 266 /* Convert the initial APIC ID into the socket ID */
267 c->phys_proc_id = c->initial_apicid >> bits; 267 c->phys_proc_id = c->initial_apicid >> bits;
268 /* use socket ID also for last level cache */
269 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
268#endif 270#endif
269} 271}
270 272
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9fa33886c0d7..f1961c07af9a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -108,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
108 /* data */ 108 /* data */
109 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 109 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
110 110
111 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 111 [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
112 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 112 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
113 GDT_STACK_CANARY_INIT 113 GDT_STACK_CANARY_INIT
114#endif 114#endif
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 45004faf67ea..188a1ca5ad2b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,12 @@
1obj-y = mce.o therm_throt.o 1obj-y = mce.o
2 2
3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o 3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o 4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o 5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
6obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o 6obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
7obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o 7obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
8obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
9obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o 8obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
10obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o 9obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
11obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o 10obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
11
12obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index 89e510424152..b945d5dbc609 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -10,10 +10,9 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* Machine Check Handler For AMD Athlon/Duron: */ 16/* Machine Check Handler For AMD Athlon/Duron: */
18static void k7_machine_check(struct pt_regs *regs, long error_code) 17static void k7_machine_check(struct pt_regs *regs, long error_code)
19{ 18{
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fabba15e4558..af425b83202b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,7 +44,6 @@
44#include <asm/msr.h> 44#include <asm/msr.h>
45 45
46#include "mce-internal.h" 46#include "mce-internal.h"
47#include "mce.h"
48 47
49/* Handle unconfigured int18 (should never happen) */ 48/* Handle unconfigured int18 (should never happen) */
50static void unexpected_machine_check(struct pt_regs *regs, long error_code) 49static void unexpected_machine_check(struct pt_regs *regs, long error_code)
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
57void (*machine_check_vector)(struct pt_regs *, long error_code) = 56void (*machine_check_vector)(struct pt_regs *, long error_code) =
58 unexpected_machine_check; 57 unexpected_machine_check;
59 58
60int mce_disabled; 59int mce_disabled __read_mostly;
61 60
62#ifdef CONFIG_X86_NEW_MCE 61#ifdef CONFIG_X86_NEW_MCE
63 62
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
76 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors 75 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
77 * 3: never panic or SIGBUS, log all errors (for testing only) 76 * 3: never panic or SIGBUS, log all errors (for testing only)
78 */ 77 */
79static int tolerant = 1; 78static int tolerant __read_mostly = 1;
80static int banks; 79static int banks __read_mostly;
81static u64 *bank; 80static u64 *bank __read_mostly;
82static unsigned long notify_user; 81static int rip_msr __read_mostly;
83static int rip_msr; 82static int mce_bootlog __read_mostly = -1;
84static int mce_bootlog = -1; 83static int monarch_timeout __read_mostly = -1;
85static int monarch_timeout = -1; 84static int mce_panic_timeout __read_mostly;
86static int mce_panic_timeout; 85static int mce_dont_log_ce __read_mostly;
87static int mce_dont_log_ce; 86int mce_cmci_disabled __read_mostly;
88int mce_cmci_disabled; 87int mce_ignore_ce __read_mostly;
89int mce_ignore_ce; 88int mce_ser __read_mostly;
90int mce_ser; 89
91 90/* User mode helper program triggered by machine check event */
92static char trigger[128]; 91static unsigned long mce_need_notify;
93static char *trigger_argv[2] = { trigger, NULL }; 92static char mce_helper[128];
93static char *mce_helper_argv[2] = { mce_helper, NULL };
94 94
95static unsigned long dont_init_banks; 95static unsigned long dont_init_banks;
96 96
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
180 wmb(); 180 wmb();
181 181
182 mce->finished = 1; 182 mce->finished = 1;
183 set_bit(0, &notify_user); 183 set_bit(0, &mce_need_notify);
184} 184}
185 185
186static void print_mce(struct mce *m) 186static void print_mce(struct mce *m)
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
691 * in the entry order. 691 * in the entry order.
692 * TBD double check parallel CPU hotunplug 692 * TBD double check parallel CPU hotunplug
693 */ 693 */
694static int mce_start(int no_way_out, int *order) 694static int mce_start(int *no_way_out)
695{ 695{
696 int nwo; 696 int order;
697 int cpus = num_online_cpus(); 697 int cpus = num_online_cpus();
698 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 698 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
699 699
700 if (!timeout) { 700 if (!timeout)
701 *order = -1; 701 return -1;
702 return no_way_out;
703 }
704 702
705 atomic_add(no_way_out, &global_nwo); 703 atomic_add(*no_way_out, &global_nwo);
704 /*
705 * global_nwo should be updated before mce_callin
706 */
707 smp_wmb();
708 order = atomic_add_return(1, &mce_callin);
706 709
707 /* 710 /*
708 * Wait for everyone. 711 * Wait for everyone.
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
710 while (atomic_read(&mce_callin) != cpus) { 713 while (atomic_read(&mce_callin) != cpus) {
711 if (mce_timed_out(&timeout)) { 714 if (mce_timed_out(&timeout)) {
712 atomic_set(&global_nwo, 0); 715 atomic_set(&global_nwo, 0);
713 *order = -1; 716 return -1;
714 return no_way_out;
715 } 717 }
716 ndelay(SPINUNIT); 718 ndelay(SPINUNIT);
717 } 719 }
718 720
719 /* 721 /*
720 * Cache the global no_way_out state. 722 * mce_callin should be read before global_nwo
721 */ 723 */
722 nwo = atomic_read(&global_nwo); 724 smp_rmb();
723 725
724 /* 726 if (order == 1) {
725 * Monarch starts executing now, the others wait. 727 /*
726 */ 728 * Monarch: Starts executing now, the others wait.
727 if (*order == 1) { 729 */
728 atomic_set(&mce_executing, 1); 730 atomic_set(&mce_executing, 1);
729 return nwo; 731 } else {
732 /*
733 * Subject: Now start the scanning loop one by one in
734 * the original callin order.
735 * This way when there are any shared banks it will be
736 * only seen by one CPU before cleared, avoiding duplicates.
737 */
738 while (atomic_read(&mce_executing) < order) {
739 if (mce_timed_out(&timeout)) {
740 atomic_set(&global_nwo, 0);
741 return -1;
742 }
743 ndelay(SPINUNIT);
744 }
730 } 745 }
731 746
732 /* 747 /*
733 * Now start the scanning loop one by one 748 * Cache the global no_way_out state.
734 * in the original callin order.
735 * This way when there are any shared banks it will
736 * be only seen by one CPU before cleared, avoiding duplicates.
737 */ 749 */
738 while (atomic_read(&mce_executing) < *order) { 750 *no_way_out = atomic_read(&global_nwo);
739 if (mce_timed_out(&timeout)) { 751
740 atomic_set(&global_nwo, 0); 752 return order;
741 *order = -1;
742 return no_way_out;
743 }
744 ndelay(SPINUNIT);
745 }
746 return nwo;
747} 753}
748 754
749/* 755/*
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
863 * check handler. 869 * check handler.
864 */ 870 */
865 int order; 871 int order;
866
867 /* 872 /*
868 * If no_way_out gets set, there is no safe way to recover from this 873 * If no_way_out gets set, there is no safe way to recover from this
869 * MCE. If tolerant is cranked up, we'll try anyway. 874 * MCE. If tolerant is cranked up, we'll try anyway.
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
887 if (!banks) 892 if (!banks)
888 goto out; 893 goto out;
889 894
890 order = atomic_add_return(1, &mce_callin);
891 mce_setup(&m); 895 mce_setup(&m);
892 896
893 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 897 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
909 * This way we don't report duplicated events on shared banks 913 * This way we don't report duplicated events on shared banks
910 * because the first one to see it will clear it. 914 * because the first one to see it will clear it.
911 */ 915 */
912 no_way_out = mce_start(no_way_out, &order); 916 order = mce_start(&no_way_out);
913 for (i = 0; i < banks; i++) { 917 for (i = 0; i < banks; i++) {
914 __clear_bit(i, toclear); 918 __clear_bit(i, toclear);
915 if (!bank[i]) 919 if (!bank[i])
@@ -1113,12 +1117,12 @@ static void mcheck_timer(unsigned long data)
1113 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); 1117 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
1114 1118
1115 t->expires = jiffies + *n; 1119 t->expires = jiffies + *n;
1116 add_timer(t); 1120 add_timer_on(t, smp_processor_id());
1117} 1121}
1118 1122
1119static void mce_do_trigger(struct work_struct *work) 1123static void mce_do_trigger(struct work_struct *work)
1120{ 1124{
1121 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); 1125 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
1122} 1126}
1123 1127
1124static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 1128static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
1135 1139
1136 clear_thread_flag(TIF_MCE_NOTIFY); 1140 clear_thread_flag(TIF_MCE_NOTIFY);
1137 1141
1138 if (test_and_clear_bit(0, &notify_user)) { 1142 if (test_and_clear_bit(0, &mce_need_notify)) {
1139 wake_up_interruptible(&mce_wait); 1143 wake_up_interruptible(&mce_wait);
1140 1144
1141 /* 1145 /*
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
1143 * work_pending is always cleared before the function is 1147 * work_pending is always cleared before the function is
1144 * executed. 1148 * executed.
1145 */ 1149 */
1146 if (trigger[0] && !work_pending(&mce_trigger_work)) 1150 if (mce_helper[0] && !work_pending(&mce_trigger_work))
1147 schedule_work(&mce_trigger_work); 1151 schedule_work(&mce_trigger_work);
1148 1152
1149 if (__ratelimit(&ratelimit)) 1153 if (__ratelimit(&ratelimit))
@@ -1245,7 +1249,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
1245 * Various K7s with broken bank 0 around. Always disable 1249 * Various K7s with broken bank 0 around. Always disable
1246 * by default. 1250 * by default.
1247 */ 1251 */
1248 if (c->x86 == 6) 1252 if (c->x86 == 6 && banks > 0)
1249 bank[0] = 0; 1253 bank[0] = 0;
1250 } 1254 }
1251 1255
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
1282 return; 1286 return;
1283 switch (c->x86_vendor) { 1287 switch (c->x86_vendor) {
1284 case X86_VENDOR_INTEL: 1288 case X86_VENDOR_INTEL:
1285 if (mce_p5_enabled()) 1289 intel_p5_mcheck_init(c);
1286 intel_p5_mcheck_init(c);
1287 break; 1290 break;
1288 case X86_VENDOR_CENTAUR: 1291 case X86_VENDOR_CENTAUR:
1289 winchip_mcheck_init(c); 1292 winchip_mcheck_init(c);
@@ -1318,7 +1321,7 @@ static void mce_init_timer(void)
1318 return; 1321 return;
1319 setup_timer(t, mcheck_timer, smp_processor_id()); 1322 setup_timer(t, mcheck_timer, smp_processor_id());
1320 t->expires = round_jiffies(jiffies + *n); 1323 t->expires = round_jiffies(jiffies + *n);
1321 add_timer(t); 1324 add_timer_on(t, smp_processor_id());
1322} 1325}
1323 1326
1324/* 1327/*
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
1609static void mce_cpu_restart(void *data) 1612static void mce_cpu_restart(void *data)
1610{ 1613{
1611 del_timer_sync(&__get_cpu_var(mce_timer)); 1614 del_timer_sync(&__get_cpu_var(mce_timer));
1612 if (mce_available(&current_cpu_data)) 1615 if (!mce_available(&current_cpu_data))
1613 mce_init(); 1616 return;
1617 mce_init();
1614 mce_init_timer(); 1618 mce_init_timer();
1615} 1619}
1616 1620
@@ -1620,6 +1624,26 @@ static void mce_restart(void)
1620 on_each_cpu(mce_cpu_restart, NULL, 1); 1624 on_each_cpu(mce_cpu_restart, NULL, 1);
1621} 1625}
1622 1626
1627/* Toggle features for corrected errors */
1628static void mce_disable_ce(void *all)
1629{
1630 if (!mce_available(&current_cpu_data))
1631 return;
1632 if (all)
1633 del_timer_sync(&__get_cpu_var(mce_timer));
1634 cmci_clear();
1635}
1636
1637static void mce_enable_ce(void *all)
1638{
1639 if (!mce_available(&current_cpu_data))
1640 return;
1641 cmci_reenable();
1642 cmci_recheck();
1643 if (all)
1644 mce_init_timer();
1645}
1646
1623static struct sysdev_class mce_sysclass = { 1647static struct sysdev_class mce_sysclass = {
1624 .suspend = mce_suspend, 1648 .suspend = mce_suspend,
1625 .shutdown = mce_shutdown, 1649 .shutdown = mce_shutdown,
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1659static ssize_t 1683static ssize_t
1660show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) 1684show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
1661{ 1685{
1662 strcpy(buf, trigger); 1686 strcpy(buf, mce_helper);
1663 strcat(buf, "\n"); 1687 strcat(buf, "\n");
1664 return strlen(trigger) + 1; 1688 return strlen(mce_helper) + 1;
1665} 1689}
1666 1690
1667static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1691static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1670 char *p; 1694 char *p;
1671 int len; 1695 int len;
1672 1696
1673 strncpy(trigger, buf, sizeof(trigger)); 1697 strncpy(mce_helper, buf, sizeof(mce_helper));
1674 trigger[sizeof(trigger)-1] = 0; 1698 mce_helper[sizeof(mce_helper)-1] = 0;
1675 len = strlen(trigger); 1699 len = strlen(mce_helper);
1676 p = strchr(trigger, '\n'); 1700 p = strchr(mce_helper, '\n');
1677 1701
1678 if (*p) 1702 if (*p)
1679 *p = 0; 1703 *p = 0;
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1681 return len; 1705 return len;
1682} 1706}
1683 1707
1708static ssize_t set_ignore_ce(struct sys_device *s,
1709 struct sysdev_attribute *attr,
1710 const char *buf, size_t size)
1711{
1712 u64 new;
1713
1714 if (strict_strtoull(buf, 0, &new) < 0)
1715 return -EINVAL;
1716
1717 if (mce_ignore_ce ^ !!new) {
1718 if (new) {
1719 /* disable ce features */
1720 on_each_cpu(mce_disable_ce, (void *)1, 1);
1721 mce_ignore_ce = 1;
1722 } else {
1723 /* enable ce features */
1724 mce_ignore_ce = 0;
1725 on_each_cpu(mce_enable_ce, (void *)1, 1);
1726 }
1727 }
1728 return size;
1729}
1730
1731static ssize_t set_cmci_disabled(struct sys_device *s,
1732 struct sysdev_attribute *attr,
1733 const char *buf, size_t size)
1734{
1735 u64 new;
1736
1737 if (strict_strtoull(buf, 0, &new) < 0)
1738 return -EINVAL;
1739
1740 if (mce_cmci_disabled ^ !!new) {
1741 if (new) {
1742 /* disable cmci */
1743 on_each_cpu(mce_disable_ce, NULL, 1);
1744 mce_cmci_disabled = 1;
1745 } else {
1746 /* enable cmci */
1747 mce_cmci_disabled = 0;
1748 on_each_cpu(mce_enable_ce, NULL, 1);
1749 }
1750 }
1751 return size;
1752}
1753
1684static ssize_t store_int_with_restart(struct sys_device *s, 1754static ssize_t store_int_with_restart(struct sys_device *s,
1685 struct sysdev_attribute *attr, 1755 struct sysdev_attribute *attr,
1686 const char *buf, size_t size) 1756 const char *buf, size_t size)
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
1693static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 1763static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
1694static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 1764static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
1695static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 1765static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
1766static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
1696 1767
1697static struct sysdev_ext_attribute attr_check_interval = { 1768static struct sysdev_ext_attribute attr_check_interval = {
1698 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, 1769 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
1700 &check_interval 1771 &check_interval
1701}; 1772};
1702 1773
1774static struct sysdev_ext_attribute attr_ignore_ce = {
1775 _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
1776 &mce_ignore_ce
1777};
1778
1779static struct sysdev_ext_attribute attr_cmci_disabled = {
1780 _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
1781 &mce_cmci_disabled
1782};
1783
1703static struct sysdev_attribute *mce_attrs[] = { 1784static struct sysdev_attribute *mce_attrs[] = {
1704 &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, 1785 &attr_tolerant.attr,
1786 &attr_check_interval.attr,
1787 &attr_trigger,
1705 &attr_monarch_timeout.attr, 1788 &attr_monarch_timeout.attr,
1789 &attr_dont_log_ce.attr,
1790 &attr_ignore_ce.attr,
1791 &attr_cmci_disabled.attr,
1706 NULL 1792 NULL
1707}; 1793};
1708 1794
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
1712static __cpuinit int mce_create_device(unsigned int cpu) 1798static __cpuinit int mce_create_device(unsigned int cpu)
1713{ 1799{
1714 int err; 1800 int err;
1715 int i; 1801 int i, j;
1716 1802
1717 if (!mce_available(&boot_cpu_data)) 1803 if (!mce_available(&boot_cpu_data))
1718 return -EIO; 1804 return -EIO;
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1730 if (err) 1816 if (err)
1731 goto error; 1817 goto error;
1732 } 1818 }
1733 for (i = 0; i < banks; i++) { 1819 for (j = 0; j < banks; j++) {
1734 err = sysdev_create_file(&per_cpu(mce_dev, cpu), 1820 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
1735 &bank_attrs[i]); 1821 &bank_attrs[j]);
1736 if (err) 1822 if (err)
1737 goto error2; 1823 goto error2;
1738 } 1824 }
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1740 1826
1741 return 0; 1827 return 0;
1742error2: 1828error2:
1743 while (--i >= 0) 1829 while (--j >= 0)
1744 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); 1830 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
1745error: 1831error:
1746 while (--i >= 0) 1832 while (--i >= 0)
1747 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1833 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
1883 if (!mce_available(&boot_cpu_data)) 1969 if (!mce_available(&boot_cpu_data))
1884 return -EIO; 1970 return -EIO;
1885 1971
1886 alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); 1972 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
1887 1973
1888 err = mce_init_banks(); 1974 err = mce_init_banks();
1889 if (err) 1975 if (err)
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
1915/* This has to be run for each processor */ 2001/* This has to be run for each processor */
1916void mcheck_init(struct cpuinfo_x86 *c) 2002void mcheck_init(struct cpuinfo_x86 *c)
1917{ 2003{
1918 if (mce_disabled == 1) 2004 if (mce_disabled)
1919 return; 2005 return;
1920 2006
1921 switch (c->x86_vendor) { 2007 switch (c->x86_vendor) {
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
1945 2031
1946static int __init mcheck_enable(char *str) 2032static int __init mcheck_enable(char *str)
1947{ 2033{
1948 mce_disabled = -1; 2034 mce_p5_enabled = 1;
1949 return 1; 2035 return 1;
1950} 2036}
1951
1952__setup("mce", mcheck_enable); 2037__setup("mce", mcheck_enable);
1953 2038
1954#endif /* CONFIG_X86_OLD_MCE */ 2039#endif /* CONFIG_X86_OLD_MCE */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
deleted file mode 100644
index 84a552b458c8..000000000000
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ /dev/null
@@ -1,38 +0,0 @@
1#include <linux/init.h>
2#include <asm/mce.h>
3
4#ifdef CONFIG_X86_OLD_MCE
5void amd_mcheck_init(struct cpuinfo_x86 *c);
6void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
7void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
8#endif
9
10#ifdef CONFIG_X86_ANCIENT_MCE
11void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
12void winchip_mcheck_init(struct cpuinfo_x86 *c);
13extern int mce_p5_enable;
14static inline int mce_p5_enabled(void) { return mce_p5_enable; }
15static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
16#else
17static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
18static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
19static inline int mce_p5_enabled(void) { return 0; }
20static inline void enable_p5_mce(void) { }
21#endif
22
23/* Call the installed machine check handler for this CPU setup. */
24extern void (*machine_check_vector)(struct pt_regs *, long error_code);
25
26#ifdef CONFIG_X86_OLD_MCE
27
28extern int nr_mce_banks;
29
30void intel_set_thermal_handler(void);
31
32#else
33
34static inline void intel_set_thermal_handler(void) { }
35
36#endif
37
38void intel_init_thermal(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ddae21620bda..ddae21620bda 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 2b011d2d8579..e1acec0f7a32 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -1,74 +1,226 @@
1/* 1/*
2 * Common code for Intel machine checks 2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
3 */ 6 */
4#include <linux/interrupt.h>
5#include <linux/kernel.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/smp.h>
9 7
10#include <asm/therm_throt.h> 8#include <linux/init.h>
11#include <asm/processor.h> 9#include <linux/interrupt.h>
12#include <asm/system.h> 10#include <linux/percpu.h>
13#include <asm/apic.h> 11#include <asm/apic.h>
12#include <asm/processor.h>
14#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/mce.h>
15
16/*
17 * Support for Intel Corrected Machine Check Interrupts. This allows
18 * the CPU to raise an interrupt when a corrected machine check happened.
19 * Normally we pick those up using a regular polling timer.
20 * Also supports reliable discovery of shared banks.
21 */
15 22
16#include "mce.h" 23static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
17 24
18void intel_init_thermal(struct cpuinfo_x86 *c) 25/*
26 * cmci_discover_lock protects against parallel discovery attempts
27 * which could race against each other.
28 */
29static DEFINE_SPINLOCK(cmci_discover_lock);
30
31#define CMCI_THRESHOLD 1
32
33static int cmci_supported(int *banks)
19{ 34{
20 unsigned int cpu = smp_processor_id(); 35 u64 cap;
21 int tm2 = 0;
22 u32 l, h;
23 36
24 /* Thermal monitoring depends on ACPI and clock modulation*/ 37 if (mce_cmci_disabled || mce_ignore_ce)
25 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) 38 return 0;
26 return;
27 39
28 /* 40 /*
29 * First check if its enabled already, in which case there might 41 * Vendor check is not strictly needed, but the initial
30 * be some SMM goo which handles it, so we can't even put a handler 42 * initialization is vendor keyed and this
31 * since it might be delivered via SMI already: 43 * makes sure none of the backdoors are entered otherwise.
32 */ 44 */
33 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 45 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
34 h = apic_read(APIC_LVTTHMR); 46 return 0;
35 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { 47 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
36 printk(KERN_DEBUG 48 return 0;
37 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 49 rdmsrl(MSR_IA32_MCG_CAP, cap);
38 return; 50 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
51 return !!(cap & MCG_CMCI_P);
52}
53
54/*
55 * The interrupt handler. This is called on every event.
56 * Just call the poller directly to log any events.
57 * This could in theory increase the threshold under high load,
58 * but doesn't for now.
59 */
60static void intel_threshold_interrupt(void)
61{
62 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
63 mce_notify_irq();
64}
65
66static void print_update(char *type, int *hdr, int num)
67{
68 if (*hdr == 0)
69 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
70 *hdr = 1;
71 printk(KERN_CONT " %s:%d", type, num);
72}
73
74/*
75 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
76 * on this CPU. Use the algorithm recommended in the SDM to discover shared
77 * banks.
78 */
79static void cmci_discover(int banks, int boot)
80{
81 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
82 unsigned long flags;
83 int hdr = 0;
84 int i;
85
86 spin_lock_irqsave(&cmci_discover_lock, flags);
87 for (i = 0; i < banks; i++) {
88 u64 val;
89
90 if (test_bit(i, owned))
91 continue;
92
93 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
94
95 /* Already owned by someone else? */
96 if (val & CMCI_EN) {
97 if (test_and_clear_bit(i, owned) || boot)
98 print_update("SHD", &hdr, i);
99 __clear_bit(i, __get_cpu_var(mce_poll_banks));
100 continue;
101 }
102
103 val |= CMCI_EN | CMCI_THRESHOLD;
104 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
105 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
106
107 /* Did the enable bit stick? -- the bank supports CMCI */
108 if (val & CMCI_EN) {
109 if (!test_and_set_bit(i, owned) || boot)
110 print_update("CMCI", &hdr, i);
111 __clear_bit(i, __get_cpu_var(mce_poll_banks));
112 } else {
113 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
114 }
39 } 115 }
116 spin_unlock_irqrestore(&cmci_discover_lock, flags);
117 if (hdr)
118 printk(KERN_CONT "\n");
119}
120
121/*
122 * Just in case we missed an event during initialization check
123 * all the CMCI owned banks.
124 */
125void cmci_recheck(void)
126{
127 unsigned long flags;
128 int banks;
129
130 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
131 return;
132 local_irq_save(flags);
133 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
134 local_irq_restore(flags);
135}
40 136
41 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) 137/*
42 tm2 = 1; 138 * Disable CMCI on this CPU for all banks it owns when it goes down.
139 * This allows other CPUs to claim the banks on rediscovery.
140 */
141void cmci_clear(void)
142{
143 unsigned long flags;
144 int i;
145 int banks;
146 u64 val;
43 147
44 /* Check whether a vector already exists */ 148 if (!cmci_supported(&banks))
45 if (h & APIC_VECTOR_MASK) {
46 printk(KERN_DEBUG
47 "CPU%d: Thermal LVT vector (%#x) already installed\n",
48 cpu, (h & APIC_VECTOR_MASK));
49 return; 149 return;
150 spin_lock_irqsave(&cmci_discover_lock, flags);
151 for (i = 0; i < banks; i++) {
152 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
153 continue;
154 /* Disable CMCI */
155 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
156 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
157 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
158 __clear_bit(i, __get_cpu_var(mce_banks_owned));
50 } 159 }
160 spin_unlock_irqrestore(&cmci_discover_lock, flags);
161}
162
163/*
164 * After a CPU went down cycle through all the others and rediscover
165 * Must run in process context.
166 */
167void cmci_rediscover(int dying)
168{
169 int banks;
170 int cpu;
171 cpumask_var_t old;
172
173 if (!cmci_supported(&banks))
174 return;
175 if (!alloc_cpumask_var(&old, GFP_KERNEL))
176 return;
177 cpumask_copy(old, &current->cpus_allowed);
51 178
52 /* We'll mask the thermal vector in the lapic till we're ready: */ 179 for_each_online_cpu(cpu) {
53 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; 180 if (cpu == dying)
54 apic_write(APIC_LVTTHMR, h); 181 continue;
182 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
183 continue;
184 /* Recheck banks in case CPUs don't all have the same */
185 if (cmci_supported(&banks))
186 cmci_discover(banks, 0);
187 }
55 188
56 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); 189 set_cpus_allowed_ptr(current, old);
57 wrmsr(MSR_IA32_THERM_INTERRUPT, 190 free_cpumask_var(old);
58 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); 191}
59 192
60 intel_set_thermal_handler(); 193/*
194 * Reenable CMCI on this CPU in case a CPU down failed.
195 */
196void cmci_reenable(void)
197{
198 int banks;
199 if (cmci_supported(&banks))
200 cmci_discover(banks, 0);
201}
61 202
62 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 203static void intel_init_cmci(void)
63 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); 204{
205 int banks;
64 206
65 /* Unmask the thermal vector: */ 207 if (!cmci_supported(&banks))
66 l = apic_read(APIC_LVTTHMR); 208 return;
67 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
68 209
69 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 210 mce_threshold_vector = intel_threshold_interrupt;
70 cpu, tm2 ? "TM2" : "TM1"); 211 cmci_discover(banks, 1);
212 /*
213 * For CPU #0 this runs with still disabled APIC, but that's
214 * ok because only the vector is set up. We still do another
215 * check for the banks later for CPU #0 just to make sure
216 * to not miss any events.
217 */
218 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
219 cmci_recheck();
220}
71 221
72 /* enable thermal throttle processing */ 222void mce_intel_feature_init(struct cpuinfo_x86 *c)
73 atomic_set(&therm_throt_en, 1); 223{
224 intel_init_thermal(c);
225 intel_init_cmci();
74} 226}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
deleted file mode 100644
index f2ef6952c400..000000000000
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ /dev/null
@@ -1,248 +0,0 @@
1/*
2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
6 */
7
8#include <linux/init.h>
9#include <linux/interrupt.h>
10#include <linux/percpu.h>
11#include <asm/processor.h>
12#include <asm/apic.h>
13#include <asm/msr.h>
14#include <asm/mce.h>
15#include <asm/hw_irq.h>
16#include <asm/idle.h>
17#include <asm/therm_throt.h>
18
19#include "mce.h"
20
/*
 * Thermal interrupt handler of the removed 64-bit file.
 *
 * Acknowledges the local APIC first, then leaves idle and enters IRQ
 * context.  It reads MSR_IA32_THERM_STATUS and hands the PROCHOT bit to
 * therm_throt_process(); a non-zero return causes the raw status value to
 * be logged through mce_log_therm_throt_event().  irq_thermal_count is
 * bumped on every invocation, logged or not.
 */
21asmlinkage void smp_thermal_interrupt(void)
22{
23 __u64 msr_val;
24
25 ack_APIC_irq();
26
27 exit_idle();
28 irq_enter();
29
30 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
31 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
32 mce_log_therm_throt_event(msr_val);
33
34 inc_irq_stat(irq_thermal_count);
35 irq_exit();
36}
37
38/*
39 * Support for Intel Corrected Machine Check Interrupts. This allows
40 * the CPU to raise an interrupt when a corrected machine check happened.
41 * Normally we pick those up using a regular polling timer.
42 * Also supports reliable discovery of shared banks.
43 */
44
45static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
46
47/*
48 * cmci_discover_lock protects against parallel discovery attempts
49 * which could race against each other.
50 */
51static DEFINE_SPINLOCK(cmci_discover_lock);
52
53#define CMCI_THRESHOLD 1
54
/*
 * Decide whether CMCI may be used, and report the bank count.
 *
 * Returns 0 when either mce_cmci_disabled or mce_ignore_ce is set, when
 * the boot CPU is not an Intel part, or when there is no local APIC or
 * lapic_get_maxlvt() is below 6.  Otherwise returns 1 or 0 according to
 * the MCG_CMCI_P bit of MSR_IA32_MCG_CAP.
 *
 * *banks is written only once MCG_CAP has been read (low 8 bits, clamped
 * to MAX_NR_BANKS); on the early "return 0" paths it is left untouched,
 * so callers must not use it unless this function returned non-zero.
 */
55static int cmci_supported(int *banks)
56{
57 u64 cap;
58
59 if (mce_cmci_disabled || mce_ignore_ce)
60 return 0;
61
62 /*
63 * Vendor check is not strictly needed, but the initial
64 * initialization is vendor keyed and this
65 * makes sure none of the backdoors are entered otherwise.
66 */
67 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
68 return 0;
/* NOTE(review): presumably the CMCI LVT entry needs max LVT >= 6 -- confirm against the SDM. */
69 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
70 return 0;
71 rdmsrl(MSR_IA32_MCG_CAP, cap);
72 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
73 return !!(cap & MCG_CMCI_P);
74}
75
76/*
77 * The interrupt handler. This is called on every event.
78 * Just call the poller directly to log any events.
79 * This could in theory increase the threshold under high load,
80 * but doesn't for now.
81 */
/*
 * Installed into mce_threshold_vector by intel_init_cmci().  Takes no
 * arguments and returns nothing.
 */
82static void intel_threshold_interrupt(void)
83{
/* Poll only the banks recorded in this CPU's mce_banks_owned bitmap. */
84 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
85 mce_notify_irq();
86}
87
/*
 * Append " <type>:<num>" to a log line that lists MCA banks for the
 * current CPU.
 *
 * type: short tag printed before the bank number (callers pass "SHD" or
 *       "CMCI").
 * hdr:  in/out flag; when *hdr is 0 the "CPU <n> MCA banks" prefix is
 *       printed first.  *hdr is always set to 1 afterwards.
 * num:  bank number to print.
 *
 * No newline is emitted here; cmci_discover() prints it once the flag is
 * set.
 */
88static void print_update(char *type, int *hdr, int num)
89{
90 if (*hdr == 0)
91 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
92 *hdr = 1;
93 printk(KERN_CONT " %s:%d", type, num);
94}
95
96/*
97 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
98 * on this CPU. Use the algorithm recommended in the SDM to discover shared
99 * banks.
100 */
/*
 * banks: number of banks to probe (indices 0 .. banks-1).
 * boot:  non-zero makes every classified bank get reported through
 *        print_update(), regardless of the ownership bit tests.
 *
 * The whole scan runs under cmci_discover_lock with interrupts disabled.
 * Banks already marked in this CPU's mce_banks_owned bitmap are skipped.
 */
101static void cmci_discover(int banks, int boot)
102{
103 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
104 unsigned long flags;
105 int hdr = 0;
106 int i;
107
108 spin_lock_irqsave(&cmci_discover_lock, flags);
109 for (i = 0; i < banks; i++) {
110 u64 val;
111
112 if (test_bit(i, owned))
113 continue;
114
115 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
116
117 /* Already owned by someone else? */
118 if (val & CMCI_EN) {
/*
 * NOTE(review): bit i was found clear a few lines up, so
 * test_and_clear_bit() looks like it always returns 0 here and "SHD" is
 * only reported when boot is set -- confirm that this is intended.
 */
119 if (test_and_clear_bit(i, owned) || boot)
120 print_update("SHD", &hdr, i);
/* The bank is dropped from this CPU's poll set as well. */
121 __clear_bit(i, __get_cpu_var(mce_poll_banks));
122 continue;
123 }
124
/* Try to claim the bank: set the enable bit and threshold, then read back. */
125 val |= CMCI_EN | CMCI_THRESHOLD;
126 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
127 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
128
129 /* Did the enable bit stick? -- the bank supports CMCI */
130 if (val & CMCI_EN) {
/*
 * NOTE(review): for the same reason as above, test_and_set_bit() appears
 * to always return 0 here, so "CMCI" is reported for every newly claimed
 * bank whether or not boot is set.
 */
131 if (!test_and_set_bit(i, owned) || boot)
132 print_update("CMCI", &hdr, i);
133 __clear_bit(i, __get_cpu_var(mce_poll_banks));
134 } else {
/* Enable bit did not stick: warn if the bank is not in the poll set. */
135 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
136 }
137 }
138 spin_unlock_irqrestore(&cmci_discover_lock, flags);
/* Terminate the line started by print_update(), if anything was printed. */
139 if (hdr)
140 printk(KERN_CONT "\n");
141}
142
143/*
144 * Just in case we missed an event during initialization check
145 * all the CMCI owned banks.
146 */
/*
 * Does nothing unless mce_available() accepts the current CPU and
 * cmci_supported() returns non-zero.  The bank count written by
 * cmci_supported() is not used here; banks only serves as the required
 * out-parameter.
 */
147void cmci_recheck(void)
148{
149 unsigned long flags;
150 int banks;
151
152 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
153 return;
/* Poll this CPU's owned banks with local interrupts disabled. */
154 local_irq_save(flags);
155 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
156 local_irq_restore(flags);
157}
158
159/*
160 * Disable CMCI on this CPU for all banks it owns when it goes down.
161 * This allows other CPUs to claim the banks on rediscovery.
162 */
/*
 * Returns without touching anything when cmci_supported() returns 0,
 * which includes the case where mce_cmci_disabled or mce_ignore_ce is
 * already set.  Otherwise walks the banks under cmci_discover_lock with
 * interrupts disabled.
 */
163void cmci_clear(void)
164{
165 unsigned long flags;
166 int i;
167 int banks;
168 u64 val;
169
170 if (!cmci_supported(&banks))
171 return;
172 spin_lock_irqsave(&cmci_discover_lock, flags);
173 for (i = 0; i < banks; i++) {
/* Only banks marked in this CPU's mce_banks_owned bitmap are touched. */
174 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
175 continue;
176 /* Disable CMCI */
/* Clears both the enable bit and the threshold field of the CTL2 MSR. */
177 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
178 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
179 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
180 __clear_bit(i, __get_cpu_var(mce_banks_owned));
181 }
182 spin_unlock_irqrestore(&cmci_discover_lock, flags);
183}
184
185/*
186 * After a CPU went down, cycle through all the others and rediscover.
187 * Must run in process context.
188 */
/*
 * dying: CPU number to leave out of the walk.
 *
 * Returns silently, without rediscovering anything, when cmci_supported()
 * returns 0 on the calling CPU or when the temporary cpumask cannot be
 * allocated.
 */
189void cmci_rediscover(int dying)
190{
191 int banks;
192 int cpu;
193 cpumask_var_t old;
194
195 if (!cmci_supported(&banks))
196 return;
197 if (!alloc_cpumask_var(&old, GFP_KERNEL))
198 return;
/* Remember the caller's affinity so it can be put back at the end. */
199 cpumask_copy(old, &current->cpus_allowed);
200
201 for_each_online_cpu(cpu) {
202 if (cpu == dying)
203 continue;
/* Pin the current task to the target CPU; CPUs where that fails are skipped. */
204 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
205 continue;
206 /* Recheck banks in case CPUs don't all have the same */
207 if (cmci_supported(&banks))
208 cmci_discover(banks, 0);
209 }
210
/* The return value of this restoring call is not checked. */
211 set_cpus_allowed_ptr(current, old);
212 free_cpumask_var(old);
213}
214
215/*
216 * Reenable CMCI on this CPU in case a CPU down failed.
217 */
/*
 * Runs cmci_discover() on the current CPU with boot == 0, using the bank
 * count reported by cmci_supported(); does nothing when that returns 0.
 */
218void cmci_reenable(void)
219{
220 int banks;
221 if (cmci_supported(&banks))
222 cmci_discover(banks, 0);
223}
224
/*
 * Set up CMCI on the current CPU; does nothing when cmci_supported()
 * returns 0.
 *
 * Order of operations: install intel_threshold_interrupt() in the global
 * mce_threshold_vector, run bank discovery with boot == 1, program the
 * APIC_LVTCMCI entry with THRESHOLD_APIC_VECTOR in fixed delivery mode,
 * then call cmci_recheck().
 */
225static void intel_init_cmci(void)
226{
227 int banks;
228
229 if (!cmci_supported(&banks))
230 return;
231
232 mce_threshold_vector = intel_threshold_interrupt;
233 cmci_discover(banks, 1);
234 /*
235 * For CPU #0 this runs with still disabled APIC, but that's
236 * ok because only the vector is set up. We still do another
237 * check for the banks later for CPU #0 just to make sure
238 * to not miss any events.
239 */
240 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
241 cmci_recheck();
242}
243
/*
 * Intel-specific feature setup: thermal monitoring first, then CMCI.
 * c is passed on to intel_init_thermal() only; intel_init_cmci() takes
 * no arguments.
 */
244void mce_intel_feature_init(struct cpuinfo_x86 *c)
245{
246 intel_init_thermal(c);
247 intel_init_cmci();
248}
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 70b710420f74..f5f2d6f71fb6 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -17,10 +17,9 @@
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/mce.h>
20#include <asm/msr.h> 21#include <asm/msr.h>
21 22
22#include "mce.h"
23
24static int firstbank; 23static int firstbank;
25 24
26#define MCE_RATE (15*HZ) /* timer rate is 15s */ 25#define MCE_RATE (15*HZ) /* timer rate is 15s */
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 82cee108a2d3..4482aea9aa2e 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -1,21 +1,15 @@
1/* 1/*
2 * P4 specific Machine Check Exception Reporting 2 * P4 specific Machine Check Exception Reporting
3 */ 3 */
4
5#include <linux/interrupt.h>
6#include <linux/kernel.h> 4#include <linux/kernel.h>
7#include <linux/types.h> 5#include <linux/types.h>
8#include <linux/init.h> 6#include <linux/init.h>
9#include <linux/smp.h> 7#include <linux/smp.h>
10 8
11#include <asm/therm_throt.h>
12#include <asm/processor.h> 9#include <asm/processor.h>
13#include <asm/system.h> 10#include <asm/mce.h>
14#include <asm/apic.h>
15#include <asm/msr.h> 11#include <asm/msr.h>
16 12
17#include "mce.h"
18
19/* as supported by the P4/Xeon family */ 13/* as supported by the P4/Xeon family */
20struct intel_mce_extended_msrs { 14struct intel_mce_extended_msrs {
21 u32 eax; 15 u32 eax;
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
33 27
34static int mce_num_extended_msrs; 28static int mce_num_extended_msrs;
35 29
36
37#ifdef CONFIG_X86_MCE_P4THERMAL
38
39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK);
44}
45
46/* P4/Xeon Thermal transition interrupt handler: */
47static void intel_thermal_interrupt(struct pt_regs *regs)
48{
49 __u64 msr_val;
50
51 ack_APIC_irq();
52
53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
54 therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
55}
56
57/* Thermal interrupt handler for this CPU setup: */
58static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
59 unexpected_thermal_interrupt;
60
61void smp_thermal_interrupt(struct pt_regs *regs)
62{
63 irq_enter();
64 vendor_thermal_interrupt(regs);
65 __get_cpu_var(irq_stat).irq_thermal_count++;
66 irq_exit();
67}
68
69void intel_set_thermal_handler(void)
70{
71 vendor_thermal_interrupt = intel_thermal_interrupt;
72}
73
74#endif /* CONFIG_X86_MCE_P4THERMAL */
75
76/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ 30/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
77static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) 31static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
78{ 32{
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 015f481ab1b0..5c0e6533d9bc 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -10,12 +10,11 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* By default disabled */ 16/* By default disabled */
18int mce_p5_enable; 17int mce_p5_enabled __read_mostly;
19 18
20/* Machine check handler for Pentium class Intel CPUs: */ 19/* Machine check handler for Pentium class Intel CPUs: */
21static void pentium_machine_check(struct pt_regs *regs, long error_code) 20static void pentium_machine_check(struct pt_regs *regs, long error_code)
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
43{ 42{
44 u32 l, h; 43 u32 l, h;
45 44
46 /* Check for MCE support: */ 45 /* Default P5 to off as its often misconnected: */
47 if (!cpu_has(c, X86_FEATURE_MCE)) 46 if (!mce_p5_enabled)
48 return; 47 return;
49 48
50#ifdef CONFIG_X86_OLD_MCE 49 /* Check for MCE support: */
51 /* Default P5 to off as its often misconnected: */ 50 if (!cpu_has(c, X86_FEATURE_MCE))
52 if (mce_disabled != -1)
53 return; 51 return;
54#endif
55 52
56 machine_check_vector = pentium_machine_check; 53 machine_check_vector = pentium_machine_check;
57 /* Make sure the vector pointer is visible before we enable MCEs: */ 54 /* Make sure the vector pointer is visible before we enable MCEs: */
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 43c24e667457..01e4f8178183 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -10,10 +10,9 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* Machine Check Handler For PII/PIII */ 16/* Machine Check Handler For PII/PIII */
18static void intel_machine_check(struct pt_regs *regs, long error_code) 17static void intel_machine_check(struct pt_regs *regs, long error_code)
19{ 18{
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 7b1ae2e20ba5..bff8dd191dd5 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -13,13 +13,23 @@
13 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. 13 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
14 * Inspired by Ross Biro's and Al Borchers' counter code. 14 * Inspired by Ross Biro's and Al Borchers' counter code.
15 */ 15 */
16#include <linux/interrupt.h>
16#include <linux/notifier.h> 17#include <linux/notifier.h>
17#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/kernel.h>
18#include <linux/percpu.h> 20#include <linux/percpu.h>
19#include <linux/sysdev.h> 21#include <linux/sysdev.h>
22#include <linux/types.h>
23#include <linux/init.h>
24#include <linux/smp.h>
20#include <linux/cpu.h> 25#include <linux/cpu.h>
21 26
22#include <asm/therm_throt.h> 27#include <asm/processor.h>
28#include <asm/system.h>
29#include <asm/apic.h>
30#include <asm/idle.h>
31#include <asm/mce.h>
32#include <asm/msr.h>
23 33
24/* How long to wait between reporting thermal events */ 34/* How long to wait between reporting thermal events */
25#define CHECK_INTERVAL (300 * HZ) 35#define CHECK_INTERVAL (300 * HZ)
@@ -27,7 +37,7 @@
27static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
28static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
29 39
30atomic_t therm_throt_en = ATOMIC_INIT(0); 40static atomic_t therm_throt_en = ATOMIC_INIT(0);
31 41
32#ifdef CONFIG_SYSFS 42#ifdef CONFIG_SYSFS
33#define define_therm_throt_sysdev_one_ro(_name) \ 43#define define_therm_throt_sysdev_one_ro(_name) \
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
82 * 1 : Event should be logged further, and a message has been 92 * 1 : Event should be logged further, and a message has been
83 * printed to the syslog. 93 * printed to the syslog.
84 */ 94 */
85int therm_throt_process(int curr) 95static int therm_throt_process(int curr)
86{ 96{
87 unsigned int cpu = smp_processor_id(); 97 unsigned int cpu = smp_processor_id();
88 __u64 tmp_jiffs = get_jiffies_64(); 98 __u64 tmp_jiffs = get_jiffies_64();
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
186 196
187 return 0; 197 return 0;
188} 198}
189
190device_initcall(thermal_throttle_init_device); 199device_initcall(thermal_throttle_init_device);
200
191#endif /* CONFIG_SYSFS */ 201#endif /* CONFIG_SYSFS */
202
203/* Thermal transition interrupt handler */
/*
 * Reads MSR_IA32_THERM_STATUS and hands its PROCHOT bit to
 * therm_throt_process().  When that returns non-zero the raw status value
 * is also logged through mce_log_therm_throt_event().
 * Installed into smp_thermal_vector by intel_init_thermal().
 */
204static void intel_thermal_interrupt(void)
205{
206 __u64 msr_val;
207
208 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
209 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
210 mce_log_therm_throt_event(msr_val);
211}
212
/*
 * Initial value of smp_thermal_vector, used until intel_init_thermal()
 * installs the real handler.  Prints an error naming the current CPU and
 * taints the kernel with TAINT_MACHINE_CHECK.
 */
213static void unexpected_thermal_interrupt(void)
214{
215 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
216 smp_processor_id());
217 add_taint(TAINT_MACHINE_CHECK);
218}
219
/*
 * Handler that smp_thermal_interrupt() dispatches to.  Starts out as
 * unexpected_thermal_interrupt() and is replaced by
 * intel_thermal_interrupt() in intel_init_thermal().
 */
220static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
221
/*
 * Leaves idle, enters IRQ context, counts the event in
 * irq_thermal_count, runs the handler stored in smp_thermal_vector and
 * leaves IRQ context again.  regs is not used in the body.
 */
222asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
223{
224 exit_idle();
225 irq_enter();
226 inc_irq_stat(irq_thermal_count);
227 smp_thermal_vector();
228 irq_exit();
229 /* Ack only at the end to avoid potential reentry */
230 ack_APIC_irq();
231}
232
/*
 * Enable thermal monitoring on the current CPU.
 *
 * c: CPU description whose feature bits (ACPI, ACC, TM2) are tested.
 *
 * Returns early, changing nothing, when
 *  - c lacks X86_FEATURE_ACPI or X86_FEATURE_ACC,
 *  - TM1 is already enabled and the thermal LVT entry has APIC_DM_SMI set,
 *  - the thermal LVT entry already carries a vector.
 * Otherwise it programs the thermal LVT entry (masked at first), enables
 * the low/high thermal interrupts, installs intel_thermal_interrupt() in
 * smp_thermal_vector, sets TM1 in MSR_IA32_MISC_ENABLE, unmasks the LVT
 * entry and finally sets therm_throt_en.
 */
233void intel_init_thermal(struct cpuinfo_x86 *c)
234{
235 unsigned int cpu = smp_processor_id();
236 int tm2 = 0;
237 u32 l, h;
238
239 /* Thermal monitoring depends on ACPI and clock modulation */
240 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
241 return;
242
243 /*
244 * First check if it's enabled already, in which case there might
245 * be some SMM goo which handles it, so we can't even put a handler
246 * since it might be delivered via SMI already:
247 */
248 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
/*
 * Only the low half (l) of MISC_ENABLE is tested below, so h is reused
 * to hold the current thermal LVT register value.
 */
249 h = apic_read(APIC_LVTTHMR);
250 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
251 printk(KERN_DEBUG
252 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
253 return;
254 }
255
/* tm2 only selects the "TM2"/"TM1" text of the final message. */
256 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
257 tm2 = 1;
258
259 /* Check whether a vector already exists */
260 if (h & APIC_VECTOR_MASK) {
261 printk(KERN_DEBUG
262 "CPU%d: Thermal LVT vector (%#x) already installed\n",
263 cpu, (h & APIC_VECTOR_MASK));
264 return;
265 }
266
267 /* We'll mask the thermal vector in the lapic till we're ready: */
268 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
269 apic_write(APIC_LVTTHMR, h);
270
/* Read-modify-write: add the low/high enable bits, keep the high half as read. */
271 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
272 wrmsr(MSR_IA32_THERM_INTERRUPT,
273 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
274
/* The handler is installed while the LVT entry is still masked. */
275 smp_thermal_vector = intel_thermal_interrupt;
276
277 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
278 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
279
280 /* Unmask the thermal vector: */
281 l = apic_read(APIC_LVTTHMR);
282 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
283
284 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
285 cpu, tm2 ? "TM2" : "TM1");
286
287 /* enable thermal throttle processing */
288 atomic_set(&therm_throt_en, 1);
289}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 81b02487090b..54060f565974 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -9,10 +9,9 @@
9 9
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/mce.h>
12#include <asm/msr.h> 13#include <asm/msr.h>
13 14
14#include "mce.h"
15
16/* Machine check handler for WinChip C6: */ 15/* Machine check handler for WinChip C6: */
17static void winchip_machine_check(struct pt_regs *regs, long error_code) 16static void winchip_machine_check(struct pt_regs *regs, long error_code)
18{ 17{
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..d4cf4ce19aac 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
19#include <linux/kdebug.h> 19#include <linux/kdebug.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/highmem.h>
22 23
23#include <asm/apic.h> 24#include <asm/apic.h>
24#include <asm/stacktrace.h> 25#include <asm/stacktrace.h>
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event)
389 return event & CORE_EVNTSEL_MASK; 390 return event & CORE_EVNTSEL_MASK;
390} 391}
391 392
392static const u64 amd_0f_hw_cache_event_ids 393static const u64 amd_hw_cache_event_ids
393 [PERF_COUNT_HW_CACHE_MAX] 394 [PERF_COUNT_HW_CACHE_MAX]
394 [PERF_COUNT_HW_CACHE_OP_MAX] 395 [PERF_COUNT_HW_CACHE_OP_MAX]
395 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 396 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
396{ 397{
397 [ C(L1D) ] = { 398 [ C(L1D) ] = {
398 [ C(OP_READ) ] = { 399 [ C(OP_READ) ] = {
399 [ C(RESULT_ACCESS) ] = 0, 400 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
400 [ C(RESULT_MISS) ] = 0, 401 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
401 }, 402 },
402 [ C(OP_WRITE) ] = { 403 [ C(OP_WRITE) ] = {
403 [ C(RESULT_ACCESS) ] = 0, 404 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
404 [ C(RESULT_MISS) ] = 0, 405 [ C(RESULT_MISS) ] = 0,
405 }, 406 },
406 [ C(OP_PREFETCH) ] = { 407 [ C(OP_PREFETCH) ] = {
407 [ C(RESULT_ACCESS) ] = 0, 408 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
408 [ C(RESULT_MISS) ] = 0, 409 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
409 }, 410 },
410 }, 411 },
411 [ C(L1I ) ] = { 412 [ C(L1I ) ] = {
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids
418 [ C(RESULT_MISS) ] = -1, 419 [ C(RESULT_MISS) ] = -1,
419 }, 420 },
420 [ C(OP_PREFETCH) ] = { 421 [ C(OP_PREFETCH) ] = {
421 [ C(RESULT_ACCESS) ] = 0, 422 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
422 [ C(RESULT_MISS) ] = 0, 423 [ C(RESULT_MISS) ] = 0,
423 }, 424 },
424 }, 425 },
425 [ C(LL ) ] = { 426 [ C(LL ) ] = {
426 [ C(OP_READ) ] = { 427 [ C(OP_READ) ] = {
427 [ C(RESULT_ACCESS) ] = 0, 428 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
428 [ C(RESULT_MISS) ] = 0, 429 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
429 }, 430 },
430 [ C(OP_WRITE) ] = { 431 [ C(OP_WRITE) ] = {
431 [ C(RESULT_ACCESS) ] = 0, 432 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
432 [ C(RESULT_MISS) ] = 0, 433 [ C(RESULT_MISS) ] = 0,
433 }, 434 },
434 [ C(OP_PREFETCH) ] = { 435 [ C(OP_PREFETCH) ] = {
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids
438 }, 439 },
439 [ C(DTLB) ] = { 440 [ C(DTLB) ] = {
440 [ C(OP_READ) ] = { 441 [ C(OP_READ) ] = {
441 [ C(RESULT_ACCESS) ] = 0, 442 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
442 [ C(RESULT_MISS) ] = 0, 443 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
443 }, 444 },
444 [ C(OP_WRITE) ] = { 445 [ C(OP_WRITE) ] = {
445 [ C(RESULT_ACCESS) ] = 0, 446 [ C(RESULT_ACCESS) ] = 0,
@@ -911,6 +912,8 @@ x86_perf_counter_set_period(struct perf_counter *counter,
911 err = checking_wrmsrl(hwc->counter_base + idx, 912 err = checking_wrmsrl(hwc->counter_base + idx,
912 (u64)(-left) & x86_pmu.counter_mask); 913 (u64)(-left) & x86_pmu.counter_mask);
913 914
915 perf_counter_update_userpage(counter);
916
914 return ret; 917 return ret;
915} 918}
916 919
@@ -968,13 +971,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
968 if (!x86_pmu.num_counters_fixed) 971 if (!x86_pmu.num_counters_fixed)
969 return -1; 972 return -1;
970 973
971 /*
972 * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
973 */
974 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
975 boot_cpu_data.x86_model == 28)
976 return -1;
977
978 event = hwc->config & ARCH_PERFMON_EVENT_MASK; 974 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
979 975
980 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) 976 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
@@ -1040,6 +1036,8 @@ try_generic:
1040 x86_perf_counter_set_period(counter, hwc, idx); 1036 x86_perf_counter_set_period(counter, hwc, idx);
1041 x86_pmu.enable(hwc, idx); 1037 x86_pmu.enable(hwc, idx);
1042 1038
1039 perf_counter_update_userpage(counter);
1040
1043 return 0; 1041 return 0;
1044} 1042}
1045 1043
@@ -1132,6 +1130,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
1132 x86_perf_counter_update(counter, hwc, idx); 1130 x86_perf_counter_update(counter, hwc, idx);
1133 cpuc->counters[idx] = NULL; 1131 cpuc->counters[idx] = NULL;
1134 clear_bit(idx, cpuc->used_mask); 1132 clear_bit(idx, cpuc->used_mask);
1133
1134 perf_counter_update_userpage(counter);
1135} 1135}
1136 1136
1137/* 1137/*
@@ -1223,6 +1223,8 @@ again:
1223 if (!intel_pmu_save_and_restart(counter)) 1223 if (!intel_pmu_save_and_restart(counter))
1224 continue; 1224 continue;
1225 1225
1226 data.period = counter->hw.last_period;
1227
1226 if (perf_counter_overflow(counter, 1, &data)) 1228 if (perf_counter_overflow(counter, 1, &data))
1227 intel_pmu_disable_counter(&counter->hw, bit); 1229 intel_pmu_disable_counter(&counter->hw, bit);
1228 } 1230 }
@@ -1425,8 +1427,6 @@ static int intel_pmu_init(void)
1425 */ 1427 */
1426 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 1428 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1427 1429
1428 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1429
1430 /* 1430 /*
1431 * Install the hw-cache-events table: 1431 * Install the hw-cache-events table:
1432 */ 1432 */
@@ -1459,18 +1459,16 @@ static int intel_pmu_init(void)
1459 1459
1460static int amd_pmu_init(void) 1460static int amd_pmu_init(void)
1461{ 1461{
1462 /* Performance-monitoring supported from K7 and later: */
1463 if (boot_cpu_data.x86 < 6)
1464 return -ENODEV;
1465
1462 x86_pmu = amd_pmu; 1466 x86_pmu = amd_pmu;
1463 1467
1464 switch (boot_cpu_data.x86) { 1468 /* Events are common for all AMDs */
1465 case 0x0f: 1469 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
1466 case 0x10: 1470 sizeof(hw_cache_event_ids));
1467 case 0x11:
1468 memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
1469 sizeof(hw_cache_event_ids));
1470 1471
1471 pr_cont("AMD Family 0f/10/11 events, ");
1472 break;
1473 }
1474 return 0; 1472 return 0;
1475} 1473}
1476 1474
@@ -1498,21 +1496,22 @@ void __init init_hw_perf_counters(void)
1498 pr_cont("%s PMU driver.\n", x86_pmu.name); 1496 pr_cont("%s PMU driver.\n", x86_pmu.name);
1499 1497
1500 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1498 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1501 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1502 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", 1499 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1503 x86_pmu.num_counters, X86_PMC_MAX_GENERIC); 1500 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1501 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1504 } 1502 }
1505 perf_counter_mask = (1 << x86_pmu.num_counters) - 1; 1503 perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1506 perf_max_counters = x86_pmu.num_counters; 1504 perf_max_counters = x86_pmu.num_counters;
1507 1505
1508 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1506 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1509 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1510 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", 1507 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1511 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); 1508 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1509 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1512 } 1510 }
1513 1511
1514 perf_counter_mask |= 1512 perf_counter_mask |=
1515 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; 1513 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1514 x86_pmu.intel_ctrl = perf_counter_mask;
1516 1515
1517 perf_counters_lapic_init(); 1516 perf_counters_lapic_init();
1518 register_die_notifier(&perf_counter_nmi_notifier); 1517 register_die_notifier(&perf_counter_nmi_notifier);
@@ -1554,9 +1553,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1554 */ 1553 */
1555 1554
1556static inline 1555static inline
1557void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) 1556void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1558{ 1557{
1559 if (entry->nr < MAX_STACK_DEPTH) 1558 if (entry->nr < PERF_MAX_STACK_DEPTH)
1560 entry->ip[entry->nr++] = ip; 1559 entry->ip[entry->nr++] = ip;
1561} 1560}
1562 1561
@@ -1577,8 +1576,8 @@ static void backtrace_warning(void *data, char *msg)
1577 1576
1578static int backtrace_stack(void *data, char *name) 1577static int backtrace_stack(void *data, char *name)
1579{ 1578{
1580 /* Don't bother with IRQ stacks for now */ 1579 /* Process all stacks: */
1581 return -1; 1580 return 0;
1582} 1581}
1583 1582
1584static void backtrace_address(void *data, unsigned long addr, int reliable) 1583static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1596,47 +1595,59 @@ static const struct stacktrace_ops backtrace_ops = {
1596 .address = backtrace_address, 1595 .address = backtrace_address,
1597}; 1596};
1598 1597
1598#include "../dumpstack.h"
1599
1599static void 1600static void
1600perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1601perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1601{ 1602{
1602 unsigned long bp; 1603 callchain_store(entry, PERF_CONTEXT_KERNEL);
1603 char *stack; 1604 callchain_store(entry, regs->ip);
1604 int nr = entry->nr;
1605 1605
1606 callchain_store(entry, instruction_pointer(regs)); 1606 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1607}
1607 1608
1608 stack = ((char *)regs + sizeof(struct pt_regs)); 1609/*
1609#ifdef CONFIG_FRAME_POINTER 1610 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1610 bp = frame_pointer(regs); 1611 */
1611#else 1612static unsigned long
1612 bp = 0; 1613copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1613#endif 1614{
1615 unsigned long offset, addr = (unsigned long)from;
1616 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1617 unsigned long size, len = 0;
1618 struct page *page;
1619 void *map;
1620 int ret;
1614 1621
1615 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); 1622 do {
1623 ret = __get_user_pages_fast(addr, 1, 0, &page);
1624 if (!ret)
1625 break;
1616 1626
1617 entry->kernel = entry->nr - nr; 1627 offset = addr & (PAGE_SIZE - 1);
1618} 1628 size = min(PAGE_SIZE - offset, n - len);
1619 1629
1630 map = kmap_atomic(page, type);
1631 memcpy(to, map+offset, size);
1632 kunmap_atomic(map, type);
1633 put_page(page);
1620 1634
1621struct stack_frame { 1635 len += size;
1622 const void __user *next_fp; 1636 to += size;
1623 unsigned long return_address; 1637 addr += size;
1624}; 1638
1639 } while (len < n);
1640
1641 return len;
1642}
1625 1643
1626static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 1644static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1627{ 1645{
1628 int ret; 1646 unsigned long bytes;
1629
1630 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
1631 return 0;
1632 1647
1633 ret = 1; 1648 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1634 pagefault_disable();
1635 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1636 ret = 0;
1637 pagefault_enable();
1638 1649
1639 return ret; 1650 return bytes == sizeof(*frame);
1640} 1651}
1641 1652
1642static void 1653static void
@@ -1644,28 +1655,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1644{ 1655{
1645 struct stack_frame frame; 1656 struct stack_frame frame;
1646 const void __user *fp; 1657 const void __user *fp;
1647 int nr = entry->nr;
1648 1658
1649 regs = (struct pt_regs *)current->thread.sp0 - 1; 1659 if (!user_mode(regs))
1650 fp = (void __user *)regs->bp; 1660 regs = task_pt_regs(current);
1661
1662 fp = (void __user *)regs->bp;
1651 1663
1664 callchain_store(entry, PERF_CONTEXT_USER);
1652 callchain_store(entry, regs->ip); 1665 callchain_store(entry, regs->ip);
1653 1666
1654 while (entry->nr < MAX_STACK_DEPTH) { 1667 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1655 frame.next_fp = NULL; 1668 frame.next_frame = NULL;
1656 frame.return_address = 0; 1669 frame.return_address = 0;
1657 1670
1658 if (!copy_stack_frame(fp, &frame)) 1671 if (!copy_stack_frame(fp, &frame))
1659 break; 1672 break;
1660 1673
1661 if ((unsigned long)fp < user_stack_pointer(regs)) 1674 if ((unsigned long)fp < regs->sp)
1662 break; 1675 break;
1663 1676
1664 callchain_store(entry, frame.return_address); 1677 callchain_store(entry, frame.return_address);
1665 fp = frame.next_fp; 1678 fp = frame.next_frame;
1666 } 1679 }
1667
1668 entry->user = entry->nr - nr;
1669} 1680}
1670 1681
1671static void 1682static void
@@ -1701,9 +1712,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1701 entry = &__get_cpu_var(irq_entry); 1712 entry = &__get_cpu_var(irq_entry);
1702 1713
1703 entry->nr = 0; 1714 entry->nr = 0;
1704 entry->hv = 0;
1705 entry->kernel = 0;
1706 entry->user = 0;
1707 1715
1708 perf_do_callchain(regs, entry); 1716 perf_do_callchain(regs, entry);
1709 1717
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d6f5b9fbde32..5c481f6205bf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void)
716 wd_ops = &k7_wd_ops; 716 wd_ops = &k7_wd_ops;
717 break; 717 break;
718 case X86_VENDOR_INTEL: 718 case X86_VENDOR_INTEL:
719 /* 719 /* Work around where perfctr1 doesn't have a working enable
720 * Work around Core Duo (Yonah) errata AE49 where perfctr1 720 * bit as described in the following errata:
721 * doesn't have a working enable bit. 721 * AE49 Core Duo and Intel Core Solo 65 nm
722 * AN49 Intel Pentium Dual-Core
723 * AF49 Dual-Core Intel Xeon Processor LV
722 */ 724 */
723 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { 725 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
726 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
727 boot_cpu_data.x86_mask == 4))) {
724 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; 728 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
725 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; 729 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
726 } 730 }
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index ff958248e61d..5e409dc298a4 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,6 +27,7 @@
27#include <asm/cpu.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30#include <asm/iommu.h>
30 31
31 32
32#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
103#ifdef CONFIG_HPET_TIMER 104#ifdef CONFIG_HPET_TIMER
104 hpet_disable(); 105 hpet_disable();
105#endif 106#endif
107
108#ifdef CONFIG_X86_64
109 pci_iommu_shutdown();
110#endif
111
106 crash_save_cpu(regs, safe_smp_processor_id()); 112 crash_save_cpu(regs, safe_smp_processor_id());
107} 113}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 95ea5fa7d444..c8405718a4c3 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,6 +22,7 @@
22#include "dumpstack.h" 22#include "dumpstack.h"
23 23
24int panic_on_unrecovered_nmi; 24int panic_on_unrecovered_nmi;
25int panic_on_io_nmi;
25unsigned int code_bytes = 64; 26unsigned int code_bytes = 64;
26int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; 27int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
27static int die_counter; 28static int die_counter;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7271fa33d791..c4ca89d9aaf4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1383,6 +1383,8 @@ static unsigned long ram_alignment(resource_size_t pos)
1383 return 32*1024*1024; 1383 return 32*1024*1024;
1384} 1384}
1385 1385
1386#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
1387
1386void __init e820_reserve_resources_late(void) 1388void __init e820_reserve_resources_late(void)
1387{ 1389{
1388 int i; 1390 int i;
@@ -1400,17 +1402,19 @@ void __init e820_reserve_resources_late(void)
1400 * avoid stolen RAM: 1402 * avoid stolen RAM:
1401 */ 1403 */
1402 for (i = 0; i < e820.nr_map; i++) { 1404 for (i = 0; i < e820.nr_map; i++) {
1403 struct e820entry *entry = &e820_saved.map[i]; 1405 struct e820entry *entry = &e820.map[i];
1404 resource_size_t start, end; 1406 u64 start, end;
1405 1407
1406 if (entry->type != E820_RAM) 1408 if (entry->type != E820_RAM)
1407 continue; 1409 continue;
1408 start = entry->addr + entry->size; 1410 start = entry->addr + entry->size;
1409 end = round_up(start, ram_alignment(start)); 1411 end = round_up(start, ram_alignment(start)) - 1;
1410 if (start == end) 1412 if (end > MAX_RESOURCE_SIZE)
1413 end = MAX_RESOURCE_SIZE;
1414 if (start >= end)
1411 continue; 1415 continue;
1412 reserve_region_with_split(&iomem_resource, start, 1416 reserve_region_with_split(&iomem_resource, start, end,
1413 end - 1, "RAM buffer"); 1417 "RAM buffer");
1414 } 1418 }
1415} 1419}
1416 1420
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1736acc4d7aa..96f7ac0bbf01 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void)
240 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; 240 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
241 int e820_type; 241 int e820_type;
242 242
243 if (md->attribute & EFI_MEMORY_WB) 243 switch (md->type) {
244 e820_type = E820_RAM; 244 case EFI_LOADER_CODE:
245 else 245 case EFI_LOADER_DATA:
246 case EFI_BOOT_SERVICES_CODE:
247 case EFI_BOOT_SERVICES_DATA:
248 case EFI_CONVENTIONAL_MEMORY:
249 if (md->attribute & EFI_MEMORY_WB)
250 e820_type = E820_RAM;
251 else
252 e820_type = E820_RESERVED;
253 break;
254 case EFI_ACPI_RECLAIM_MEMORY:
255 e820_type = E820_ACPI;
256 break;
257 case EFI_ACPI_MEMORY_NVS:
258 e820_type = E820_NVS;
259 break;
260 case EFI_UNUSABLE_MEMORY:
261 e820_type = E820_UNUSABLE;
262 break;
263 default:
264 /*
265 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
266 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
267 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
268 */
246 e820_type = E820_RESERVED; 269 e820_type = E820_RESERVED;
270 break;
271 }
247 e820_add_region(start, size, e820_type); 272 e820_add_region(start, size, e820_type);
248 } 273 }
249 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 274 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c929add475c9..c097e7d607c6 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -48,7 +48,6 @@
48#include <asm/segment.h> 48#include <asm/segment.h>
49#include <asm/smp.h> 49#include <asm/smp.h>
50#include <asm/page_types.h> 50#include <asm/page_types.h>
51#include <asm/desc.h>
52#include <asm/percpu.h> 51#include <asm/percpu.h>
53#include <asm/dwarf2.h> 52#include <asm/dwarf2.h>
54#include <asm/processor-flags.h> 53#include <asm/processor-flags.h>
@@ -84,7 +83,7 @@
84#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF 83#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
85#else 84#else
86#define preempt_stop(clobbers) 85#define preempt_stop(clobbers)
87#define resume_kernel restore_nocheck 86#define resume_kernel restore_all
88#endif 87#endif
89 88
90.macro TRACE_IRQS_IRET 89.macro TRACE_IRQS_IRET
@@ -372,7 +371,7 @@ END(ret_from_exception)
372ENTRY(resume_kernel) 371ENTRY(resume_kernel)
373 DISABLE_INTERRUPTS(CLBR_ANY) 372 DISABLE_INTERRUPTS(CLBR_ANY)
374 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? 373 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
375 jnz restore_nocheck 374 jnz restore_all
376need_resched: 375need_resched:
377 movl TI_flags(%ebp), %ecx # need_resched set ? 376 movl TI_flags(%ebp), %ecx # need_resched set ?
378 testb $_TIF_NEED_RESCHED, %cl 377 testb $_TIF_NEED_RESCHED, %cl
@@ -540,6 +539,8 @@ syscall_exit:
540 jne syscall_exit_work 539 jne syscall_exit_work
541 540
542restore_all: 541restore_all:
542 TRACE_IRQS_IRET
543restore_all_notrace:
543 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS 544 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
544 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we 545 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
545 # are returning to the kernel. 546 # are returning to the kernel.
@@ -551,8 +552,6 @@ restore_all:
551 CFI_REMEMBER_STATE 552 CFI_REMEMBER_STATE
552 je ldt_ss # returning to user-space with LDT SS 553 je ldt_ss # returning to user-space with LDT SS
553restore_nocheck: 554restore_nocheck:
554 TRACE_IRQS_IRET
555restore_nocheck_notrace:
556 RESTORE_REGS 4 # skip orig_eax/error_code 555 RESTORE_REGS 4 # skip orig_eax/error_code
557 CFI_ADJUST_CFA_OFFSET -4 556 CFI_ADJUST_CFA_OFFSET -4
558irq_return: 557irq_return:
@@ -588,22 +587,34 @@ ldt_ss:
588 jne restore_nocheck 587 jne restore_nocheck
589#endif 588#endif
590 589
591 /* If returning to userspace with 16bit stack, 590/*
592 * try to fix the higher word of ESP, as the CPU 591 * Setup and switch to ESPFIX stack
593 * won't restore it. 592 *
594 * This is an "official" bug of all the x86-compatible 593 * We're returning to userspace with a 16 bit stack. The CPU will not
595 * CPUs, which we can try to work around to make 594 * restore the high word of ESP for us on executing iret... This is an
596 * dosemu and wine happy. */ 595 * "official" bug of all the x86-compatible CPUs, which we can work
597 movl PT_OLDESP(%esp), %eax 596 * around to make dosemu and wine happy. We do this by preloading the
598 movl %esp, %edx 597 * high word of ESP with the high word of the userspace ESP while
599 call patch_espfix_desc 598 * compensating for the offset by changing to the ESPFIX segment with
599 * a base address that matches for the difference.
600 */
601 mov %esp, %edx /* load kernel esp */
602 mov PT_OLDESP(%esp), %eax /* load userspace esp */
603 mov %dx, %ax /* eax: new kernel esp */
604 sub %eax, %edx /* offset (low word is 0) */
605 PER_CPU(gdt_page, %ebx)
606 shr $16, %edx
607 mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
608 mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
600 pushl $__ESPFIX_SS 609 pushl $__ESPFIX_SS
601 CFI_ADJUST_CFA_OFFSET 4 610 CFI_ADJUST_CFA_OFFSET 4
602 pushl %eax 611 push %eax /* new kernel esp */
603 CFI_ADJUST_CFA_OFFSET 4 612 CFI_ADJUST_CFA_OFFSET 4
613 /* Disable interrupts, but do not irqtrace this section: we
614 * will soon execute iret and the tracer was already set to
615 * the irqstate after the iret */
604 DISABLE_INTERRUPTS(CLBR_EAX) 616 DISABLE_INTERRUPTS(CLBR_EAX)
605 TRACE_IRQS_OFF 617 lss (%esp), %esp /* switch to espfix segment */
606 lss (%esp), %esp
607 CFI_ADJUST_CFA_OFFSET -8 618 CFI_ADJUST_CFA_OFFSET -8
608 jmp restore_nocheck 619 jmp restore_nocheck
609 CFI_ENDPROC 620 CFI_ENDPROC
@@ -716,15 +727,24 @@ PTREGSCALL(vm86)
716PTREGSCALL(vm86old) 727PTREGSCALL(vm86old)
717 728
718.macro FIXUP_ESPFIX_STACK 729.macro FIXUP_ESPFIX_STACK
719 /* since we are on a wrong stack, we cant make it a C code :( */ 730/*
731 * Switch back from the ESPFIX stack to the normal zero-based stack
732 *
733 * We can't call C functions using the ESPFIX stack. This code reads
734 * the high word of the segment base from the GDT and switches to the
735 * normal stack and adjusts ESP with the matching offset.
736 */
737 /* fixup the stack */
720 PER_CPU(gdt_page, %ebx) 738 PER_CPU(gdt_page, %ebx)
721 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) 739 mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
722 addl %esp, %eax 740 mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
741 shl $16, %eax
742 addl %esp, %eax /* the adjusted stack pointer */
723 pushl $__KERNEL_DS 743 pushl $__KERNEL_DS
724 CFI_ADJUST_CFA_OFFSET 4 744 CFI_ADJUST_CFA_OFFSET 4
725 pushl %eax 745 pushl %eax
726 CFI_ADJUST_CFA_OFFSET 4 746 CFI_ADJUST_CFA_OFFSET 4
727 lss (%esp), %esp 747 lss (%esp), %esp /* switch to the normal stack segment */
728 CFI_ADJUST_CFA_OFFSET -8 748 CFI_ADJUST_CFA_OFFSET -8
729.endm 749.endm
730.macro UNWIND_ESPFIX_STACK 750.macro UNWIND_ESPFIX_STACK
@@ -1154,6 +1174,7 @@ ENTRY(ftrace_graph_caller)
1154 pushl %edx 1174 pushl %edx
1155 movl 0xc(%esp), %edx 1175 movl 0xc(%esp), %edx
1156 lea 0x4(%ebp), %eax 1176 lea 0x4(%ebp), %eax
1177 movl (%ebp), %ecx
1157 subl $MCOUNT_INSN_SIZE, %edx 1178 subl $MCOUNT_INSN_SIZE, %edx
1158 call prepare_ftrace_return 1179 call prepare_ftrace_return
1159 popl %edx 1180 popl %edx
@@ -1168,6 +1189,7 @@ return_to_handler:
1168 pushl %eax 1189 pushl %eax
1169 pushl %ecx 1190 pushl %ecx
1170 pushl %edx 1191 pushl %edx
1192 movl %ebp, %eax
1171 call ftrace_return_to_handler 1193 call ftrace_return_to_handler
1172 movl %eax, 0xc(%esp) 1194 movl %eax, 0xc(%esp)
1173 popl %edx 1195 popl %edx
@@ -1329,7 +1351,7 @@ nmi_stack_correct:
1329 xorl %edx,%edx # zero error code 1351 xorl %edx,%edx # zero error code
1330 movl %esp,%eax # pt_regs pointer 1352 movl %esp,%eax # pt_regs pointer
1331 call do_nmi 1353 call do_nmi
1332 jmp restore_nocheck_notrace 1354 jmp restore_all_notrace
1333 CFI_ENDPROC 1355 CFI_ENDPROC
1334 1356
1335nmi_stack_fixup: 1357nmi_stack_fixup:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index de74f0a3e0ed..c251be745107 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller)
135 135
136 leaq 8(%rbp), %rdi 136 leaq 8(%rbp), %rdi
137 movq 0x38(%rsp), %rsi 137 movq 0x38(%rsp), %rsi
138 movq (%rbp), %rdx
138 subq $MCOUNT_INSN_SIZE, %rsi 139 subq $MCOUNT_INSN_SIZE, %rsi
139 140
140 call prepare_ftrace_return 141 call prepare_ftrace_return
@@ -150,6 +151,7 @@ GLOBAL(return_to_handler)
150 /* Save the return values */ 151 /* Save the return values */
151 movq %rax, (%rsp) 152 movq %rax, (%rsp)
152 movq %rdx, 8(%rsp) 153 movq %rdx, 8(%rsp)
154 movq %rbp, %rdi
153 155
154 call ftrace_return_to_handler 156 call ftrace_return_to_handler
155 157
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b79c5533c421..d94e1ea3b9fe 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void)
408 * Hook the return address and push it in the stack of return addrs 408 * Hook the return address and push it in the stack of return addrs
409 * in current thread info. 409 * in current thread info.
410 */ 410 */
411void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) 411void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
412 unsigned long frame_pointer)
412{ 413{
413 unsigned long old; 414 unsigned long old;
414 int faulted; 415 int faulted;
@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
453 return; 454 return;
454 } 455 }
455 456
456 if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { 457 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
458 frame_pointer) == -EBUSY) {
457 *parent = old; 459 *parent = old;
458 return; 460 return;
459 } 461 }
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index dc5ed4bdd88d..8663afb56535 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -13,7 +13,6 @@
13#include <asm/segment.h> 13#include <asm/segment.h>
14#include <asm/page_types.h> 14#include <asm/page_types.h>
15#include <asm/pgtable_types.h> 15#include <asm/pgtable_types.h>
16#include <asm/desc.h>
17#include <asm/cache.h> 16#include <asm/cache.h>
18#include <asm/thread_info.h> 17#include <asm/thread_info.h>
19#include <asm/asm-offsets.h> 18#include <asm/asm-offsets.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 54b29bb24e71..fa54f78e2a05 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -12,7 +12,6 @@
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <asm/desc.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/pgtable.h> 16#include <asm/pgtable.h>
18#include <asm/page.h> 17#include <asm/page.h>
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 81408b93f887..dedc2bddf7a5 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev)
510{ 510{
511 511
512 if (request_irq(dev->irq, hpet_interrupt_handler, 512 if (request_irq(dev->irq, hpet_interrupt_handler,
513 IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) 513 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
514 dev->name, dev))
514 return -1; 515 return -1;
515 516
516 disable_irq(dev->irq); 517 disable_irq(dev->irq);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 745579bc8256..1a041bcf506b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -32,6 +32,8 @@ int no_iommu __read_mostly;
32/* Set this to 1 if there is a HW IOMMU in the system */ 32/* Set this to 1 if there is a HW IOMMU in the system */
33int iommu_detected __read_mostly = 0; 33int iommu_detected __read_mostly = 0;
34 34
35int iommu_pass_through;
36
35dma_addr_t bad_dma_address __read_mostly = 0; 37dma_addr_t bad_dma_address __read_mostly = 0;
36EXPORT_SYMBOL(bad_dma_address); 38EXPORT_SYMBOL(bad_dma_address);
37 39
@@ -210,6 +212,10 @@ static __init int iommu_setup(char *p)
210 if (!strncmp(p, "soft", 4)) 212 if (!strncmp(p, "soft", 4))
211 swiotlb = 1; 213 swiotlb = 1;
212#endif 214#endif
215 if (!strncmp(p, "pt", 2)) {
216 iommu_pass_through = 1;
217 return 1;
218 }
213 219
214 gart_parse_options(p); 220 gart_parse_options(p);
215 221
@@ -290,6 +296,8 @@ static int __init pci_iommu_init(void)
290void pci_iommu_shutdown(void) 296void pci_iommu_shutdown(void)
291{ 297{
292 gart_iommu_shutdown(); 298 gart_iommu_shutdown();
299
300 amd_iommu_shutdown();
293} 301}
294/* Must execute after PCI subsystem */ 302/* Must execute after PCI subsystem */
295fs_initcall(pci_iommu_init); 303fs_initcall(pci_iommu_init);
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index a1712f2b50f1..6af96ee44200 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void)
71{ 71{
72 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 72 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) 74 if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ||
75 iommu_pass_through)
75 swiotlb = 1; 76 swiotlb = 1;
76#endif 77#endif
77 if (swiotlb_force) 78 if (swiotlb_force)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index be5ae80f897f..de2cab132844 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align)
289 return ret; 289 return ret;
290} 290}
291 291
292#ifdef CONFIG_X86_64
293static void __init init_gbpages(void)
294{
295 if (direct_gbpages && cpu_has_gbpages)
296 printk(KERN_INFO "Using GB pages for direct mapping\n");
297 else
298 direct_gbpages = 0;
299}
300#else
301static inline void init_gbpages(void)
302{
303}
304#endif
305
292static void __init reserve_brk(void) 306static void __init reserve_brk(void)
293{ 307{
294 if (_brk_end > _brk_start) 308 if (_brk_end > _brk_start)
@@ -871,6 +885,8 @@ void __init setup_arch(char **cmdline_p)
871 885
872 reserve_brk(); 886 reserve_brk();
873 887
888 init_gbpages();
889
874 /* max_pfn_mapped is updated here */ 890 /* max_pfn_mapped is updated here */
875 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); 891 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
876 max_pfn_mapped = max_low_pfn_mapped; 892 max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9c3f0823e6aa..29a3eef7cf4a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
124} 124}
125 125
126/* 126/*
127 * Remap allocator 127 * Large page remap allocator
128 * 128 *
129 * This allocator uses PMD page as unit. A PMD page is allocated for 129 * This allocator uses PMD page as unit. A PMD page is allocated for
130 * each cpu and each is remapped into vmalloc area using PMD mapping. 130 * each cpu and each is remapped into vmalloc area using PMD mapping.
@@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
137 * better than only using 4k mappings while still being NUMA friendly. 137 * better than only using 4k mappings while still being NUMA friendly.
138 */ 138 */
139#ifdef CONFIG_NEED_MULTIPLE_NODES 139#ifdef CONFIG_NEED_MULTIPLE_NODES
140static size_t pcpur_size __initdata; 140struct pcpul_ent {
141static void **pcpur_ptrs __initdata; 141 unsigned int cpu;
142 void *ptr;
143};
144
145static size_t pcpul_size;
146static struct pcpul_ent *pcpul_map;
147static struct vm_struct pcpul_vm;
142 148
143static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) 149static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
144{ 150{
145 size_t off = (size_t)pageno << PAGE_SHIFT; 151 size_t off = (size_t)pageno << PAGE_SHIFT;
146 152
147 if (off >= pcpur_size) 153 if (off >= pcpul_size)
148 return NULL; 154 return NULL;
149 155
150 return virt_to_page(pcpur_ptrs[cpu] + off); 156 return virt_to_page(pcpul_map[cpu].ptr + off);
151} 157}
152 158
153static ssize_t __init setup_pcpu_remap(size_t static_size) 159static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
154{ 160{
155 static struct vm_struct vm; 161 size_t map_size, dyn_size;
156 size_t ptrs_size, dyn_size;
157 unsigned int cpu; 162 unsigned int cpu;
163 int i, j;
158 ssize_t ret; 164 ssize_t ret;
159 165
160 /* 166 if (!chosen) {
161 * If large page isn't supported, there's no benefit in doing 167 size_t vm_size = VMALLOC_END - VMALLOC_START;
162 * this. Also, on non-NUMA, embedding is better. 168 size_t tot_size = num_possible_cpus() * PMD_SIZE;
163 * 169
164 * NOTE: disabled for now. 170 /* on non-NUMA, embedding is better */
165 */ 171 if (!pcpu_need_numa())
166 if (true || !cpu_has_pse || !pcpu_need_numa()) 172 return -EINVAL;
173
174 /* don't consume more than 20% of vmalloc area */
175 if (tot_size > vm_size / 5) {
176 pr_info("PERCPU: too large chunk size %zuMB for "
177 "large page remap\n", tot_size >> 20);
178 return -EINVAL;
179 }
180 }
181
182 /* need PSE */
183 if (!cpu_has_pse) {
184 pr_warning("PERCPU: lpage allocator requires PSE\n");
167 return -EINVAL; 185 return -EINVAL;
186 }
168 187
169 /* 188 /*
170 * Currently supports only single page. Supporting multiple 189 * Currently supports only single page. Supporting multiple
171 * pages won't be too difficult if it ever becomes necessary. 190 * pages won't be too difficult if it ever becomes necessary.
172 */ 191 */
173 pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + 192 pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
174 PERCPU_DYNAMIC_RESERVE); 193 PERCPU_DYNAMIC_RESERVE);
175 if (pcpur_size > PMD_SIZE) { 194 if (pcpul_size > PMD_SIZE) {
176 pr_warning("PERCPU: static data is larger than large page, " 195 pr_warning("PERCPU: static data is larger than large page, "
177 "can't use large page\n"); 196 "can't use large page\n");
178 return -EINVAL; 197 return -EINVAL;
179 } 198 }
180 dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; 199 dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
181 200
182 /* allocate pointer array and alloc large pages */ 201 /* allocate pointer array and alloc large pages */
183 ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); 202 map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
184 pcpur_ptrs = alloc_bootmem(ptrs_size); 203 pcpul_map = alloc_bootmem(map_size);
185 204
186 for_each_possible_cpu(cpu) { 205 for_each_possible_cpu(cpu) {
187 pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); 206 pcpul_map[cpu].cpu = cpu;
188 if (!pcpur_ptrs[cpu]) 207 pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
208 PMD_SIZE);
209 if (!pcpul_map[cpu].ptr) {
210 pr_warning("PERCPU: failed to allocate large page "
211 "for cpu%u\n", cpu);
189 goto enomem; 212 goto enomem;
213 }
190 214
191 /* 215 /*
192 * Only use pcpur_size bytes and give back the rest. 216 * Only use pcpul_size bytes and give back the rest.
193 * 217 *
194 * Ingo: The 2MB up-rounding bootmem is needed to make 218 * Ingo: The 2MB up-rounding bootmem is needed to make
195 * sure the partial 2MB page is still fully RAM - it's 219 * sure the partial 2MB page is still fully RAM - it's
196 * not well-specified to have a PAT-incompatible area 220 * not well-specified to have a PAT-incompatible area
197 * (unmapped RAM, device memory, etc.) in that hole. 221 * (unmapped RAM, device memory, etc.) in that hole.
198 */ 222 */
199 free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), 223 free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
200 PMD_SIZE - pcpur_size); 224 PMD_SIZE - pcpul_size);
201 225
202 memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); 226 memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
203 } 227 }
204 228
205 /* allocate address and map */ 229 /* allocate address and map */
206 vm.flags = VM_ALLOC; 230 pcpul_vm.flags = VM_ALLOC;
207 vm.size = num_possible_cpus() * PMD_SIZE; 231 pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
208 vm_area_register_early(&vm, PMD_SIZE); 232 vm_area_register_early(&pcpul_vm, PMD_SIZE);
209 233
210 for_each_possible_cpu(cpu) { 234 for_each_possible_cpu(cpu) {
211 pmd_t *pmd; 235 pmd_t *pmd, pmd_v;
212 236
213 pmd = populate_extra_pmd((unsigned long)vm.addr 237 pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
214 + cpu * PMD_SIZE); 238 cpu * PMD_SIZE);
215 set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), 239 pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
216 PAGE_KERNEL_LARGE)); 240 PAGE_KERNEL_LARGE);
241 set_pmd(pmd, pmd_v);
217 } 242 }
218 243
219 /* we're ready, commit */ 244 /* we're ready, commit */
220 pr_info("PERCPU: Remapped at %p with large pages, static data " 245 pr_info("PERCPU: Remapped at %p with large pages, static data "
221 "%zu bytes\n", vm.addr, static_size); 246 "%zu bytes\n", pcpul_vm.addr, static_size);
222 247
223 ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 248 ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
224 PERCPU_FIRST_CHUNK_RESERVE, dyn_size, 249 PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
225 PMD_SIZE, vm.addr, NULL); 250 PMD_SIZE, pcpul_vm.addr, NULL);
226 goto out_free_ar; 251
252 /* sort pcpul_map array for pcpu_lpage_remapped() */
253 for (i = 0; i < num_possible_cpus() - 1; i++)
254 for (j = i + 1; j < num_possible_cpus(); j++)
255 if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
256 struct pcpul_ent tmp = pcpul_map[i];
257 pcpul_map[i] = pcpul_map[j];
258 pcpul_map[j] = tmp;
259 }
260
261 return ret;
227 262
228enomem: 263enomem:
229 for_each_possible_cpu(cpu) 264 for_each_possible_cpu(cpu)
230 if (pcpur_ptrs[cpu]) 265 if (pcpul_map[cpu].ptr)
231 free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); 266 free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
232 ret = -ENOMEM; 267 free_bootmem(__pa(pcpul_map), map_size);
233out_free_ar: 268 return -ENOMEM;
234 free_bootmem(__pa(pcpur_ptrs), ptrs_size); 269}
235 return ret; 270
271/**
272 * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
273 * @kaddr: the kernel address in question
274 *
275 * Determine whether @kaddr falls in the pcpul recycled area. This is
276 * used by pageattr to detect VM aliases and break up the pcpu PMD
277 * mapping such that the same physical page is not mapped under
278 * different attributes.
279 *
280 * The recycled area is always at the tail of a partially used PMD
281 * page.
282 *
283 * RETURNS:
284 * Address of corresponding remapped pcpu address if match is found;
285 * otherwise, NULL.
286 */
287void *pcpu_lpage_remapped(void *kaddr)
288{
289 void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
290 unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
291 int left = 0, right = num_possible_cpus() - 1;
292 int pos;
293
294 /* pcpul in use at all? */
295 if (!pcpul_map)
296 return NULL;
297
298 /* okay, perform binary search */
299 while (left <= right) {
300 pos = (left + right) / 2;
301
302 if (pcpul_map[pos].ptr < pmd_addr)
303 left = pos + 1;
304 else if (pcpul_map[pos].ptr > pmd_addr)
305 right = pos - 1;
306 else {
307 /* it shouldn't be in the area for the first chunk */
308 WARN_ON(offset < pcpul_size);
309
310 return pcpul_vm.addr +
311 pcpul_map[pos].cpu * PMD_SIZE + offset;
312 }
313 }
314
315 return NULL;
236} 316}
237#else 317#else
238static ssize_t __init setup_pcpu_remap(size_t static_size) 318static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
239{ 319{
240 return -EINVAL; 320 return -EINVAL;
241} 321}
@@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
249 * mapping so that it can use PMD mapping without additional TLB 329 * mapping so that it can use PMD mapping without additional TLB
250 * pressure. 330 * pressure.
251 */ 331 */
252static ssize_t __init setup_pcpu_embed(size_t static_size) 332static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
253{ 333{
254 size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; 334 size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
255 335
@@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
258 * this. Also, embedding allocation doesn't play well with 338 * this. Also, embedding allocation doesn't play well with
259 * NUMA. 339 * NUMA.
260 */ 340 */
261 if (!cpu_has_pse || pcpu_need_numa()) 341 if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
262 return -EINVAL; 342 return -EINVAL;
263 343
264 return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, 344 return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
@@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
308 void *ptr; 388 void *ptr;
309 389
310 ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); 390 ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
311 if (!ptr) 391 if (!ptr) {
392 pr_warning("PERCPU: failed to allocate "
393 "4k page for cpu%u\n", cpu);
312 goto enomem; 394 goto enomem;
395 }
313 396
314 memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); 397 memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
315 pcpu4k_pages[j++] = virt_to_page(ptr); 398 pcpu4k_pages[j++] = virt_to_page(ptr);
@@ -333,6 +416,16 @@ out_free_ar:
333 return ret; 416 return ret;
334} 417}
335 418
419/* for explicit first chunk allocator selection */
420static char pcpu_chosen_alloc[16] __initdata;
421
422static int __init percpu_alloc_setup(char *str)
423{
424 strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
425 return 0;
426}
427early_param("percpu_alloc", percpu_alloc_setup);
428
336static inline void setup_percpu_segment(int cpu) 429static inline void setup_percpu_segment(int cpu)
337{ 430{
338#ifdef CONFIG_X86_32 431#ifdef CONFIG_X86_32
@@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu)
346#endif 439#endif
347} 440}
348 441
349/*
350 * Great future plan:
351 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
352 * Always point %gs to its beginning
353 */
354void __init setup_per_cpu_areas(void) 442void __init setup_per_cpu_areas(void)
355{ 443{
356 size_t static_size = __per_cpu_end - __per_cpu_start; 444 size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void)
367 * of large page mappings. Please read comments on top of 455 * of large page mappings. Please read comments on top of
368 * each allocator for details. 456 * each allocator for details.
369 */ 457 */
370 ret = setup_pcpu_remap(static_size); 458 ret = -EINVAL;
371 if (ret < 0) 459 if (strlen(pcpu_chosen_alloc)) {
372 ret = setup_pcpu_embed(static_size); 460 if (strcmp(pcpu_chosen_alloc, "4k")) {
461 if (!strcmp(pcpu_chosen_alloc, "lpage"))
462 ret = setup_pcpu_lpage(static_size, true);
463 else if (!strcmp(pcpu_chosen_alloc, "embed"))
464 ret = setup_pcpu_embed(static_size, true);
465 else
466 pr_warning("PERCPU: unknown allocator %s "
467 "specified\n", pcpu_chosen_alloc);
468 if (ret < 0)
469 pr_warning("PERCPU: %s allocator failed (%zd), "
470 "falling back to 4k\n",
471 pcpu_chosen_alloc, ret);
472 }
473 } else {
474 ret = setup_pcpu_lpage(static_size, false);
475 if (ret < 0)
476 ret = setup_pcpu_embed(static_size, false);
477 }
373 if (ret < 0) 478 if (ret < 0)
374 ret = setup_pcpu_4k(static_size); 479 ret = setup_pcpu_4k(static_size);
375 if (ret < 0) 480 if (ret < 0)
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 124d40c575df..8ccabb8a2f6a 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode)
711 unsigned long pa; 711 unsigned long pa;
712 unsigned long m; 712 unsigned long m;
713 unsigned long n; 713 unsigned long n;
714 unsigned long mmr_image;
715 struct bau_desc *adp; 714 struct bau_desc *adp;
716 struct bau_desc *ad2; 715 struct bau_desc *ad2;
717 716
@@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode)
727 n = pa >> uv_nshift; 726 n = pa >> uv_nshift;
728 m = pa & uv_mmask; 727 m = pa & uv_mmask;
729 728
730 mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); 729 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
731 if (mmr_image) { 730 (n << UV_DESC_BASE_PNODE_SHIFT | m));
732 uv_write_global_mmr64(pnode, (unsigned long)
733 UVH_LB_BAU_SB_DESCRIPTOR_BASE,
734 (n << UV_DESC_BASE_PNODE_SHIFT | m));
735 }
736 731
737 /* 732 /*
738 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each 733 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5f935f0d5861..5204332f475d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,6 +54,7 @@
54#include <asm/traps.h> 54#include <asm/traps.h>
55#include <asm/desc.h> 55#include <asm/desc.h>
56#include <asm/i387.h> 56#include <asm/i387.h>
57#include <asm/mce.h>
57 58
58#include <asm/mach_traps.h> 59#include <asm/mach_traps.h>
59 60
@@ -65,8 +66,6 @@
65#include <asm/setup.h> 66#include <asm/setup.h>
66#include <asm/traps.h> 67#include <asm/traps.h>
67 68
68#include "cpu/mcheck/mce.h"
69
70asmlinkage int system_call(void); 69asmlinkage int system_call(void);
71 70
72/* Do we ignore FPU interrupts ? */ 71/* Do we ignore FPU interrupts ? */
@@ -347,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
347 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); 346 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
348 show_registers(regs); 347 show_registers(regs);
349 348
349 if (panic_on_io_nmi)
350 panic("NMI IOCK error: Not continuing");
351
350 /* Re-enable the IOCK line, wait for a few seconds */ 352 /* Re-enable the IOCK line, wait for a few seconds */
351 reason = (reason & 0xf) | 8; 353 reason = (reason & 0xf) | 8;
352 outb(reason, 0x61); 354 outb(reason, 0x61);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b0597ad02c93..6e1a368d21d4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -590,22 +590,26 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
590 */ 590 */
591 591
592DEFINE_PER_CPU(unsigned long, cyc2ns); 592DEFINE_PER_CPU(unsigned long, cyc2ns);
593DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
593 594
594static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) 595static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
595{ 596{
596 unsigned long long tsc_now, ns_now; 597 unsigned long long tsc_now, ns_now, *offset;
597 unsigned long flags, *scale; 598 unsigned long flags, *scale;
598 599
599 local_irq_save(flags); 600 local_irq_save(flags);
600 sched_clock_idle_sleep_event(); 601 sched_clock_idle_sleep_event();
601 602
602 scale = &per_cpu(cyc2ns, cpu); 603 scale = &per_cpu(cyc2ns, cpu);
604 offset = &per_cpu(cyc2ns_offset, cpu);
603 605
604 rdtscll(tsc_now); 606 rdtscll(tsc_now);
605 ns_now = __cycles_2_ns(tsc_now); 607 ns_now = __cycles_2_ns(tsc_now);
606 608
607 if (cpu_khz) 609 if (cpu_khz) {
608 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; 610 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
611 *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
612 }
609 613
610 sched_clock_idle_wakeup_event(0); 614 sched_clock_idle_wakeup_event(0);
611 local_irq_restore(flags); 615 local_irq_restore(flags);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5c3d6e81a7dc..7030b5f911bf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2157,7 +2157,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
2157 else 2157 else
2158 /* 32 bits PSE 4MB page */ 2158 /* 32 bits PSE 4MB page */
2159 context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); 2159 context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
2160 context->rsvd_bits_mask[1][0] = ~0ull; 2160 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
2161 break; 2161 break;
2162 case PT32E_ROOT_LEVEL: 2162 case PT32E_ROOT_LEVEL:
2163 context->rsvd_bits_mask[0][2] = 2163 context->rsvd_bits_mask[0][2] =
@@ -2170,7 +2170,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
2170 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 2170 context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
2171 rsvd_bits(maxphyaddr, 62) | 2171 rsvd_bits(maxphyaddr, 62) |
2172 rsvd_bits(13, 20); /* large page */ 2172 rsvd_bits(13, 20); /* large page */
2173 context->rsvd_bits_mask[1][0] = ~0ull; 2173 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
2174 break; 2174 break;
2175 case PT64_ROOT_LEVEL: 2175 case PT64_ROOT_LEVEL:
2176 context->rsvd_bits_mask[0][3] = exb_bit_rsvd | 2176 context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
@@ -2186,7 +2186,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
2186 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 2186 context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
2187 rsvd_bits(maxphyaddr, 51) | 2187 rsvd_bits(maxphyaddr, 51) |
2188 rsvd_bits(13, 20); /* large page */ 2188 rsvd_bits(13, 20); /* large page */
2189 context->rsvd_bits_mask[1][0] = ~0ull; 2189 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
2190 break; 2190 break;
2191 } 2191 }
2192} 2192}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 258e4591e1ca..67785f635399 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -281,7 +281,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
281{ 281{
282 unsigned access = gw->pt_access; 282 unsigned access = gw->pt_access;
283 struct kvm_mmu_page *shadow_page; 283 struct kvm_mmu_page *shadow_page;
284 u64 spte, *sptep; 284 u64 spte, *sptep = NULL;
285 int direct; 285 int direct;
286 gfn_t table_gfn; 286 gfn_t table_gfn;
287 int r; 287 int r;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e770bf349ec4..356a0ce85c68 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3012,6 +3012,12 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3012 return 1; 3012 return 1;
3013} 3013}
3014 3014
3015static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3016{
3017 kvm_queue_exception(vcpu, UD_VECTOR);
3018 return 1;
3019}
3020
3015static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3021static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3016{ 3022{
3017 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3023 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3198,6 +3204,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
3198 [EXIT_REASON_HLT] = handle_halt, 3204 [EXIT_REASON_HLT] = handle_halt,
3199 [EXIT_REASON_INVLPG] = handle_invlpg, 3205 [EXIT_REASON_INVLPG] = handle_invlpg,
3200 [EXIT_REASON_VMCALL] = handle_vmcall, 3206 [EXIT_REASON_VMCALL] = handle_vmcall,
3207 [EXIT_REASON_VMCLEAR] = handle_vmx_insn,
3208 [EXIT_REASON_VMLAUNCH] = handle_vmx_insn,
3209 [EXIT_REASON_VMPTRLD] = handle_vmx_insn,
3210 [EXIT_REASON_VMPTRST] = handle_vmx_insn,
3211 [EXIT_REASON_VMREAD] = handle_vmx_insn,
3212 [EXIT_REASON_VMRESUME] = handle_vmx_insn,
3213 [EXIT_REASON_VMWRITE] = handle_vmx_insn,
3214 [EXIT_REASON_VMOFF] = handle_vmx_insn,
3215 [EXIT_REASON_VMON] = handle_vmx_insn,
3201 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 3216 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
3202 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 3217 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
3203 [EXIT_REASON_WBINVD] = handle_wbinvd, 3218 [EXIT_REASON_WBINVD] = handle_wbinvd,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 249540f98513..fe5474aec41a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -898,6 +898,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
898 case MSR_VM_HSAVE_PA: 898 case MSR_VM_HSAVE_PA:
899 case MSR_P6_EVNTSEL0: 899 case MSR_P6_EVNTSEL0:
900 case MSR_P6_EVNTSEL1: 900 case MSR_P6_EVNTSEL1:
901 case MSR_K7_EVNTSEL0:
901 data = 0; 902 data = 0;
902 break; 903 break;
903 case MSR_MTRRcap: 904 case MSR_MTRRcap:
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index c1b6c232e02b..616de4628d60 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1361,7 +1361,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1361 return 0; 1361 return 0;
1362} 1362}
1363 1363
1364void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) 1364static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
1365{ 1365{
1366 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); 1366 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
1367 /* 1367 /*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index f4568605d7d5..ff485d361182 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -55,8 +55,10 @@ static void delay_tsc(unsigned long loops)
55 55
56 preempt_disable(); 56 preempt_disable();
57 cpu = smp_processor_id(); 57 cpu = smp_processor_id();
58 rdtsc_barrier();
58 rdtscl(bclock); 59 rdtscl(bclock);
59 for (;;) { 60 for (;;) {
61 rdtsc_barrier();
60 rdtscl(now); 62 rdtscl(now);
61 if ((now - bclock) >= loops) 63 if ((now - bclock) >= loops)
62 break; 64 break;
@@ -78,6 +80,7 @@ static void delay_tsc(unsigned long loops)
78 if (unlikely(cpu != smp_processor_id())) { 80 if (unlikely(cpu != smp_processor_id())) {
79 loops -= (now - bclock); 81 loops -= (now - bclock);
80 cpu = smp_processor_id(); 82 cpu = smp_processor_id();
83 rdtsc_barrier();
81 rdtscl(bclock); 84 rdtscl(bclock);
82 } 85 }
83 } 86 }
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index ec13cb5f17ed..b7c2849ffb66 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(__strnlen_user);
127 127
128long strnlen_user(const char __user *s, long n) 128long strnlen_user(const char __user *s, long n)
129{ 129{
130 if (!access_ok(VERIFY_READ, s, n)) 130 if (!access_ok(VERIFY_READ, s, 1))
131 return 0; 131 return 0;
132 return __strnlen_user(s, n); 132 return __strnlen_user(s, n);
133} 133}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index baa0e86adfbc..78a5fff857be 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -952,8 +952,6 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
952 tsk = current; 952 tsk = current;
953 mm = tsk->mm; 953 mm = tsk->mm;
954 954
955 prefetchw(&mm->mmap_sem);
956
957 /* Get the faulting address: */ 955 /* Get the faulting address: */
958 address = read_cr2(); 956 address = read_cr2();
959 957
@@ -963,6 +961,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
963 */ 961 */
964 if (kmemcheck_active(regs)) 962 if (kmemcheck_active(regs))
965 kmemcheck_hide(regs); 963 kmemcheck_hide(regs);
964 prefetchw(&mm->mmap_sem);
966 965
967 if (unlikely(kmmio_fault(regs, address))) 966 if (unlikely(kmmio_fault(regs, address)))
968 return; 967 return;
@@ -1114,7 +1113,7 @@ good_area:
1114 * make sure we exit gracefully rather than endlessly redo 1113 * make sure we exit gracefully rather than endlessly redo
1115 * the fault: 1114 * the fault:
1116 */ 1115 */
1117 fault = handle_mm_fault(mm, vma, address, write); 1116 fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
1118 1117
1119 if (unlikely(fault & VM_FAULT_ERROR)) { 1118 if (unlikely(fault & VM_FAULT_ERROR)) {
1120 mm_fault_error(regs, error_code, address, fault); 1119 mm_fault_error(regs, error_code, address, fault);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6340cef6798a..71da1bca13cb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -14,7 +14,7 @@
14static inline pte_t gup_get_pte(pte_t *ptep) 14static inline pte_t gup_get_pte(pte_t *ptep)
15{ 15{
16#ifndef CONFIG_X86_PAE 16#ifndef CONFIG_X86_PAE
17 return *ptep; 17 return ACCESS_ONCE(*ptep);
18#else 18#else
19 /* 19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking 20 * With get_user_pages_fast, we walk down the pagetables without taking
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
219 return 1; 219 return 1;
220} 220}
221 221
222/*
223 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
224 * back to the regular GUP.
225 */
226int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
227 struct page **pages)
228{
229 struct mm_struct *mm = current->mm;
230 unsigned long addr, len, end;
231 unsigned long next;
232 unsigned long flags;
233 pgd_t *pgdp;
234 int nr = 0;
235
236 start &= PAGE_MASK;
237 addr = start;
238 len = (unsigned long) nr_pages << PAGE_SHIFT;
239 end = start + len;
240 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
241 (void __user *)start, len)))
242 return 0;
243
244 /*
245 * XXX: batch / limit 'nr', to avoid large irq off latency
246 * needs some instrumenting to determine the common sizes used by
247 * important workloads (eg. DB2), and whether limiting the batch size
248 * will decrease performance.
249 *
250 * It seems like we're in the clear for the moment. Direct-IO is
251 * the main guy that batches up lots of get_user_pages, and even
252 * they are limited to 64-at-a-time which is not so many.
253 */
254 /*
255 * This doesn't prevent pagetable teardown, but does prevent
256 * the pagetables and pages from being freed on x86.
257 *
258 * So long as we atomically load page table pointers versus teardown
259 * (which we do on x86, with the above PAE exception), we can follow the
260 * address down to the page and take a ref on it.
261 */
262 local_irq_save(flags);
263 pgdp = pgd_offset(mm, addr);
264 do {
265 pgd_t pgd = *pgdp;
266
267 next = pgd_addr_end(addr, end);
268 if (pgd_none(pgd))
269 break;
270 if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
271 break;
272 } while (pgdp++, addr = next, addr != end);
273 local_irq_restore(flags);
274
275 return nr;
276}
277
222/** 278/**
223 * get_user_pages_fast() - pin user pages in memory 279 * get_user_pages_fast() - pin user pages in memory
224 * @start: starting user address 280 * @start: starting user address
@@ -247,11 +303,16 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
247 start &= PAGE_MASK; 303 start &= PAGE_MASK;
248 addr = start; 304 addr = start;
249 len = (unsigned long) nr_pages << PAGE_SHIFT; 305 len = (unsigned long) nr_pages << PAGE_SHIFT;
306
250 end = start + len; 307 end = start + len;
251 if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, 308 if (end < start)
252 (void __user *)start, len)))
253 goto slow_irqon; 309 goto slow_irqon;
254 310
311#ifdef CONFIG_X86_64
312 if (end >> __VIRTUAL_MASK_SHIFT)
313 goto slow_irqon;
314#endif
315
255 /* 316 /*
256 * XXX: batch / limit 'nr', to avoid large irq off latency 317 * XXX: batch / limit 'nr', to avoid large irq off latency
257 * needs some instrumenting to determine the common sizes used by 318 * needs some instrumenting to determine the common sizes used by
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f53b57e4086f..47ce9a2ce5e7 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -177,20 +177,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
177 return nr_range; 177 return nr_range;
178} 178}
179 179
180#ifdef CONFIG_X86_64
181static void __init init_gbpages(void)
182{
183 if (direct_gbpages && cpu_has_gbpages)
184 printk(KERN_INFO "Using GB pages for direct mapping\n");
185 else
186 direct_gbpages = 0;
187}
188#else
189static inline void init_gbpages(void)
190{
191}
192#endif
193
194/* 180/*
195 * Setup the direct mapping of the physical memory at PAGE_OFFSET. 181 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
196 * This runs before bootmem is initialized and gets pages directly from 182 * This runs before bootmem is initialized and gets pages directly from
@@ -210,9 +196,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
210 196
211 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); 197 printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
212 198
213 if (!after_bootmem)
214 init_gbpages();
215
216#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) 199#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
217 /* 200 /*
218 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. 201 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9c543290a813..b177652251a4 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -527,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
527 return phys_pud_init(pud, addr, end, page_size_mask); 527 return phys_pud_init(pud, addr, end, page_size_mask);
528} 528}
529 529
530unsigned long __init 530unsigned long __meminit
531kernel_physical_mapping_init(unsigned long start, 531kernel_physical_mapping_init(unsigned long start,
532 unsigned long end, 532 unsigned long end,
533 unsigned long page_size_mask) 533 unsigned long page_size_mask)
@@ -598,6 +598,8 @@ void __init paging_init(void)
598 598
599 sparse_memory_present_with_active_regions(MAX_NUMNODES); 599 sparse_memory_present_with_active_regions(MAX_NUMNODES);
600 sparse_init(); 600 sparse_init();
601 /* clear the default setting with node 0 */
602 nodes_clear(node_states[N_NORMAL_MEMORY]);
601 free_area_init_nodes(max_zone_pfns); 603 free_area_init_nodes(max_zone_pfns);
602} 604}
603 605
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3cfe9ced8a4c..1b734d7a8966 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -11,6 +11,7 @@
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/seq_file.h> 12#include <linux/seq_file.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
14#include <linux/pfn.h>
14 15
15#include <asm/e820.h> 16#include <asm/e820.h>
16#include <asm/processor.h> 17#include <asm/processor.h>
@@ -681,8 +682,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
681static int cpa_process_alias(struct cpa_data *cpa) 682static int cpa_process_alias(struct cpa_data *cpa)
682{ 683{
683 struct cpa_data alias_cpa; 684 struct cpa_data alias_cpa;
684 int ret = 0; 685 unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
685 unsigned long temp_cpa_vaddr, vaddr; 686 unsigned long vaddr, remapped;
687 int ret;
686 688
687 if (cpa->pfn >= max_pfn_mapped) 689 if (cpa->pfn >= max_pfn_mapped)
688 return 0; 690 return 0;
@@ -706,42 +708,55 @@ static int cpa_process_alias(struct cpa_data *cpa)
706 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { 708 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
707 709
708 alias_cpa = *cpa; 710 alias_cpa = *cpa;
709 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 711 alias_cpa.vaddr = &laddr;
710 alias_cpa.vaddr = &temp_cpa_vaddr;
711 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); 712 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
712 713
713
714 ret = __change_page_attr_set_clr(&alias_cpa, 0); 714 ret = __change_page_attr_set_clr(&alias_cpa, 0);
715 if (ret)
716 return ret;
715 } 717 }
716 718
717#ifdef CONFIG_X86_64 719#ifdef CONFIG_X86_64
718 if (ret)
719 return ret;
720 /* 720 /*
721 * No need to redo, when the primary call touched the high 721 * If the primary call didn't touch the high mapping already
722 * mapping already: 722 * and the physical address is inside the kernel map, we need
723 */
724 if (within(vaddr, (unsigned long) _text, _brk_end))
725 return 0;
726
727 /*
728 * If the physical address is inside the kernel map, we need
729 * to touch the high mapped kernel as well: 723 * to touch the high mapped kernel as well:
730 */ 724 */
731 if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) 725 if (!within(vaddr, (unsigned long)_text, _brk_end) &&
732 return 0; 726 within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
727 unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
728 __START_KERNEL_map - phys_base;
729 alias_cpa = *cpa;
730 alias_cpa.vaddr = &temp_cpa_vaddr;
731 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
733 732
734 alias_cpa = *cpa; 733 /*
735 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; 734 * The high mapping range is imprecise, so ignore the
736 alias_cpa.vaddr = &temp_cpa_vaddr; 735 * return value.
737 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); 736 */
737 __change_page_attr_set_clr(&alias_cpa, 0);
738 }
739#endif
738 740
739 /* 741 /*
740 * The high mapping range is imprecise, so ignore the return value. 742 * If the PMD page was partially used for per-cpu remapping,
743 * the recycled area needs to be split and modified. Because
744 * the area is always proper subset of a PMD page
745 * cpa->numpages is guaranteed to be 1 for these areas, so
746 * there's no need to loop over and check for further remaps.
741 */ 747 */
742 __change_page_attr_set_clr(&alias_cpa, 0); 748 remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
743#endif 749 if (remapped) {
744 return ret; 750 WARN_ON(cpa->numpages > 1);
751 alias_cpa = *cpa;
752 alias_cpa.vaddr = &remapped;
753 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
754 ret = __change_page_attr_set_clr(&alias_cpa, 0);
755 if (ret)
756 return ret;
757 }
758
759 return 0;
745} 760}
746 761
747static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) 762static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index c0ecf250fe51..b26626dc517c 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -38,15 +38,26 @@ count_resource(struct acpi_resource *acpi_res, void *data)
38 struct acpi_resource_address64 addr; 38 struct acpi_resource_address64 addr;
39 acpi_status status; 39 acpi_status status;
40 40
41 if (info->res_num >= PCI_BUS_NUM_RESOURCES)
42 return AE_OK;
43
44 status = resource_to_addr(acpi_res, &addr); 41 status = resource_to_addr(acpi_res, &addr);
45 if (ACPI_SUCCESS(status)) 42 if (ACPI_SUCCESS(status))
46 info->res_num++; 43 info->res_num++;
47 return AE_OK; 44 return AE_OK;
48} 45}
49 46
47static int
48bus_has_transparent_bridge(struct pci_bus *bus)
49{
50 struct pci_dev *dev;
51
52 list_for_each_entry(dev, &bus->devices, bus_list) {
53 u16 class = dev->class >> 8;
54
55 if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent)
56 return true;
57 }
58 return false;
59}
60
50static acpi_status 61static acpi_status
51setup_resource(struct acpi_resource *acpi_res, void *data) 62setup_resource(struct acpi_resource *acpi_res, void *data)
52{ 63{
@@ -56,9 +67,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
56 acpi_status status; 67 acpi_status status;
57 unsigned long flags; 68 unsigned long flags;
58 struct resource *root; 69 struct resource *root;
59 70 int max_root_bus_resources = PCI_BUS_NUM_RESOURCES;
60 if (info->res_num >= PCI_BUS_NUM_RESOURCES)
61 return AE_OK;
62 71
63 status = resource_to_addr(acpi_res, &addr); 72 status = resource_to_addr(acpi_res, &addr);
64 if (!ACPI_SUCCESS(status)) 73 if (!ACPI_SUCCESS(status))
@@ -82,6 +91,18 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
82 res->end = res->start + addr.address_length - 1; 91 res->end = res->start + addr.address_length - 1;
83 res->child = NULL; 92 res->child = NULL;
84 93
94 if (bus_has_transparent_bridge(info->bus))
95 max_root_bus_resources -= 3;
96 if (info->res_num >= max_root_bus_resources) {
97 printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx "
98 "from %s for %s due to _CRS returning more than "
99 "%d resource descriptors\n", (unsigned long) res->start,
100 (unsigned long) res->end, root->name, info->name,
101 max_root_bus_resources);
102 info->res_num++;
103 return AE_OK;
104 }
105
85 if (insert_resource(root, res)) { 106 if (insert_resource(root, res)) {
86 printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " 107 printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx "
87 "from %s for %s\n", (unsigned long) res->start, 108 "from %s for %s\n", (unsigned long) res->start,
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 8766b0e216c5..712443ec6d43 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -523,6 +523,69 @@ reject:
523 523
524static int __initdata known_bridge; 524static int __initdata known_bridge;
525 525
526static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
527
528/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
529struct acpi_mcfg_allocation *pci_mmcfg_config;
530int pci_mmcfg_config_num;
531
532static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
533{
534 if (!strcmp(mcfg->header.oem_id, "SGI"))
535 acpi_mcfg_64bit_base_addr = TRUE;
536
537 return 0;
538}
539
540static int __init pci_parse_mcfg(struct acpi_table_header *header)
541{
542 struct acpi_table_mcfg *mcfg;
543 unsigned long i;
544 int config_size;
545
546 if (!header)
547 return -EINVAL;
548
549 mcfg = (struct acpi_table_mcfg *)header;
550
551 /* how many config structures do we have */
552 pci_mmcfg_config_num = 0;
553 i = header->length - sizeof(struct acpi_table_mcfg);
554 while (i >= sizeof(struct acpi_mcfg_allocation)) {
555 ++pci_mmcfg_config_num;
556 i -= sizeof(struct acpi_mcfg_allocation);
557 };
558 if (pci_mmcfg_config_num == 0) {
559 printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
560 return -ENODEV;
561 }
562
563 config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
564 pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
565 if (!pci_mmcfg_config) {
566 printk(KERN_WARNING PREFIX
567 "No memory for MCFG config tables\n");
568 return -ENOMEM;
569 }
570
571 memcpy(pci_mmcfg_config, &mcfg[1], config_size);
572
573 acpi_mcfg_oem_check(mcfg);
574
575 for (i = 0; i < pci_mmcfg_config_num; ++i) {
576 if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
577 !acpi_mcfg_64bit_base_addr) {
578 printk(KERN_ERR PREFIX
579 "MMCONFIG not in low 4GB of memory\n");
580 kfree(pci_mmcfg_config);
581 pci_mmcfg_config_num = 0;
582 return -ENODEV;
583 }
584 }
585
586 return 0;
587}
588
526static void __init __pci_mmcfg_init(int early) 589static void __init __pci_mmcfg_init(int early)
527{ 590{
528 /* MMCONFIG disabled */ 591 /* MMCONFIG disabled */
@@ -543,7 +606,7 @@ static void __init __pci_mmcfg_init(int early)
543 } 606 }
544 607
545 if (!known_bridge) 608 if (!known_bridge)
546 acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); 609 acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
547 610
548 pci_mmcfg_reject_broken(early); 611 pci_mmcfg_reject_broken(early);
549 612
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index d277ef1eea51..b3d20b9cac63 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -244,7 +244,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
244 do_fpu_end(); 244 do_fpu_end();
245 mtrr_ap_init(); 245 mtrr_ap_init();
246 246
247#ifdef CONFIG_X86_32 247#ifdef CONFIG_X86_OLD_MCE
248 mcheck_init(&boot_cpu_data); 248 mcheck_init(&boot_cpu_data);
249#endif 249#endif
250} 250}