author		Ingo Molnar <mingo@elte.hu>	2011-05-07 04:51:38 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-05-07 04:51:48 -0400
commit		4cb1f43ce8c72ee453c00fcb9f6ee9c4ebd03f98 (patch)
tree		15e64f192b54ea01fd640d69eed0cabed2baaaa9 /arch/x86
parent		9de4966a4d218f29c68e96e8e7b4d2840dedec79 (diff)
parent		0ee5623f9a6e52df90a78bd21179f8ab370e102e (diff)
Merge commit 'v2.6.39-rc6' into x86/cleanups
Merge reason: move to a (much) newer upstream base.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                          |  11
-rw-r--r--  arch/x86/boot/memory.c                    |   2
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S         |   5
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c        |  14
-rw-r--r--  arch/x86/ia32/ia32_aout.c                 |   1
-rw-r--r--  arch/x86/include/asm/acpi.h               |   5
-rw-r--r--  arch/x86/include/asm/apic.h               |   1
-rw-r--r--  arch/x86/include/asm/bitops.h             |   4
-rw-r--r--  arch/x86/include/asm/dma.h                |   1
-rw-r--r--  arch/x86/include/asm/gart.h               |  24
-rw-r--r--  arch/x86/include/asm/i387.h               |   2
-rw-r--r--  arch/x86/include/asm/io_apic.h            |   2
-rw-r--r--  arch/x86/include/asm/mmu.h                |   6
-rw-r--r--  arch/x86/include/asm/msr-index.h          |   4
-rw-r--r--  arch/x86/include/asm/numa.h               |   2
-rw-r--r--  arch/x86/include/asm/percpu.h             |  10
-rw-r--r--  arch/x86/include/asm/types.h              |   8
-rw-r--r--  arch/x86/kernel/acpi/sleep.c              |  12
-rw-r--r--  arch/x86/kernel/acpi/sleep.h              |   2
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c          |  26
-rw-r--r--  arch/x86/kernel/amd_nb.c                  |   2
-rw-r--r--  arch/x86/kernel/apb_timer.c               |   2
-rw-r--r--  arch/x86/kernel/aperture_64.c             |   2
-rw-r--r--  arch/x86/kernel/apic/apic.c               |  33
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c             |   1
-rw-r--r--  arch/x86/kernel/apic/io_apic.c            | 107
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c        |  10
-rw-r--r--  arch/x86/kernel/apm_32.c                  |   5
-rw-r--r--  arch/x86/kernel/cpu/amd.c                 |  21
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-apei.c     |  42
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c          |  23
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c           |  30
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c          |  33
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c      |  22
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c    |  38
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c       |  20
-rw-r--r--  arch/x86/kernel/crash_dump_32.c           |   3
-rw-r--r--  arch/x86/kernel/crash_dump_64.c           |   3
-rw-r--r--  arch/x86/kernel/devicetree.c              |   8
-rw-r--r--  arch/x86/kernel/dumpstack.c               |   2
-rw-r--r--  arch/x86/kernel/e820.c                    |   1
-rw-r--r--  arch/x86/kernel/i8237.c                   |  30
-rw-r--r--  arch/x86/kernel/i8259.c                   |  33
-rw-r--r--  arch/x86/kernel/irq.c                     |   1
-rw-r--r--  arch/x86/kernel/kgdb.c                    |   4
-rw-r--r--  arch/x86/kernel/microcode_core.c          |  35
-rw-r--r--  arch/x86/kernel/mpparse.c                 |   8
-rw-r--r--  arch/x86/kernel/pci-gart_64.c             |  41
-rw-r--r--  arch/x86/kernel/process_64.c              |   8
-rw-r--r--  arch/x86/kernel/reboot.c                  |   1
-rw-r--r--  arch/x86/kernel/reboot_32.S               |  12
-rw-r--r--  arch/x86/kernel/setup.c                   |  27
-rw-r--r--  arch/x86/kvm/x86.c                        |  37
-rw-r--r--  arch/x86/lib/cmpxchg16b_emu.S             |  14
-rw-r--r--  arch/x86/mm/init_64.c                     |  16
-rw-r--r--  arch/x86/mm/numa.c                        |  31
-rw-r--r--  arch/x86/mm/numa_64.c                     |   2
-rw-r--r--  arch/x86/mm/numa_emulation.c              |  20
-rw-r--r--  arch/x86/mm/srat_32.c                     |   4
-rw-r--r--  arch/x86/oprofile/nmi_int.c               |  49
-rw-r--r--  arch/x86/oprofile/op_counter.h            |   1
-rw-r--r--  arch/x86/platform/ce4100/falconfalls.dts  |   8
-rw-r--r--  arch/x86/platform/mrst/mrst.c             |  10
-rw-r--r--  arch/x86/platform/mrst/vrtc.c             |   4
-rw-r--r--  arch/x86/platform/olpc/olpc-xo1.c         |  25
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c             |   1
-rw-r--r--  arch/x86/platform/visws/visws_quirks.c    |  20
-rw-r--r--  arch/x86/vdso/vdso32-setup.c              |  15
-rw-r--r--  arch/x86/xen/Kconfig                      |   1
-rw-r--r--  arch/x86/xen/enlighten.c                  |  21
-rw-r--r--  arch/x86/xen/mmu.c                        | 142
-rw-r--r--  arch/x86/xen/p2m.c                        |  10
-rw-r--r--  arch/x86/xen/setup.c                      |   2
73 files changed, 677 insertions(+), 506 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d57ddd7573cc..cc6c53a95bfd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -71,6 +71,7 @@ config X86
 	select GENERIC_IRQ_SHOW
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
+	select ARCH_NO_SYSDEV_OPS
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -2096,6 +2097,16 @@ source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
 
+config RAPIDIO
+	bool "RapidIO support"
+	depends on PCI
+	default n
+	help
+	  If you say Y here, the kernel will include drivers and
+	  infrastructure code to support RapidIO interconnect devices.
+
+source "drivers/rapidio/Kconfig"
+
 endmenu
 
 
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index cae3feb1035e..db75d07c3645 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -91,7 +91,7 @@ static int detect_memory_e801(void)
 	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
 	} else if (oreg.ax == 15*1024) {
-		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+		boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
 	} else {
 		/*
 		 * This ignores memory above 16MB if we have a memory
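Why the one-register fix above is right: for INT 0x15, AX=0xE801, BX (not DX) returns the memory above 16 MB in 64 KB blocks, while AX returns the 1-16 MB extended memory in KB, capped at 15*1024. A minimal sketch of the computation under those documented semantics (the helper name is illustrative, not from the tree):

static unsigned int e801_alt_mem_k(unsigned int ax, unsigned int bx)
{
	/* bx << 6 converts 64 KB blocks to KB; add the sub-16M KB count */
	return (bx << 6) + ax;
}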
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index adcf794b22e2..be6d9e365a80 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1612,6 +1612,7 @@ _zero_cipher_left_encrypt:
 	movdqa	SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
 
+
 	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
 	sub $16, %r11
 	add %r13, %r11
@@ -1634,7 +1635,9 @@ _zero_cipher_left_encrypt:
 	# GHASH computation for the last <16 byte block
 	sub %r13, %r11
 	add $16, %r11
-	PSHUFB_XMM %xmm10, %xmm1
+
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm0
 
 	# shuffle xmm0 back to output as ciphertext
 
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e0e6340c8dad..2577613fb32b 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -828,9 +828,15 @@ static int rfc4106_init(struct crypto_tfm *tfm)
 	struct cryptd_aead *cryptd_tfm;
 	struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *)
 		PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
+	struct crypto_aead *cryptd_child;
+	struct aesni_rfc4106_gcm_ctx *child_ctx;
 	cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
+
+	cryptd_child = cryptd_aead_child(cryptd_tfm);
+	child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child);
+	memcpy(child_ctx, ctx, sizeof(*ctx));
 	ctx->cryptd_tfm = cryptd_tfm;
 	tfm->crt_aead.reqsize = sizeof(struct aead_request)
 		+ crypto_aead_reqsize(&cryptd_tfm->base);
@@ -923,6 +929,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 	int ret = 0;
 	struct crypto_tfm *tfm = crypto_aead_tfm(parent);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
+	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+	struct aesni_rfc4106_gcm_ctx *child_ctx =
+		aesni_rfc4106_gcm_ctx_get(cryptd_child);
 	u8 *new_key_mem = NULL;
 
 	if (key_len < 4) {
@@ -966,6 +975,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		goto exit;
 	}
 	ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+	memcpy(child_ctx, ctx, sizeof(*ctx));
 exit:
 	kfree(new_key_mem);
 	return ret;
@@ -997,7 +1007,6 @@ static int rfc4106_encrypt(struct aead_request *req)
 	int ret;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 
 	if (!irq_fpu_usable()) {
 		struct aead_request *cryptd_req =
@@ -1006,6 +1015,7 @@ static int rfc4106_encrypt(struct aead_request *req)
 		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
 		return crypto_aead_encrypt(cryptd_req);
 	} else {
+		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 		kernel_fpu_begin();
 		ret = cryptd_child->base.crt_aead.encrypt(req);
 		kernel_fpu_end();
@@ -1018,7 +1028,6 @@ static int rfc4106_decrypt(struct aead_request *req)
 	int ret;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 
 	if (!irq_fpu_usable()) {
 		struct aead_request *cryptd_req =
@@ -1027,6 +1036,7 @@ static int rfc4106_decrypt(struct aead_request *req)
 		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
 		return crypto_aead_decrypt(cryptd_req);
 	} else {
+		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 		kernel_fpu_begin();
 		ret = cryptd_child->base.crt_aead.decrypt(req);
 		kernel_fpu_end();
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2d93bdbc9ac0..fd843877e841 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -298,6 +298,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
+	current->mm->context.ia32_compat = 1;
 
 	setup_new_exec(bprm);
 
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 448d73a371ba..12e0e7dd869c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -114,9 +114,8 @@ static inline void acpi_disable_pci(void)
 	acpi_noirq_set();
 }
 
-/* routines for saving/restoring kernel state */
-extern int acpi_save_state_mem(void);
-extern void acpi_restore_state_mem(void);
+/* Low-level suspend routine. */
+extern int acpi_suspend_lowlevel(void);
 
 extern const unsigned char acpi_wakeup_code[];
 #define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a279d98ea95e..2b7d573be549 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_APIC_H
 
 #include <linux/cpumask.h>
-#include <linux/delay.h>
 #include <linux/pm.h>
 
 #include <asm/alternative.h>
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 903683b07e42..69d58131bc8e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -456,14 +456,12 @@ static inline int fls(int x)
 
 #ifdef __KERNEL__
 
-#include <asm-generic/bitops/ext2-non-atomic.h>
+#include <asm-generic/bitops/le.h>
 
 #define ext2_set_bit_atomic(lock, nr, addr)			\
 	test_and_set_bit((nr), (unsigned long *)(addr))
 #define ext2_clear_bit_atomic(lock, nr, addr)			\
 	test_and_clear_bit((nr), (unsigned long *)(addr))
 
-#include <asm-generic/bitops/minix.h>
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_BITOPS_H */
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 97b6d8114a43..057099e5faba 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -10,7 +10,6 @@
 
 #include <linux/spinlock.h>	/* And spinlocks */
 #include <asm/io.h>		/* need byte IO */
-#include <linux/delay.h>
 
 #ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
 #define dma_outb	outb_p
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 43085bfc99c3..156cd5d18d2a 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -66,7 +66,7 @@ static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
 	 * Don't enable translation but enable GART IO and CPU accesses.
 	 * Also, set DISTLBWALKPRB since GART tables memory is UC.
 	 */
-	ctl = DISTLBWALKPRB | order << 1;
+	ctl = order << 1;
 
 	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
@@ -75,17 +75,17 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
 {
 	u32 tmp, ctl;
 
 	/* address of the mappings table */
 	addr >>= 12;
 	tmp = (u32) addr<<4;
 	tmp &= ~0xf;
 	pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
 
 	/* Enable GART translation for this hammer. */
 	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
-	ctl |= GARTEN;
+	ctl |= GARTEN | DISTLBWALKPRB;
 	ctl &= ~(DISGARTCPU | DISGARTIO);
 	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
 
 static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ef328901c802..c9e09ea05644 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -237,7 +237,7 @@ static inline void fpu_save_init(struct fpu *fpu)
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
-		asm volatile("fsave %[fx]; fwait"
+		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
 		return;
 	}
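The tiny-looking change above matters because assemblers expand plain "fsave" to "fwait; fnsave", and that leading fwait can deliver a pending x87 exception before the state is captured. A condensed, illustrative sketch of the intended ordering (types and names here are for demonstration only; the legacy fsave image is 108 bytes):

struct fsave_area { u8 bytes[108]; };

static inline void fnsave_sketch(struct fsave_area *fx)
{
	/* fnsave stores and reinitializes FPU state without waiting for
	 * pending exceptions; the trailing fwait only serializes after
	 * the store has happened. */
	asm volatile("fnsave %0; fwait" : "=m" (*fx));
}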
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c4bd267dfc50..a97a240f67f3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -150,7 +150,7 @@ void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
-int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 80a1dee5bea5..aeff3e89b222 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -13,6 +13,12 @@ typedef struct {
 	int size;
 	struct mutex lock;
 	void *vdso;
+
+#ifdef CONFIG_X86_64
+	/* True if mm supports a task running in 32 bit compatibility mode. */
+	unsigned short ia32_compat;
+#endif
+
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index fd5a1f365c95..3cce71413d0b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -96,11 +96,15 @@
 #define MSR_IA32_MC0_ADDR		0x00000402
 #define MSR_IA32_MC0_MISC		0x00000403
 
+#define MSR_AMD64_MC0_MASK		0xc0010044
+
 #define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
 #define MSR_IA32_MCx_STATUS(x)		(MSR_IA32_MC0_STATUS + 4*(x))
 #define MSR_IA32_MCx_ADDR(x)		(MSR_IA32_MC0_ADDR + 4*(x))
 #define MSR_IA32_MCx_MISC(x)		(MSR_IA32_MC0_MISC + 4*(x))
 
+#define MSR_AMD64_MCx_MASK(x)		(MSR_AMD64_MC0_MASK + (x))
+
 /* These are consecutive and not in the normal 4er MCE bank block */
 #define MSR_IA32_MC0_CTL2		0x00000280
 #define MSR_IA32_MCx_CTL2(x)		(MSR_IA32_MC0_CTL2 + (x))
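Note the different strides: the architectural IA32 MC bank registers come in blocks of four per bank (hence the 4*(x)), while the new AMD mask MSRs are consecutive. A worked compile-time check of both expansions (values follow from the macros above plus MSR_IA32_MC0_CTL = 0x00000400; the function is illustrative, not kernel code):

#include <linux/bug.h>
#include <linux/compiler.h>

static void __maybe_unused mcx_msr_stride_example(void)
{
	BUILD_BUG_ON(MSR_IA32_MCx_CTL(4)   != 0x00000410); /* 0x400 + 4*4 */
	BUILD_BUG_ON(MSR_AMD64_MCx_MASK(4) != 0xc0010048); /* 0xc0010044 + 4 */
}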
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 3d4dab43c994..a50fc9f493b3 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -51,7 +51,7 @@ static inline void numa_remove_cpu(int cpu) { }
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
+void debug_cpumask_set_cpu(int cpu, int node, bool enable);
 #endif
 
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a09e1f052d84..d475b4398d8b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -45,7 +45,7 @@
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
+#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset		percpu_read(this_cpu_off)
 
 /*
@@ -62,9 +62,11 @@
 	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
 })
 #else
-#define __percpu_arg(x)		"%P" #x
+#define __percpu_prefix		""
 #endif
 
+#define __percpu_arg(x)		__percpu_prefix "%P" #x
+
 /*
  * Initialized pointers to per-cpu variables needed for the boot
  * processor need to use these macros to get the proper address
@@ -516,11 +518,11 @@ do { \
 	typeof(o2) __n2 = n2;						\
 	typeof(o2) __dummy;						\
 	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
-		       "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",	\
+		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
 		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
 		       "S" (&pcp1), "b"(__n1), "c"(__n2),		\
-		       "a"(__o1), "d"(__o2));				\
+		       "a"(__o1), "d"(__o2) : "memory");		\
 	__ret;								\
 })
 
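Two things happen above: __percpu_arg() is now built once from __percpu_prefix, so the cmpxchg16b alternative can reuse the same segment prefix on SMP (and an empty one on UP), and the added "memory" clobber stops the compiler from caching the per-cpu pair across the exchange. A usage sketch of the accessor this macro ultimately backs, assuming the this_cpu_cmpxchg_double() interface of this kernel era (the per-cpu pair "demo" is hypothetical):

#include <linux/percpu.h>
#include <linux/types.h>

struct demo_pair { long head; long gen; };
static DEFINE_PER_CPU_ALIGNED(struct demo_pair, demo);

static bool demo_update(long old_head, long old_gen, long new_head)
{
	/* Both fields are swapped only if both compare equal, in one
	 * 16-byte atomic operation (or the call-out emulation). */
	return this_cpu_cmpxchg_double(demo.head, demo.gen,
				       old_head, old_gen,
				       new_head, old_gen + 1);
}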
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h
index 88102055a4b8..8e8c23fef08c 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/asm/types.h
@@ -3,12 +3,4 @@
 
 #include <asm-generic/types.h>
 
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-typedef u64 dma64_addr_t;
-
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_X86_TYPES_H */
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 4572c58e66d5..ff93bc1b09c3 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,12 +25,12 @@ static char temp_stack[4096];
 #endif
 
 /**
- * acpi_save_state_mem - save kernel state
+ * acpi_suspend_lowlevel - save kernel state
  *
  * Create an identity mapped page table and copy the wakeup routine to
  * low memory.
  */
-int acpi_save_state_mem(void)
+int acpi_suspend_lowlevel(void)
 {
 	struct wakeup_header *header;
 	/* address in low memory of the wakeup routine. */
@@ -96,16 +96,10 @@ int acpi_save_state_mem(void)
 	saved_magic = 0x123456789abcdef0L;
 #endif /* CONFIG_64BIT */
 
+	do_suspend_lowlevel();
 	return 0;
 }
 
-/*
- * acpi_restore_state - undo effects of acpi_save_state_mem
- */
-void acpi_restore_state_mem(void)
-{
-}
-
 static int __init acpi_sleep_setup(char *str)
 {
 	while ((str != NULL) && (*str != '\0')) {
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 86ba1c87165b..416d4be13fef 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -11,3 +11,5 @@ extern int wakeup_pmode_return;
 
 extern unsigned long acpi_copy_wakeup_routine(unsigned long);
 extern void wakeup_long64(void);
+
+extern void do_suspend_lowlevel(void);
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 6e11c8134158..246d727b65b7 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -21,7 +21,7 @@
 #include <linux/acpi.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/msi.h>
 #include <asm/pci-direct.h>
@@ -1260,7 +1260,7 @@ static void disable_iommus(void)
  * disable suspend until real resume implemented
  */
 
-static int amd_iommu_resume(struct sys_device *dev)
+static void amd_iommu_resume(void)
 {
 	struct amd_iommu *iommu;
 
@@ -1276,11 +1276,9 @@ static int amd_iommu_resume(struct sys_device *dev)
 	 */
 	amd_iommu_flush_all_devices();
 	amd_iommu_flush_all_domains();
-
-	return 0;
 }
 
-static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
+static int amd_iommu_suspend(void)
 {
 	/* disable IOMMUs to go out of the way for BIOS */
 	disable_iommus();
@@ -1288,17 +1286,11 @@ static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static struct sysdev_class amd_iommu_sysdev_class = {
-	.name = "amd_iommu",
+static struct syscore_ops amd_iommu_syscore_ops = {
 	.suspend = amd_iommu_suspend,
 	.resume = amd_iommu_resume,
 };
 
-static struct sys_device device_amd_iommu = {
-	.id = 0,
-	.cls = &amd_iommu_sysdev_class,
-};
-
 /*
  * This is the core init function for AMD IOMMU hardware in the system.
  * This function is called from the generic x86 DMA layer initialization
@@ -1415,14 +1407,6 @@ static int __init amd_iommu_init(void)
 		goto free;
 	}
 
-	ret = sysdev_class_register(&amd_iommu_sysdev_class);
-	if (ret)
-		goto free;
-
-	ret = sysdev_register(&device_amd_iommu);
-	if (ret)
-		goto free;
-
 	ret = amd_iommu_init_devices();
 	if (ret)
 		goto free;
@@ -1441,6 +1425,8 @@ static int __init amd_iommu_init(void)
 
 	amd_iommu_init_notifier();
 
+	register_syscore_ops(&amd_iommu_syscore_ops);
+
 	if (iommu_pass_through)
 		goto out;
 
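This amd_iommu conversion is the pattern repeated throughout this merge (lapic, IO-APIC, APM, MCE, MTRR): a sysdev class plus a registered sys_device collapses into one struct syscore_ops whose callbacks take no arguments, and registration becomes a single call. A minimal sketch of the pattern with placeholder names (the foo_* symbols are illustrative, not from the tree):

#include <linux/init.h>
#include <linux/syscore_ops.h>

/* Syscore callbacks run with interrupts disabled on one CPU; only
 * .suspend may fail, so .resume (and .shutdown) return void. */
static int foo_suspend(void)
{
	/* quiesce the hardware */
	return 0;
}

static void foo_resume(void)
{
	/* reprogram the hardware */
}

static struct syscore_ops foo_syscore_ops = {
	.suspend = foo_suspend,
	.resume  = foo_resume,
};

static int __init foo_init(void)
{
	register_syscore_ops(&foo_syscore_ops);
	return 0;
}
device_initcall(foo_init);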
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 6801959a8b2a..4c39baa8facc 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,7 +21,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 EXPORT_SYMBOL(amd_nb_misc_ids);
 
 static struct pci_device_id amd_nb_link_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
 	{}
 };
 
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 1293c709ee85..cd1ffed4ee22 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -316,7 +316,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
 	irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
 	irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
 	/* APB timer irqs are set up as mp_irqs, timer is edge type */
-	__set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
+	__irq_set_handler(adev->irq, handle_edge_irq, 0, "edge");
 
 	if (system_state == SYSTEM_BOOTING) {
 		if (request_irq(adev->irq, apbt_interrupt_handler,
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 86d1ad4962a7..73fb469908c6 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -499,7 +499,7 @@ out:
 		 * Don't enable translation yet but enable GART IO and CPU
 		 * accesses and set DISTLBWALKPRB since GART table memory is UC.
 		 */
-		u32 ctl = DISTLBWALKPRB | aper_order << 1;
+		u32 ctl = aper_order << 1;
 
 		bus = amd_nb_bus_dev_ranges[i].bus;
 		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 966673f44141..fabf01eff771 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -24,7 +24,7 @@
 #include <linux/ftrace.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/timex.h>
 #include <linux/dmar.h>
@@ -2046,7 +2046,7 @@ static struct {
 	unsigned int apic_thmr;
 } apic_pm_state;
 
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
+static int lapic_suspend(void)
 {
 	unsigned long flags;
 	int maxlvt;
@@ -2084,23 +2084,21 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int lapic_resume(struct sys_device *dev)
+static void lapic_resume(void)
 {
 	unsigned int l, h;
 	unsigned long flags;
-	int maxlvt;
-	int ret = 0;
+	int maxlvt, ret;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
 	if (!apic_pm_state.active)
-		return 0;
+		return;
 
 	local_irq_save(flags);
 	if (intr_remapping_enabled) {
 		ioapic_entries = alloc_ioapic_entries();
 		if (!ioapic_entries) {
 			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
-			ret = -ENOMEM;
 			goto restore;
 		}
 
@@ -2162,8 +2160,6 @@ static int lapic_resume(struct sys_device *dev)
 	}
 restore:
 	local_irq_restore(flags);
-
-	return ret;
 }
 
 /*
@@ -2171,17 +2167,11 @@ restore:
  * are needed on every CPU up until machine_halt/restart/poweroff.
  */
 
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
+static struct syscore_ops lapic_syscore_ops = {
 	.resume		= lapic_resume,
 	.suspend	= lapic_suspend,
 };
 
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
 static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
@@ -2189,16 +2179,11 @@ static void __cpuinit apic_pm_activate(void)
 
 static int __init init_lapic_sysfs(void)
 {
-	int error;
-
-	if (!cpu_has_apic)
-		return 0;
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+	if (cpu_has_apic)
+		register_syscore_ops(&lapic_syscore_ops);
 
-	error = sysdev_class_register(&lapic_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic);
-	return error;
+	return 0;
 }
 
 /* local apic needs to resume before other devices access its registers. */
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c4e557a1ebb6..5260fe91bcb6 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -16,6 +16,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 180ca240e03c..45fd33d1fd3a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -30,7 +30,7 @@
 #include <linux/compiler.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/freezer.h>
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
 				      struct io_apic_irq_attr *attr);
 
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
@@ -2918,89 +2918,84 @@ static int __init io_apic_bug_finalize(void)
 
 late_initcall(io_apic_bug_finalize);
 
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+static struct IO_APIC_route_entry *ioapic_saved_data[MAX_IO_APICS];
 
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-		*entry = ioapic_read_entry(dev->id, i);
+	if (!saved_data)
+		return;
+
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		saved_data[i] = ioapic_read_entry(ioapic_id, i);
+}
+
+static int ioapic_suspend(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = 0; ioapic_id < nr_ioapics; ioapic_id++)
+		suspend_ioapic(ioapic_id);
 
 	return 0;
 }
 
-static int ioapic_resume(struct sys_device *dev)
+static void resume_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
+	if (!saved_data)
+		return;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(ioapic_id, 0);
+	if (reg_00.bits.ID != mp_ioapics[ioapic_id].apicid) {
+		reg_00.bits.ID = mp_ioapics[ioapic_id].apicid;
+		io_apic_write(ioapic_id, 0, reg_00.raw);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		ioapic_write_entry(ioapic_id, i, saved_data[i]);
+}
 
-	return 0;
+static void ioapic_resume(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
+		resume_ioapic(ioapic_id);
 }
 
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
+static struct syscore_ops ioapic_syscore_ops = {
 	.suspend = ioapic_suspend,
 	.resume = ioapic_resume,
 };
 
-static int __init ioapic_init_sysfs(void)
+static int __init ioapic_init_ops(void)
 {
-	struct sys_device * dev;
-	int i, size, error;
+	int i;
 
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
+	for (i = 0; i < nr_ioapics; i++) {
+		unsigned int size;
 
-	for (i = 0; i < nr_ioapics; i++ ) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+		size = nr_ioapic_registers[i]
 			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
+		ioapic_saved_data[i] = kzalloc(size, GFP_KERNEL);
+		if (!ioapic_saved_data[i])
+			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
 	}
 
+	register_syscore_ops(&ioapic_syscore_ops);
+
 	return 0;
 }
 
-device_initcall(ioapic_init_sysfs);
+device_initcall(ioapic_init_ops);
 
 /*
  * Dynamic irq allocate and deallocation
@@ -3575,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-int
+static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
 	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -3590,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
 			       struct io_apic_irq_attr *attr)
 {
 	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 3c289281394c..33b10a0fc095 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -23,6 +23,8 @@
 #include <linux/io.h>
 #include <linux/pci.h>
 #include <linux/kdebug.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
@@ -34,6 +36,7 @@
 #include <asm/ipi.h>
 #include <asm/smp.h>
 #include <asm/x86_init.h>
+#include <asm/emergency-restart.h>
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -810,4 +813,11 @@ void __init uv_system_init(void)
 
 	/* register Legacy VGA I/O redirection handler */
 	pci_register_set_vga_state(uv_set_vga_state);
+
+	/*
+	 * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
+	 * EFI is not enabled in the kdump kernel.
+	 */
+	if (is_kdump_kernel())
+		reboot_type = BOOT_ACPI;
 }
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0b4be431c620..adee12e0da1f 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
 #include <linux/acpi.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -1238,6 +1239,7 @@ static int suspend(int vetoable)
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();
 
 	local_irq_enable();
 
@@ -1255,6 +1257,7 @@ static int suspend(int vetoable)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
 
+	syscore_resume();
 	sysdev_resume();
 	local_irq_enable();
 
@@ -1280,6 +1283,7 @@ static void standby(void)
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();
 	local_irq_enable();
 
 	err = set_system_power_state(APM_STATE_STANDBY);
@@ -1287,6 +1291,7 @@ static void standby(void)
 		apm_error("standby", err);
 
 	local_irq_disable();
+	syscore_resume();
 	sysdev_resume();
 	local_irq_enable();
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3ecece0217ef..bb9eb29a52dd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -615,6 +615,25 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* As a rule processors have APIC timer running in deep C states */
 	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);
+
+	/*
+	 * Disable GART TLB Walk Errors on Fam10h. We do this here
+	 * because this is always needed when GART is enabled, even in a
+	 * kernel which has no MCE support built in.
+	 */
+	if (c->x86 == 0x10) {
+		/*
+		 * BIOS should disable GartTlbWlk Errors themself. If
+		 * it doesn't, do it here as suggested by the BKDG.
+		 *
+		 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
+		 */
+		u64 mask;
+
+		rdmsrl(MSR_AMD64_MCx_MASK(4), mask);
+		mask |= (1 << 10);
+		wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+	}
 }
 
 #ifdef CONFIG_X86_32
@@ -679,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
  */
 
 const int amd_erratum_400[] =
-	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
 			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
 EXPORT_SYMBOL_GPL(amd_erratum_400);
 
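Why 0x41 vs 0x4 matters in the erratum-400 fix: AMD_MODEL_RANGE packs family, start model/stepping and end model/stepping into one word, so the old value claimed the range started at model 0x41 instead of model 0x4. The packing, as defined in this era's <asm/processor.h> (reproduced here for reference, not new code):

#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
	((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
/* old: family 0xf, first model 0x41, stepping 0x2 - range start too high
 * new: family 0x0f, first model 0x4, stepping 0x2 - the intended start */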
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 8209472b27a5..83930deec3c6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
 ssize_t apei_read_mce(struct mce *m, u64 *record_id)
 {
 	struct cper_mce_record rcd;
-	ssize_t len;
+	int rc, pos;
 
-	len = erst_read_next(&rcd.hdr, sizeof(rcd));
-	if (len <= 0)
-		return len;
-	/* Can not skip other records in storage via ERST unless clear them */
-	else if (len != sizeof(rcd) ||
-		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
-		if (printk_ratelimit())
-			pr_warning(
-			"MCE-APEI: Can not skip the unknown record in ERST");
-		return -EIO;
-	}
-
+	rc = erst_get_record_id_begin(&pos);
+	if (rc)
+		return rc;
+retry:
+	rc = erst_get_record_id_next(&pos, record_id);
+	if (rc)
+		goto out;
+	/* no more record */
+	if (*record_id == APEI_ERST_INVALID_RECORD_ID)
+		goto out;
+	rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
+	/* someone else has cleared the record, try next one */
+	if (rc == -ENOENT)
+		goto retry;
+	else if (rc < 0)
+		goto out;
+	/* try to skip other type records in storage */
+	else if (rc != sizeof(rcd) ||
+		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+		goto retry;
 	memcpy(m, &rcd.mce, sizeof(*m));
-	*record_id = rcd.hdr.record_id;
+	rc = sizeof(*m);
+out:
+	erst_get_record_id_end();
 
-	return sizeof(*m);
+	return rc;
 }
 
 /* Check whether there is record in ERST */
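The rewrite above moves from a destructive "read next record" model to the ERST record-ID iteration API, so a reader can skip foreign or concurrently-cleared records instead of failing with -EIO. A condensed sketch of the protocol, using the same functions the hunk calls (the caller shape is illustrative; the real user is apei_read_mce()):

static void iterate_erst_records_sketch(void)
{
	u64 record_id;
	int pos;

	if (erst_get_record_id_begin(&pos))
		return;
	while (!erst_get_record_id_next(&pos, &record_id) &&
	       record_id != APEI_ERST_INVALID_RECORD_ID) {
		/* erst_read(record_id, ...) may return -ENOENT if another
		 * reader cleared the record first; just move on. */
	}
	erst_get_record_id_end();
}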
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ab1122998dba..3385ea26f684 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -21,6 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/string.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
@@ -1625,7 +1626,7 @@ out:
 static unsigned int mce_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &mce_wait, wait);
-	if (rcu_dereference_check_mce(mcelog.next))
+	if (rcu_access_index(mcelog.next))
 		return POLLIN | POLLRDNORM;
 	if (!mce_apei_read_done && apei_check_mce())
 		return POLLIN | POLLRDNORM;
@@ -1749,14 +1750,14 @@ static int mce_disable_error_reporting(void)
 	return 0;
 }
 
-static int mce_suspend(struct sys_device *dev, pm_message_t state)
+static int mce_suspend(void)
 {
 	return mce_disable_error_reporting();
 }
 
-static int mce_shutdown(struct sys_device *dev)
+static void mce_shutdown(void)
 {
-	return mce_disable_error_reporting();
+	mce_disable_error_reporting();
 }
 
 /*
@@ -1764,14 +1765,18 @@ static int mce_shutdown(struct sys_device *dev)
  * Only one CPU is active at this time, the others get re-added later using
  * CPU hotplug:
  */
-static int mce_resume(struct sys_device *dev)
+static void mce_resume(void)
 {
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
-
-	return 0;
 }
 
+static struct syscore_ops mce_syscore_ops = {
+	.suspend = mce_suspend,
+	.shutdown = mce_shutdown,
+	.resume = mce_resume,
+};
+
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1808,9 +1813,6 @@ static void mce_enable_ce(void *all)
 }
 
 static struct sysdev_class mce_sysclass = {
-	.suspend = mce_suspend,
-	.shutdown = mce_shutdown,
-	.resume = mce_resume,
 	.name = "machinecheck",
 };
 
@@ -2139,6 +2141,7 @@ static __init int mcheck_init_device(void)
 		return err;
 	}
 
+	register_syscore_ops(&mce_syscore_ops);
 	register_hotcpu_notifier(&mce_cpu_notifier);
 	misc_register(&mce_log_device);
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index bebabec5b448..929739a653d1 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -45,6 +45,7 @@
 #include <linux/cpu.h>
 #include <linux/pci.h>
 #include <linux/smp.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/processor.h>
 #include <asm/e820.h>
@@ -292,14 +293,24 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
 
 	/*
 	 * HACK!
-	 * We use this same function to initialize the mtrrs on boot.
-	 * The state of the boot cpu's mtrrs has been saved, and we want
-	 * to replicate across all the APs.
-	 * If we're doing that @reg is set to something special...
+	 *
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
+	 * is unnecessary if at this point we are still on the cpu that started
+	 * the boot/resume sequence. But there is no guarantee that we are still
+	 * on the same cpu. So we do mtrr_if->set_all() on this cpu as well to
+	 * be sure that we are in sync with everyone else.
 	 */
 	if (reg != ~0U)
 		mtrr_if->set(reg, base, size, type);
-	else if (!mtrr_aps_delayed_init)
+	else
 		mtrr_if->set_all();
 
 	/* Wait for the others */
@@ -630,7 +641,7 @@ struct mtrr_value {
 
 static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
 
-static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
+static int mtrr_save(void)
 {
 	int i;
 
@@ -642,7 +653,7 @@ static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
 	return 0;
 }
 
-static int mtrr_restore(struct sys_device *sysdev)
+static void mtrr_restore(void)
 {
 	int i;
 
@@ -653,12 +664,11 @@ static int mtrr_restore(struct sys_device *sysdev)
 				 mtrr_value[i].ltype);
 		}
 	}
-	return 0;
 }
 
 
 
-static struct sysdev_driver mtrr_sysdev_driver = {
+static struct syscore_ops mtrr_syscore_ops = {
 	.suspend	= mtrr_save,
 	.resume		= mtrr_restore,
 };
@@ -839,7 +849,7 @@ static int __init mtrr_init_finialize(void)
 	 * TBD: is there any system with such CPU which supports
 	 * suspend/resume? If no, we should remove the code.
 	 */
-	sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
+	register_syscore_ops(&mtrr_syscore_ops);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 87eab4a27dfc..e638689279d3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -500,12 +500,17 @@ static bool check_hw_exists(void)
 	return true;
 
 bios_fail:
-	printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+	/*
+	 * We still allow the PMU driver to operate:
+	 */
+	printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
 	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
-	return false;
+
+	return true;
 
 msr_fail:
 	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+
 	return false;
 }
 
@@ -581,8 +586,12 @@ static int x86_setup_perfctr(struct perf_event *event)
 		return -EOPNOTSUPP;
 	}
 
+	/*
+	 * Do not allow config1 (extended registers) to propagate,
+	 * there's no sane user-space generalization yet:
+	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return x86_pmu_extra_regs(event->attr.config, event);
+		return 0;
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
@@ -604,8 +613,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
@@ -912,7 +921,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 		hwc->event_base	= 0;
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base	= MSR_ARCH_PERFMON_FIXED_CTR0;
+		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
@@ -1279,6 +1288,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1279 1288
1280 cpuc = &__get_cpu_var(cpu_hw_events); 1289 cpuc = &__get_cpu_var(cpu_hw_events);
1281 1290
1291 /*
1292 * Some chipsets need to unmask the LVTPC in a particular spot
1293 * inside the nmi handler. As a result, the unmasking was pushed
1294 * into all the nmi handlers.
1295 *
1296 * This generic handler doesn't seem to have any issues with where
1297 * the unmasking occurs, so it was left at the top.
1298 */
1299 apic_write(APIC_LVTPC, APIC_DM_NMI);
1300
1282 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1301 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1283 if (!test_bit(idx, cpuc->active_mask)) { 1302 if (!test_bit(idx, cpuc->active_mask)) {
1284 /* 1303 /*
@@ -1365,8 +1384,6 @@ perf_event_nmi_handler(struct notifier_block *self,
1365 return NOTIFY_DONE; 1384 return NOTIFY_DONE;
1366 } 1385 }
1367 1386
1368 apic_write(APIC_LVTPC, APIC_DM_NMI);
1369
1370 handled = x86_pmu.handle_irq(args->regs); 1387 handled = x86_pmu.handle_irq(args->regs);
1371 if (!handled) 1388 if (!handled)
1372 return NOTIFY_DONE; 1389 return NOTIFY_DONE;
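
The x86_assign_hw_event() fix above matters because the Intel fixed counters occupy consecutive MSRs; before the change, every fixed counter's event_base pointed at FIXED_CTR0. A sketch of the addressing the fix encodes (fixed_ctr_msr() is a hypothetical helper; the constants are the kernel's):

static unsigned int fixed_ctr_msr(int idx)
{
        /* fixed counter N lives at MSR_ARCH_PERFMON_FIXED_CTR0 + N;
         * fixed counters are indexed starting at X86_PMC_IDX_FIXED */
        return MSR_ARCH_PERFMON_FIXED_CTR0 + (idx - X86_PMC_IDX_FIXED);
}
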
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 461f62bbd774..cf4e369cea67 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids
8 [ C(L1D) ] = { 8 [ C(L1D) ] = {
9 [ C(OP_READ) ] = { 9 [ C(OP_READ) ] = {
10 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 10 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
11 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ 11 [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
12 }, 12 },
13 [ C(OP_WRITE) ] = { 13 [ C(OP_WRITE) ] = {
14 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ 14 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
@@ -427,7 +427,9 @@ static __initconst const struct x86_pmu amd_pmu = {
427 * 427 *
428 * Exceptions: 428 * Exceptions:
429 * 429 *
430 * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*)
430 * 0x003 FP PERF_CTL[3] 431 * 0x003 FP PERF_CTL[3]
432 * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*)
431 * 0x00B FP PERF_CTL[3] 433 * 0x00B FP PERF_CTL[3]
432 * 0x00D FP PERF_CTL[3] 434 * 0x00D FP PERF_CTL[3]
433 * 0x023 DE PERF_CTL[2:0] 435 * 0x023 DE PERF_CTL[2:0]
@@ -448,6 +450,8 @@ static __initconst const struct x86_pmu amd_pmu = {
448 * 0x0DF LS PERF_CTL[5:0] 450 * 0x0DF LS PERF_CTL[5:0]
449 * 0x1D6 EX PERF_CTL[5:0] 451 * 0x1D6 EX PERF_CTL[5:0]
450 * 0x1D8 EX PERF_CTL[5:0] 452 * 0x1D8 EX PERF_CTL[5:0]
453 *
454 * (*) depending on the umask all FPU counters may be used
451 */ 455 */
452 456
453static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 457static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
@@ -460,18 +464,28 @@ static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
460static struct event_constraint * 464static struct event_constraint *
461amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) 465amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
462{ 466{
463 unsigned int event_code = amd_get_event_code(&event->hw); 467 struct hw_perf_event *hwc = &event->hw;
468 unsigned int event_code = amd_get_event_code(hwc);
464 469
465 switch (event_code & AMD_EVENT_TYPE_MASK) { 470 switch (event_code & AMD_EVENT_TYPE_MASK) {
466 case AMD_EVENT_FP: 471 case AMD_EVENT_FP:
467 switch (event_code) { 472 switch (event_code) {
473 case 0x000:
474 if (!(hwc->config & 0x0000F000ULL))
475 break;
476 if (!(hwc->config & 0x00000F00ULL))
477 break;
478 return &amd_f15_PMC3;
479 case 0x004:
480 if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
481 break;
482 return &amd_f15_PMC3;
468 case 0x003: 483 case 0x003:
469 case 0x00B: 484 case 0x00B:
470 case 0x00D: 485 case 0x00D:
471 return &amd_f15_PMC3; 486 return &amd_f15_PMC3;
472 default:
473 return &amd_f15_PMC53;
474 } 487 }
488 return &amd_f15_PMC53;
475 case AMD_EVENT_LS: 489 case AMD_EVENT_LS:
476 case AMD_EVENT_DC: 490 case AMD_EVENT_DC:
477 case AMD_EVENT_EX_LS: 491 case AMD_EVENT_EX_LS:
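
The family-15h FP constraint logic above is easier to follow flattened out. A sketch of the decision it makes, assuming the declarations from this hunk (amd_fp_constraint_sketch() is a hypothetical condensation):

static struct event_constraint *
amd_fp_constraint_sketch(struct hw_perf_event *hwc, unsigned int event_code)
{
        switch (event_code) {
        case 0x000:     /* PMC3 only when both umask nibbles are set */
                if ((hwc->config & 0x0000F000ULL) &&
                    (hwc->config & 0x00000F00ULL))
                        return &amd_f15_PMC3;
                break;
        case 0x004:     /* PMC3 only when more than one umask bit is set */
                if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) > 1)
                        return &amd_f15_PMC3;
                break;
        case 0x003:
        case 0x00B:
        case 0x00D:     /* always restricted to PMC3 */
                return &amd_f15_PMC3;
        }
        return &amd_f15_PMC53;  /* otherwise any of PERF_CTL[5:3] */
}
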
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8fc2b2cee1da..e61539b07d2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
25/* 25/*
26 * Intel PerfMon, used on Core and later. 26 * Intel PerfMon, used on Core and later.
27 */ 27 */
28static const u64 intel_perfmon_event_map[] = 28static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
29{ 29{
30 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, 30 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
31 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 31 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -391,12 +391,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids
391{ 391{
392 [ C(L1D) ] = { 392 [ C(L1D) ] = {
393 [ C(OP_READ) ] = { 393 [ C(OP_READ) ] = {
394 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ 394 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
395 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ 395 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
396 }, 396 },
397 [ C(OP_WRITE) ] = { 397 [ C(OP_WRITE) ] = {
398 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ 398 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
399 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ 399 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
400 }, 400 },
401 [ C(OP_PREFETCH) ] = { 401 [ C(OP_PREFETCH) ] = {
402 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ 402 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
@@ -933,6 +933,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
933 933
934 cpuc = &__get_cpu_var(cpu_hw_events); 934 cpuc = &__get_cpu_var(cpu_hw_events);
935 935
936 /*
937 * Some chipsets need to unmask the LVTPC in a particular spot
938 * inside the nmi handler. As a result, the unmasking was pushed
939 * into all the nmi handlers.
940 *
941 * This handler doesn't seem to have any issues with the unmasking,
942 * so it was left at the top.
943 */
944 apic_write(APIC_LVTPC, APIC_DM_NMI);
945
936 intel_pmu_disable_all(); 946 intel_pmu_disable_all();
937 handled = intel_pmu_drain_bts_buffer(); 947 handled = intel_pmu_drain_bts_buffer();
938 status = intel_pmu_get_status(); 948 status = intel_pmu_get_status();
@@ -998,6 +1008,9 @@ intel_bts_constraints(struct perf_event *event)
998 struct hw_perf_event *hwc = &event->hw; 1008 struct hw_perf_event *hwc = &event->hw;
999 unsigned int hw_event, bts_event; 1009 unsigned int hw_event, bts_event;
1000 1010
1011 if (event->attr.freq)
1012 return NULL;
1013
1001 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; 1014 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
1002 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); 1015 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
1003 1016
@@ -1305,7 +1318,7 @@ static void intel_clovertown_quirks(void)
1305 * AJ106 could possibly be worked around by not allowing LBR 1318 * AJ106 could possibly be worked around by not allowing LBR
1306 * usage from PEBS, including the fixup. 1319 * usage from PEBS, including the fixup.
1307 * AJ68 could possibly be worked around by always programming 1320 * AJ68 could possibly be worked around by always programming
1308 * a pebs_event_reset[0] value and coping with the lost events. 1321 * a pebs_event_reset[0] value and coping with the lost events.
1309 * 1322 *
1310 * But taken together it might just make sense to not enable PEBS on 1323 * But taken together it might just make sense to not enable PEBS on
1311 * these chips. 1324 * these chips.
@@ -1409,6 +1422,18 @@ static __init int intel_pmu_init(void)
1409 x86_pmu.percore_constraints = intel_nehalem_percore_constraints; 1422 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
1410 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1423 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1411 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1424 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1425
1426 if (ebx & 0x40) {
1427 /*
1428 * Erratum AAJ80 detected, we work it around by using
1429 * the BR_MISP_EXEC.ANY event. This will over-count
1430 * branch-misses, but it's still much better than the
1431 * architectural event which is often completely bogus:
1432 */
1433 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1434
1435 pr_cont("erratum AAJ80 worked around, ");
1436 }
1412 pr_cont("Nehalem events, "); 1437 pr_cont("Nehalem events, ");
1413 break; 1438 break;
1414 1439
@@ -1425,6 +1450,7 @@ static __init int intel_pmu_init(void)
1425 1450
1426 case 37: /* 32 nm nehalem, "Clarkdale" */ 1451 case 37: /* 32 nm nehalem, "Clarkdale" */
1427 case 44: /* 32 nm nehalem, "Gulftown" */ 1452 case 44: /* 32 nm nehalem, "Gulftown" */
1453 case 47: /* 32 nm Xeon E7 */
1428 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 1454 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
1429 sizeof(hw_cache_event_ids)); 1455 sizeof(hw_cache_event_ids));
1430 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 1456 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
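
For reference, the ebx tested in the AAJ80 hunk above is CPUID.0AH:EBX, where a set bit marks an architectural event as unavailable; bit 6 covers branch-misses. A sketch of the detection, assuming the surrounding intel_pmu_init() context:

unsigned int eax, ebx, ecx, edx;

cpuid(0x0a, &eax, &ebx, &ecx, &edx);
if (ebx & 0x40)         /* architectural branch-misses event unusable */
        intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
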
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 0811f5ebfba6..e93fcd55fae1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -777,6 +777,7 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
777 * the counter has reached zero value and continued counting before 777 * the counter has reached zero value and continued counting before
778 * real NMI signal was received: 778 * real NMI signal was received:
779 */ 779 */
780 rdmsrl(hwc->event_base, v);
780 if (!(v & ARCH_P4_UNFLAGGED_BIT)) 781 if (!(v & ARCH_P4_UNFLAGGED_BIT))
781 return 1; 782 return 1;
782 783
@@ -946,14 +947,23 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
946 if (!x86_perf_event_set_period(event)) 947 if (!x86_perf_event_set_period(event))
947 continue; 948 continue;
948 if (perf_event_overflow(event, 1, &data, regs)) 949 if (perf_event_overflow(event, 1, &data, regs))
949 p4_pmu_disable_event(event); 950 x86_pmu_stop(event, 0);
950 } 951 }
951 952
952 if (handled) { 953 if (handled)
953 /* p4 quirk: unmask it again */
954 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
955 inc_irq_stat(apic_perf_irqs); 954 inc_irq_stat(apic_perf_irqs);
956 } 955
956 /*
957 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
958 * been observed that the OVF flag has to be cleared _before_
959 * the LVTPC can be unmasked.
960 *
961 * The reason is that the NMI line will continue to be asserted while
962 * the OVF bit is set. This causes a second NMI to be generated if the
963 * LVTPC is unmasked before the OVF bit is cleared, leading to unknown
964 * NMI messages.
965 */
966 apic_write(APIC_LVTPC, APIC_DM_NMI);
957 967
958 return handled; 968 return handled;
959} 969}
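
A minimal sketch of the ordering that comment demands; counter_overflowed(), clear_ovf() and handle_overflow() are hypothetical stand-ins for the P4 handler's real per-counter work, while apic_write() and the APIC constants are the kernel's:

static int p4_nmi_ordering_sketch(int num_counters)
{
        int idx, handled = 0;

        for (idx = 0; idx < num_counters; idx++) {
                if (!counter_overflowed(idx))    /* hypothetical */
                        continue;
                clear_ovf(idx);                  /* 1: clear OVF first */
                handled += handle_overflow(idx); /* hypothetical */
        }
        /* 2: unmask the LVTPC only once every OVF bit is clear;
         * unmasking earlier re-fires the still-asserted NMI line
         * and produces an unknown NMI */
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        return handled;
}
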
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index d5cd13945d5a..642f75a68cd5 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -14,9 +14,6 @@
14 14
15static void *kdump_buf_page; 15static void *kdump_buf_page;
16 16
17/* Stores the physical address of elf header of crash image. */
18unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
19
20static inline bool is_crashed_pfn_valid(unsigned long pfn) 17static inline bool is_crashed_pfn_valid(unsigned long pfn)
21{ 18{
22#ifndef CONFIG_X86_PAE 19#ifndef CONFIG_X86_PAE
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 994828899e09..afa64adb75ee 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,9 +10,6 @@
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/io.h> 11#include <linux/io.h>
12 12
13/* Stores the physical address of elf header of crash image. */
14unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
15
16/** 13/**
17 * copy_oldmem_page - copy one page from "oldmem" 14 * copy_oldmem_page - copy one page from "oldmem"
18 * @pfn: page frame number to be copied 15 * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 7a8cebc9ff29..e90f08458e6b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -65,12 +65,10 @@ unsigned int irq_create_of_mapping(struct device_node *controller,
65 return 0; 65 return 0;
66 ret = ih->xlate(ih, intspec, intsize, &virq, &type); 66 ret = ih->xlate(ih, intspec, intsize, &virq, &type);
67 if (ret) 67 if (ret)
68 return ret; 68 return 0;
69 if (type == IRQ_TYPE_NONE) 69 if (type == IRQ_TYPE_NONE)
70 return virq; 70 return virq;
71 /* set the mask if it is different from current */ 71 irq_set_irq_type(virq, type);
72 if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
73 set_irq_type(virq, type);
74 return virq; 72 return virq;
75} 73}
76EXPORT_SYMBOL_GPL(irq_create_of_mapping); 74EXPORT_SYMBOL_GPL(irq_create_of_mapping);
@@ -393,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
393 391
394 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); 392 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
395 393
396 return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr); 394 return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
397} 395}
398 396
399static void __init ioapic_add_ofnode(struct device_node *np) 397static void __init ioapic_add_ofnode(struct device_node *np)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 81ac6c78c01c..e2a3f0606da4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -27,7 +27,7 @@ static int die_counter;
27 27
28void printk_address(unsigned long address, int reliable) 28void printk_address(unsigned long address, int reliable)
29{ 29{
30 printk(" [<%p>] %s%pS\n", (void *) address, 30 printk(" [<%p>] %s%pB\n", (void *) address,
31 reliable ? "" : "? ", (void *) address); 31 reliable ? "" : "? ", (void *) address);
32} 32}
33 33
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index cdf5bfd9d4d5..3e2ef8425316 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -11,6 +11,7 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/crash_dump.h>
14#include <linux/bootmem.h> 15#include <linux/bootmem.h>
15#include <linux/pfn.h> 16#include <linux/pfn.h>
16#include <linux/suspend.h> 17#include <linux/suspend.h>
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index b42ca694dc68..8eeaa81de066 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -10,7 +10,7 @@
10 */ 10 */
11 11
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/sysdev.h> 13#include <linux/syscore_ops.h>
14 14
15#include <asm/dma.h> 15#include <asm/dma.h>
16 16
@@ -21,7 +21,7 @@
21 * in asm/dma.h. 21 * in asm/dma.h.
22 */ 22 */
23 23
24static int i8237A_resume(struct sys_device *dev) 24static void i8237A_resume(void)
25{ 25{
26 unsigned long flags; 26 unsigned long flags;
27 int i; 27 int i;
@@ -41,31 +41,15 @@ static int i8237A_resume(struct sys_device *dev)
41 enable_dma(4); 41 enable_dma(4);
42 42
43 release_dma_lock(flags); 43 release_dma_lock(flags);
44
45 return 0;
46} 44}
47 45
48static int i8237A_suspend(struct sys_device *dev, pm_message_t state) 46static struct syscore_ops i8237_syscore_ops = {
49{
50 return 0;
51}
52
53static struct sysdev_class i8237_sysdev_class = {
54 .name = "i8237",
55 .suspend = i8237A_suspend,
56 .resume = i8237A_resume, 47 .resume = i8237A_resume,
57}; 48};
58 49
59static struct sys_device device_i8237A = { 50static int __init i8237A_init_ops(void)
60 .id = 0,
61 .cls = &i8237_sysdev_class,
62};
63
64static int __init i8237A_init_sysfs(void)
65{ 51{
66 int error = sysdev_class_register(&i8237_sysdev_class); 52 register_syscore_ops(&i8237_syscore_ops);
67 if (!error) 53 return 0;
68 error = sysdev_register(&device_i8237A);
69 return error;
70} 54}
71device_initcall(i8237A_init_sysfs); 55device_initcall(i8237A_init_ops);
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index d9ca749c123b..65b8f5c2eebf 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -8,7 +8,7 @@
8#include <linux/random.h> 8#include <linux/random.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/kernel_stat.h> 10#include <linux/kernel_stat.h>
11#include <linux/sysdev.h> 11#include <linux/syscore_ops.h>
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include <linux/acpi.h> 13#include <linux/acpi.h>
14#include <linux/io.h> 14#include <linux/io.h>
@@ -245,20 +245,19 @@ static void save_ELCR(char *trigger)
245 trigger[1] = inb(0x4d1) & 0xDE; 245 trigger[1] = inb(0x4d1) & 0xDE;
246} 246}
247 247
248static int i8259A_resume(struct sys_device *dev) 248static void i8259A_resume(void)
249{ 249{
250 init_8259A(i8259A_auto_eoi); 250 init_8259A(i8259A_auto_eoi);
251 restore_ELCR(irq_trigger); 251 restore_ELCR(irq_trigger);
252 return 0;
253} 252}
254 253
255static int i8259A_suspend(struct sys_device *dev, pm_message_t state) 254static int i8259A_suspend(void)
256{ 255{
257 save_ELCR(irq_trigger); 256 save_ELCR(irq_trigger);
258 return 0; 257 return 0;
259} 258}
260 259
261static int i8259A_shutdown(struct sys_device *dev) 260static void i8259A_shutdown(void)
262{ 261{
263 /* Put the i8259A into a quiescent state that 262 /* Put the i8259A into a quiescent state that
264 * the kernel initialization code can get it 263 * the kernel initialization code can get it
@@ -266,21 +265,14 @@ static int i8259A_shutdown(struct sys_device *dev)
266 */ 265 */
267 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 266 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
268 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 267 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
269 return 0;
270} 268}
271 269
272static struct sysdev_class i8259_sysdev_class = { 270static struct syscore_ops i8259_syscore_ops = {
273 .name = "i8259",
274 .suspend = i8259A_suspend, 271 .suspend = i8259A_suspend,
275 .resume = i8259A_resume, 272 .resume = i8259A_resume,
276 .shutdown = i8259A_shutdown, 273 .shutdown = i8259A_shutdown,
277}; 274};
278 275
279static struct sys_device device_i8259A = {
280 .id = 0,
281 .cls = &i8259_sysdev_class,
282};
283
284static void mask_8259A(void) 276static void mask_8259A(void)
285{ 277{
286 unsigned long flags; 278 unsigned long flags;
@@ -399,17 +391,12 @@ struct legacy_pic default_legacy_pic = {
399 391
400struct legacy_pic *legacy_pic = &default_legacy_pic; 392struct legacy_pic *legacy_pic = &default_legacy_pic;
401 393
402static int __init i8259A_init_sysfs(void) 394static int __init i8259A_init_ops(void)
403{ 395{
404 int error; 396 if (legacy_pic == &default_legacy_pic)
405 397 register_syscore_ops(&i8259_syscore_ops);
406 if (legacy_pic != &default_legacy_pic)
407 return 0;
408 398
409 error = sysdev_class_register(&i8259_sysdev_class); 399 return 0;
410 if (!error)
411 error = sysdev_register(&device_i8259A);
412 return error;
413} 400}
414 401
415device_initcall(i8259A_init_sysfs); 402device_initcall(i8259A_init_ops);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 948a31eae75f..1cb0b9fc78dc 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -8,6 +8,7 @@
8#include <linux/seq_file.h> 8#include <linux/seq_file.h>
9#include <linux/smp.h> 9#include <linux/smp.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <linux/delay.h>
11 12
12#include <asm/apic.h> 13#include <asm/apic.h>
13#include <asm/io_apic.h> 14#include <asm/io_apic.h>
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index dba0b36941a5..5f9ecff328b5 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -121,8 +121,8 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
121 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, 121 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
122 dbg_reg_def[regno].size); 122 dbg_reg_def[regno].size);
123 123
124 switch (regno) {
125#ifdef CONFIG_X86_32 124#ifdef CONFIG_X86_32
125 switch (regno) {
126 case GDB_SS: 126 case GDB_SS:
127 if (!user_mode_vm(regs)) 127 if (!user_mode_vm(regs))
128 *(unsigned long *)mem = __KERNEL_DS; 128 *(unsigned long *)mem = __KERNEL_DS;
@@ -135,8 +135,8 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
135 case GDB_FS: 135 case GDB_FS:
136 *(unsigned long *)mem = 0xFFFF; 136 *(unsigned long *)mem = 0xFFFF;
137 break; 137 break;
138#endif
139 } 138 }
139#endif
140 return dbg_reg_def[regno].name; 140 return dbg_reg_def[regno].name;
141} 141}
142 142
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 87af68e0e1e1..f9242800bc84 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -82,6 +82,7 @@
82#include <linux/cpu.h> 82#include <linux/cpu.h>
83#include <linux/fs.h> 83#include <linux/fs.h>
84#include <linux/mm.h> 84#include <linux/mm.h>
85#include <linux/syscore_ops.h>
85 86
86#include <asm/microcode.h> 87#include <asm/microcode.h>
87#include <asm/processor.h> 88#include <asm/processor.h>
@@ -438,33 +439,25 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
438 return 0; 439 return 0;
439} 440}
440 441
441static int mc_sysdev_resume(struct sys_device *dev) 442static struct sysdev_driver mc_sysdev_driver = {
443 .add = mc_sysdev_add,
444 .remove = mc_sysdev_remove,
445};
446
447/**
448 * mc_bp_resume - Update boot CPU microcode during resume.
449 */
450static void mc_bp_resume(void)
442{ 451{
443 int cpu = dev->id; 452 int cpu = smp_processor_id();
444 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 453 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
445 454
446 if (!cpu_online(cpu))
447 return 0;
448
449 /*
450 * All non-bootup cpus are still disabled,
451 * so only CPU 0 will apply ucode here.
452 *
453 * Moreover, there can be no concurrent
454 * updates from any other places at this point.
455 */
456 WARN_ON(cpu != 0);
457
458 if (uci->valid && uci->mc) 455 if (uci->valid && uci->mc)
459 microcode_ops->apply_microcode(cpu); 456 microcode_ops->apply_microcode(cpu);
460
461 return 0;
462} 457}
463 458
464static struct sysdev_driver mc_sysdev_driver = { 459static struct syscore_ops mc_syscore_ops = {
465 .add = mc_sysdev_add, 460 .resume = mc_bp_resume,
466 .remove = mc_sysdev_remove,
467 .resume = mc_sysdev_resume,
468}; 461};
469 462
470static __cpuinit int 463static __cpuinit int
@@ -542,6 +535,7 @@ static int __init microcode_init(void)
542 if (error) 535 if (error)
543 return error; 536 return error;
544 537
538 register_syscore_ops(&mc_syscore_ops);
545 register_hotcpu_notifier(&mc_cpu_notifier); 539 register_hotcpu_notifier(&mc_cpu_notifier);
546 540
547 pr_info("Microcode Update Driver: v" MICROCODE_VERSION 541 pr_info("Microcode Update Driver: v" MICROCODE_VERSION
@@ -556,6 +550,7 @@ static void __exit microcode_exit(void)
556 microcode_dev_exit(); 550 microcode_dev_exit();
557 551
558 unregister_hotcpu_notifier(&mc_cpu_notifier); 552 unregister_hotcpu_notifier(&mc_cpu_notifier);
553 unregister_syscore_ops(&mc_syscore_ops);
559 554
560 get_online_cpus(); 555 get_online_cpus();
561 mutex_lock(&microcode_mutex); 556 mutex_lock(&microcode_mutex);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index ef32d4c09c64..ef59817357fc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -714,10 +714,6 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
714 *nr_m_spare += 1; 714 *nr_m_spare += 1;
715 } 715 }
716} 716}
717#else /* CONFIG_X86_IO_APIC */
718static
719inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
720#endif /* CONFIG_X86_IO_APIC */
721 717
722static int 718static int
723check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) 719check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
@@ -729,6 +725,10 @@ check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
729 725
730 return 0; 726 return 0;
731} 727}
728#else /* CONFIG_X86_IO_APIC */
729static
730inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
731#endif /* CONFIG_X86_IO_APIC */
732 732
733static int __init replace_intsrc_all(struct mpc_table *mpc, 733static int __init replace_intsrc_all(struct mpc_table *mpc,
734 unsigned long mpc_new_phys, 734 unsigned long mpc_new_phys,
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c01ffa5b9b87..b117efd24f71 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -27,7 +27,7 @@
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/scatterlist.h> 28#include <linux/scatterlist.h>
29#include <linux/iommu-helper.h> 29#include <linux/iommu-helper.h>
30#include <linux/sysdev.h> 30#include <linux/syscore_ops.h>
31#include <linux/io.h> 31#include <linux/io.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <asm/atomic.h> 33#include <asm/atomic.h>
@@ -81,6 +81,9 @@ static u32 gart_unmapped_entry;
81#define AGPEXTERN 81#define AGPEXTERN
82#endif 82#endif
83 83
84/* GART can only remap to physical addresses < 1TB */
85#define GART_MAX_PHYS_ADDR (1ULL << 40)
86
84/* backdoor interface to AGP driver */ 87/* backdoor interface to AGP driver */
85AGPEXTERN int agp_memory_reserved; 88AGPEXTERN int agp_memory_reserved;
86AGPEXTERN __u32 *agp_gatt_table; 89AGPEXTERN __u32 *agp_gatt_table;
@@ -212,9 +215,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
212 size_t size, int dir, unsigned long align_mask) 215 size_t size, int dir, unsigned long align_mask)
213{ 216{
214 unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); 217 unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
215 unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); 218 unsigned long iommu_page;
216 int i; 219 int i;
217 220
221 if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
222 return bad_dma_addr;
223
224 iommu_page = alloc_iommu(dev, npages, align_mask);
218 if (iommu_page == -1) { 225 if (iommu_page == -1) {
219 if (!nonforced_iommu(dev, phys_mem, size)) 226 if (!nonforced_iommu(dev, phys_mem, size))
220 return phys_mem; 227 return phys_mem;
@@ -589,7 +596,7 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
589 aperture_alloc = aper_alloc; 596 aperture_alloc = aper_alloc;
590} 597}
591 598
592static void gart_fixup_northbridges(struct sys_device *dev) 599static void gart_fixup_northbridges(void)
593{ 600{
594 int i; 601 int i;
595 602
@@ -613,33 +620,20 @@ static void gart_fixup_northbridges(struct sys_device *dev)
613 } 620 }
614} 621}
615 622
616static int gart_resume(struct sys_device *dev) 623static void gart_resume(void)
617{ 624{
618 pr_info("PCI-DMA: Resuming GART IOMMU\n"); 625 pr_info("PCI-DMA: Resuming GART IOMMU\n");
619 626
620 gart_fixup_northbridges(dev); 627 gart_fixup_northbridges();
621 628
622 enable_gart_translations(); 629 enable_gart_translations();
623
624 return 0;
625} 630}
626 631
627static int gart_suspend(struct sys_device *dev, pm_message_t state) 632static struct syscore_ops gart_syscore_ops = {
628{
629 return 0;
630}
631
632static struct sysdev_class gart_sysdev_class = {
633 .name = "gart",
634 .suspend = gart_suspend,
635 .resume = gart_resume, 633 .resume = gart_resume,
636 634
637}; 635};
638 636
639static struct sys_device device_gart = {
640 .cls = &gart_sysdev_class,
641};
642
643/* 637/*
644 * Private Northbridge GATT initialization in case we cannot use the 638 * Private Northbridge GATT initialization in case we cannot use the
645 * AGP driver for some reason. 639 * AGP driver for some reason.
@@ -650,7 +644,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
650 unsigned aper_base, new_aper_base; 644 unsigned aper_base, new_aper_base;
651 struct pci_dev *dev; 645 struct pci_dev *dev;
652 void *gatt; 646 void *gatt;
653 int i, error; 647 int i;
654 648
655 pr_info("PCI-DMA: Disabling AGP.\n"); 649 pr_info("PCI-DMA: Disabling AGP.\n");
656 650
@@ -685,12 +679,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
685 679
686 agp_gatt_table = gatt; 680 agp_gatt_table = gatt;
687 681
688 error = sysdev_class_register(&gart_sysdev_class); 682 register_syscore_ops(&gart_syscore_ops);
689 if (!error)
690 error = sysdev_register(&device_gart);
691 if (error)
692 panic("Could not register gart_sysdev -- "
693 "would corrupt data on next suspend");
694 683
695 flush_gart(); 684 flush_gart();
696 685
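
The 1TB limit introduced above reflects the GART aperture's 40-bit physical address reach; mapping requests beyond it must fail early rather than install bogus GATT entries. A sketch of the check, using the macro from the hunk (gart_phys_mappable() is a hypothetical helper mirroring the dma_map_area() test):

#define GART_MAX_PHYS_ADDR      (1ULL << 40)    /* GART remaps below 1TB */

static bool gart_phys_mappable(dma_addr_t phys_mem, size_t size)
{
        return phys_mem + size <= GART_MAX_PHYS_ADDR;
}
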
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bd387e8f73b4..6c9dd922ac0d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -501,6 +501,10 @@ void set_personality_64bit(void)
501 /* Make sure to be in 64bit mode */ 501 /* Make sure to be in 64bit mode */
502 clear_thread_flag(TIF_IA32); 502 clear_thread_flag(TIF_IA32);
503 503
504 /* Ensure the corresponding mm is not marked. */
505 if (current->mm)
506 current->mm->context.ia32_compat = 0;
507
504 /* TBD: overwrites user setup. Should have two bits. 508 /* TBD: overwrites user setup. Should have two bits.
505 But 64bit processes have always behaved this way, 509 But 64bit processes have always behaved this way,
506 so it's not too bad. The main problem is just that 510 so it's not too bad. The main problem is just that
@@ -516,6 +520,10 @@ void set_personality_ia32(void)
516 set_thread_flag(TIF_IA32); 520 set_thread_flag(TIF_IA32);
517 current->personality |= force_personality32; 521 current->personality |= force_personality32;
518 522
523 /* Mark the associated mm as containing 32-bit tasks. */
524 if (current->mm)
525 current->mm->context.ia32_compat = 1;
526
519 /* Prepare the first "return" to user space */ 527 /* Prepare the first "return" to user space */
520 current_thread_info()->status |= TS_COMPAT; 528 current_thread_info()->status |= TS_COMPAT;
521} 529}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index d3ce37edb54d..08c44b08bf5b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -6,6 +6,7 @@
6#include <linux/dmi.h> 6#include <linux/dmi.h>
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/tboot.h> 8#include <linux/tboot.h>
9#include <linux/delay.h>
9#include <acpi/reboot.h> 10#include <acpi/reboot.h>
10#include <asm/io.h> 11#include <asm/io.h>
11#include <asm/apic.h> 12#include <asm/apic.h>
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
index 29092b38d816..1d5c46df0d78 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/kernel/reboot_32.S
@@ -21,26 +21,26 @@ r_base = .
21 /* Get our own relocated address */ 21 /* Get our own relocated address */
22 call 1f 22 call 1f
231: popl %ebx 231: popl %ebx
24 subl $1b, %ebx 24 subl $(1b - r_base), %ebx
25 25
26 /* Compute the equivalent real-mode segment */ 26 /* Compute the equivalent real-mode segment */
27 movl %ebx, %ecx 27 movl %ebx, %ecx
28 shrl $4, %ecx 28 shrl $4, %ecx
29 29
30 /* Patch post-real-mode segment jump */ 30 /* Patch post-real-mode segment jump */
31 movw dispatch_table(%ebx,%eax,2),%ax 31 movw (dispatch_table - r_base)(%ebx,%eax,2),%ax
32 movw %ax, 101f(%ebx) 32 movw %ax, (101f - r_base)(%ebx)
33 movw %cx, 102f(%ebx) 33 movw %cx, (102f - r_base)(%ebx)
34 34
35 /* Set up the IDT for real mode. */ 35 /* Set up the IDT for real mode. */
36 lidtl machine_real_restart_idt(%ebx) 36 lidtl (machine_real_restart_idt - r_base)(%ebx)
37 37
38 /* 38 /*
39 * Set up a GDT from which we can load segment descriptors for real 39 * Set up a GDT from which we can load segment descriptors for real
40 * mode. The GDT is not used in real mode; it is just needed here to 40 * mode. The GDT is not used in real mode; it is just needed here to
41 * prepare the descriptors. 41 * prepare the descriptors.
42 */ 42 */
43 lgdtl machine_real_restart_gdt(%ebx) 43 lgdtl (machine_real_restart_gdt - r_base)(%ebx)
44 44
45 /* 45 /*
46 * Load the data segment registers with 16-bit compatible values 46 * Load the data segment registers with 16-bit compatible values
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 32bd87cbf982..4be9b398470e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -619,28 +619,6 @@ void __init reserve_standard_io_resources(void)
619 619
620} 620}
621 621
622/*
623 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
624 * is_kdump_kernel() to determine if we are booting after a panic. Hence
625 * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
626 */
627
628#ifdef CONFIG_CRASH_DUMP
629/* elfcorehdr= specifies the location of elf core header
630 * stored by the crashed kernel. This option will be passed
631 * by kexec loader to the capture kernel.
632 */
633static int __init setup_elfcorehdr(char *arg)
634{
635 char *end;
636 if (!arg)
637 return -EINVAL;
638 elfcorehdr_addr = memparse(arg, &end);
639 return end > arg ? 0 : -EINVAL;
640}
641early_param("elfcorehdr", setup_elfcorehdr);
642#endif
643
644static __init void reserve_ibft_region(void) 622static __init void reserve_ibft_region(void)
645{ 623{
646 unsigned long addr, size = 0; 624 unsigned long addr, size = 0;
@@ -998,6 +976,11 @@ void __init setup_arch(char **cmdline_p)
998 paging_init(); 976 paging_init();
999 x86_init.paging.pagetable_setup_done(swapper_pg_dir); 977 x86_init.paging.pagetable_setup_done(swapper_pg_dir);
1000 978
979 if (boot_cpu_data.cpuid_level >= 0) {
980 /* A CPU has %cr4 if and only if it has CPUID */
981 mmu_cr4_features = read_cr4();
982 }
983
1001#ifdef CONFIG_X86_32 984#ifdef CONFIG_X86_32
1002 /* sync back kernel address range */ 985 /* sync back kernel address range */
1003 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, 986 clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 58f517b59645..934b4c6b0bf9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2395,9 +2395,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2395 int i; 2395 int i;
2396 2396
2397 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 2397 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2398 for (i = 1; *nent < maxnent; ++i) { 2398 for (i = 1; *nent < maxnent && i < 64; ++i) {
2399 if (entry[i - 1].eax == 0 && i != 2) 2399 if (entry[i].eax == 0)
2400 break; 2400 continue;
2401 do_cpuid_1_ent(&entry[i], function, i); 2401 do_cpuid_1_ent(&entry[i], function, i);
2402 entry[i].flags |= 2402 entry[i].flags |=
2403 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 2403 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -4958,12 +4958,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
4958 best = e; 4958 best = e;
4959 break; 4959 break;
4960 } 4960 }
4961 /*
4962 * Both basic or both extended?
4963 */
4964 if (((e->function ^ function) & 0x80000000) == 0)
4965 if (!best || e->function > best->function)
4966 best = e;
4967 } 4961 }
4968 return best; 4962 return best;
4969} 4963}
@@ -4983,6 +4977,27 @@ not_found:
4983 return 36; 4977 return 36;
4984} 4978}
4985 4979
4980/*
4981 * If no match is found, check whether we exceed the vCPU's limit
4982 * and return the content of the highest valid _standard_ leaf instead.
4983 * This is to satisfy the CPUID specification.
4984 */
4985static struct kvm_cpuid_entry2 *check_cpuid_limit(struct kvm_vcpu *vcpu,
4986 u32 function, u32 index)
4987{
4988 struct kvm_cpuid_entry2 *maxlevel;
4989
4990 maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
4991 if (!maxlevel || maxlevel->eax >= function)
4992 return NULL;
4993 if (function & 0x80000000) {
4994 maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
4995 if (!maxlevel)
4996 return NULL;
4997 }
4998 return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
4999}
5000
4986void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 5001void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
4987{ 5002{
4988 u32 function, index; 5003 u32 function, index;
@@ -4995,6 +5010,10 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
4995 kvm_register_write(vcpu, VCPU_REGS_RCX, 0); 5010 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
4996 kvm_register_write(vcpu, VCPU_REGS_RDX, 0); 5011 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
4997 best = kvm_find_cpuid_entry(vcpu, function, index); 5012 best = kvm_find_cpuid_entry(vcpu, function, index);
5013
5014 if (!best)
5015 best = check_cpuid_limit(vcpu, function, index);
5016
4998 if (best) { 5017 if (best) {
4999 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); 5018 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
5000 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); 5019 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
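
A worked example of the check_cpuid_limit() fallback above, for a hypothetical guest whose basic leaves end at 0xa and whose extended leaves end at 0x80000004 (values illustrative, not from this patch):

    cpuid(0x0000000b): leaf 0 reports max = 0xa, below the request,
                       so the contents of leaf 0xa are returned;
    cpuid(0x80000008): leaf 0x80000000 reports max = 0x80000004, also
                       below the request; because the request is in the
                       extended range, the code falls back to leaf 0 and
                       again returns the highest valid basic leaf, 0xa.

Either way an out-of-range query yields the highest valid standard leaf, as the CPUID specification requires.
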
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 3e8b08a6de2b..1e572c507d06 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -10,6 +10,12 @@
10#include <asm/frame.h> 10#include <asm/frame.h>
11#include <asm/dwarf2.h> 11#include <asm/dwarf2.h>
12 12
13#ifdef CONFIG_SMP
14#define SEG_PREFIX %gs:
15#else
16#define SEG_PREFIX
17#endif
18
13.text 19.text
14 20
15/* 21/*
@@ -37,13 +43,13 @@ this_cpu_cmpxchg16b_emu:
37 pushf 43 pushf
38 cli 44 cli
39 45
40 cmpq %gs:(%rsi), %rax 46 cmpq SEG_PREFIX(%rsi), %rax
41 jne not_same 47 jne not_same
42 cmpq %gs:8(%rsi), %rdx 48 cmpq SEG_PREFIX 8(%rsi), %rdx
43 jne not_same 49 jne not_same
44 50
45 movq %rbx, %gs:(%rsi) 51 movq %rbx, SEG_PREFIX(%rsi)
46 movq %rcx, %gs:8(%rsi) 52 movq %rcx, SEG_PREFIX 8(%rsi)
47 53
48 popf 54 popf
49 mov $1, %al 55 mov $1, %al
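
A C-level sketch of what this_cpu_cmpxchg16b_emu implements, assuming a 16-byte-aligned target. On SMP the real code applies the %gs prefix so the pointer is percpu-relative, and disabling interrupts only protects against the local CPU; that is why this emulation is valid for this_cpu ops but not as a general cmpxchg16b:

static bool cmpxchg16b_emu_sketch(u64 *ptr, u64 old_lo, u64 old_hi,
                                  u64 new_lo, u64 new_hi)
{
        unsigned long flags;
        bool ok = false;

        local_irq_save(flags);          /* pushf; cli */
        if (ptr[0] == old_lo && ptr[1] == old_hi) {
                ptr[0] = new_lo;        /* movq %rbx, SEG_PREFIX(%rsi) */
                ptr[1] = new_hi;        /* movq %rcx, SEG_PREFIX 8(%rsi) */
                ok = true;
        }
        local_irq_restore(flags);       /* popf */
        return ok;                      /* mov $1/$0, %al */
}
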
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2362b646178e..794233587287 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -862,18 +862,18 @@ static struct vm_area_struct gate_vma = {
862 .vm_flags = VM_READ | VM_EXEC 862 .vm_flags = VM_READ | VM_EXEC
863}; 863};
864 864
865struct vm_area_struct *get_gate_vma(struct task_struct *tsk) 865struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
866{ 866{
867#ifdef CONFIG_IA32_EMULATION 867#ifdef CONFIG_IA32_EMULATION
868 if (test_tsk_thread_flag(tsk, TIF_IA32)) 868 if (!mm || mm->context.ia32_compat)
869 return NULL; 869 return NULL;
870#endif 870#endif
871 return &gate_vma; 871 return &gate_vma;
872} 872}
873 873
874int in_gate_area(struct task_struct *task, unsigned long addr) 874int in_gate_area(struct mm_struct *mm, unsigned long addr)
875{ 875{
876 struct vm_area_struct *vma = get_gate_vma(task); 876 struct vm_area_struct *vma = get_gate_vma(mm);
877 877
878 if (!vma) 878 if (!vma)
879 return 0; 879 return 0;
@@ -882,11 +882,11 @@ int in_gate_area(struct task_struct *task, unsigned long addr)
882} 882}
883 883
884/* 884/*
885 * Use this when you have no reliable task/vma, typically from interrupt 885 * Use this when you have no reliable mm, typically from interrupt
886 * context. It is less reliable than using the task's vma and may give 886 * context. It is less reliable than using a task's mm and may give
887 * false positives: 887 * false positives.
888 */ 888 */
889int in_gate_area_no_task(unsigned long addr) 889int in_gate_area_no_mm(unsigned long addr)
890{ 890{
891 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); 891 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
892} 892}
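
The process_64.c and init_64.c hunks above work as a pair: the personality code stamps the mm, and the gate-VMA lookup keys off that stamp instead of a task flag, so callers holding only an mm reference (no task) get a correct answer. A sketch of the resulting predicate (mm_has_gate_vma() is a hypothetical condensation):

static bool mm_has_gate_vma(struct mm_struct *mm)
{
        /* kernel threads (no mm) and 32-bit-compat mms have no
         * vsyscall gate page */
        return mm && !mm->context.ia32_compat;
}
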
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 9559d360fde7..745258dfc4dc 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -213,53 +213,48 @@ int early_cpu_to_node(int cpu)
213 return per_cpu(x86_cpu_to_node_map, cpu); 213 return per_cpu(x86_cpu_to_node_map, cpu);
214} 214}
215 215
216struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) 216void debug_cpumask_set_cpu(int cpu, int node, bool enable)
217{ 217{
218 int node = early_cpu_to_node(cpu);
219 struct cpumask *mask; 218 struct cpumask *mask;
220 char buf[64]; 219 char buf[64];
221 220
222 if (node == NUMA_NO_NODE) { 221 if (node == NUMA_NO_NODE) {
223 /* early_cpu_to_node() already emits a warning and trace */ 222 /* early_cpu_to_node() already emits a warning and trace */
224 return NULL; 223 return;
225 } 224 }
226 mask = node_to_cpumask_map[node]; 225 mask = node_to_cpumask_map[node];
227 if (!mask) { 226 if (!mask) {
228 pr_err("node_to_cpumask_map[%i] NULL\n", node); 227 pr_err("node_to_cpumask_map[%i] NULL\n", node);
229 dump_stack(); 228 dump_stack();
230 return NULL; 229 return;
231 } 230 }
232 231
232 if (enable)
233 cpumask_set_cpu(cpu, mask);
234 else
235 cpumask_clear_cpu(cpu, mask);
236
233 cpulist_scnprintf(buf, sizeof(buf), mask); 237 cpulist_scnprintf(buf, sizeof(buf), mask);
234 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 238 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
235 enable ? "numa_add_cpu" : "numa_remove_cpu", 239 enable ? "numa_add_cpu" : "numa_remove_cpu",
236 cpu, node, buf); 240 cpu, node, buf);
237 return mask; 241 return;
238} 242}
239 243
240# ifndef CONFIG_NUMA_EMU 244# ifndef CONFIG_NUMA_EMU
241static void __cpuinit numa_set_cpumask(int cpu, int enable) 245static void __cpuinit numa_set_cpumask(int cpu, bool enable)
242{ 246{
243 struct cpumask *mask; 247 debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
244
245 mask = debug_cpumask_set_cpu(cpu, enable);
246 if (!mask)
247 return;
248
249 if (enable)
250 cpumask_set_cpu(cpu, mask);
251 else
252 cpumask_clear_cpu(cpu, mask);
253} 248}
254 249
255void __cpuinit numa_add_cpu(int cpu) 250void __cpuinit numa_add_cpu(int cpu)
256{ 251{
257 numa_set_cpumask(cpu, 1); 252 numa_set_cpumask(cpu, true);
258} 253}
259 254
260void __cpuinit numa_remove_cpu(int cpu) 255void __cpuinit numa_remove_cpu(int cpu)
261{ 256{
262 numa_set_cpumask(cpu, 0); 257 numa_set_cpumask(cpu, false);
263} 258}
264# endif /* !CONFIG_NUMA_EMU */ 259# endif /* !CONFIG_NUMA_EMU */
265 260
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc72033..85b52fc03084 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -306,7 +306,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
306 bi->end = min(bi->end, high); 306 bi->end = min(bi->end, high);
307 307
308 /* and there's no empty block */ 308 /* and there's no empty block */
309 if (bi->start == bi->end) { 309 if (bi->start >= bi->end) {
310 numa_remove_memblk_from(i--, mi); 310 numa_remove_memblk_from(i--, mi);
311 continue; 311 continue;
312 } 312 }
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index ad091e4cff17..de84cc140379 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -454,10 +454,9 @@ void __cpuinit numa_remove_cpu(int cpu)
454 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); 454 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
455} 455}
456#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ 456#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
457static void __cpuinit numa_set_cpumask(int cpu, int enable) 457static void __cpuinit numa_set_cpumask(int cpu, bool enable)
458{ 458{
459 struct cpumask *mask; 459 int nid, physnid;
460 int nid, physnid, i;
461 460
462 nid = early_cpu_to_node(cpu); 461 nid = early_cpu_to_node(cpu);
463 if (nid == NUMA_NO_NODE) { 462 if (nid == NUMA_NO_NODE) {
@@ -467,28 +466,21 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
467 466
468 physnid = emu_nid_to_phys[nid]; 467 physnid = emu_nid_to_phys[nid];
469 468
470 for_each_online_node(i) { 469 for_each_online_node(nid) {
471 if (emu_nid_to_phys[nid] != physnid) 470 if (emu_nid_to_phys[nid] != physnid)
472 continue; 471 continue;
473 472
474 mask = debug_cpumask_set_cpu(cpu, enable); 473 debug_cpumask_set_cpu(cpu, nid, enable);
475 if (!mask)
476 return;
477
478 if (enable)
479 cpumask_set_cpu(cpu, mask);
480 else
481 cpumask_clear_cpu(cpu, mask);
482 } 474 }
483} 475}
484 476
485void __cpuinit numa_add_cpu(int cpu) 477void __cpuinit numa_add_cpu(int cpu)
486{ 478{
487 numa_set_cpumask(cpu, 1); 479 numa_set_cpumask(cpu, true);
488} 480}
489 481
490void __cpuinit numa_remove_cpu(int cpu) 482void __cpuinit numa_remove_cpu(int cpu)
491{ 483{
492 numa_set_cpumask(cpu, 0); 484 numa_set_cpumask(cpu, false);
493} 485}
494#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ 486#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 48651c6f657d..364f36bdfad8 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -211,10 +211,12 @@ int __init get_memcfg_from_srat(void)
211{ 211{
212 int i, j, nid; 212 int i, j, nid;
213 213
214
215 if (srat_disabled()) 214 if (srat_disabled())
216 goto out_fail; 215 goto out_fail;
217 216
217 if (acpi_numa_init() < 0)
218 goto out_fail;
219
218 if (num_memory_chunks == 0) { 220 if (num_memory_chunks == 0) {
219 printk(KERN_DEBUG 221 printk(KERN_DEBUG
220 "could not find any ACPI SRAT memory areas.\n"); 222 "could not find any ACPI SRAT memory areas.\n");
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index e2b7b0c06cdf..cf9750004a08 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -15,7 +15,7 @@
15#include <linux/notifier.h> 15#include <linux/notifier.h>
16#include <linux/smp.h> 16#include <linux/smp.h>
17#include <linux/oprofile.h> 17#include <linux/oprofile.h>
18#include <linux/sysdev.h> 18#include <linux/syscore_ops.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/moduleparam.h> 20#include <linux/moduleparam.h>
21#include <linux/kdebug.h> 21#include <linux/kdebug.h>
@@ -49,6 +49,10 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
49 val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; 49 val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
50 val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; 50 val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
51 val |= (counter_config->unit_mask & 0xFF) << 8; 51 val |= (counter_config->unit_mask & 0xFF) << 8;
52 counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV |
53 ARCH_PERFMON_EVENTSEL_EDGE |
54 ARCH_PERFMON_EVENTSEL_CMASK);
55 val |= counter_config->extra;
52 event &= model->event_mask ? model->event_mask : 0xFF; 56 event &= model->event_mask ? model->event_mask : 0xFF;
53 val |= event & 0xFF; 57 val |= event & 0xFF;
54 val |= (event & 0x0F00) << 24; 58 val |= (event & 0x0F00) << 24;
@@ -440,6 +444,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
440 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); 444 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
441 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); 445 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
442 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); 446 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
447 oprofilefs_create_ulong(sb, dir, "extra", &counter_config[i].extra);
443 } 448 }
444 449
445 return 0; 450 return 0;
@@ -536,7 +541,7 @@ static void nmi_shutdown(void)
536 541
537#ifdef CONFIG_PM 542#ifdef CONFIG_PM
538 543
539static int nmi_suspend(struct sys_device *dev, pm_message_t state) 544static int nmi_suspend(void)
540{ 545{
541 /* Only one CPU left, just stop that one */ 546 /* Only one CPU left, just stop that one */
542 if (nmi_enabled == 1) 547 if (nmi_enabled == 1)
@@ -544,49 +549,31 @@ static int nmi_suspend(struct sys_device *dev, pm_message_t state)
544 return 0; 549 return 0;
545} 550}
546 551
547static int nmi_resume(struct sys_device *dev) 552static void nmi_resume(void)
548{ 553{
549 if (nmi_enabled == 1) 554 if (nmi_enabled == 1)
550 nmi_cpu_start(NULL); 555 nmi_cpu_start(NULL);
551 return 0;
552} 556}
553 557
554static struct sysdev_class oprofile_sysclass = { 558static struct syscore_ops oprofile_syscore_ops = {
555 .name = "oprofile",
556 .resume = nmi_resume, 559 .resume = nmi_resume,
557 .suspend = nmi_suspend, 560 .suspend = nmi_suspend,
558}; 561};
559 562
560static struct sys_device device_oprofile = { 563static void __init init_suspend_resume(void)
561 .id = 0,
562 .cls = &oprofile_sysclass,
563};
564
565static int __init init_sysfs(void)
566{ 564{
567 int error; 565 register_syscore_ops(&oprofile_syscore_ops);
568
569 error = sysdev_class_register(&oprofile_sysclass);
570 if (error)
571 return error;
572
573 error = sysdev_register(&device_oprofile);
574 if (error)
575 sysdev_class_unregister(&oprofile_sysclass);
576
577 return error;
578} 566}
579 567
580static void exit_sysfs(void) 568static void exit_suspend_resume(void)
581{ 569{
582 sysdev_unregister(&device_oprofile); 570 unregister_syscore_ops(&oprofile_syscore_ops);
583 sysdev_class_unregister(&oprofile_sysclass);
584} 571}
585 572
586#else 573#else
587 574
588static inline int init_sysfs(void) { return 0; } 575static inline void init_suspend_resume(void) { }
589static inline void exit_sysfs(void) { } 576static inline void exit_suspend_resume(void) { }
590 577
591#endif /* CONFIG_PM */ 578#endif /* CONFIG_PM */
592 579
@@ -789,9 +776,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
789 776
790 mux_init(ops); 777 mux_init(ops);
791 778
792 ret = init_sysfs(); 779 init_suspend_resume();
793 if (ret)
794 return ret;
795 780
796 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 781 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
797 return 0; 782 return 0;
@@ -799,5 +784,5 @@ int __init op_nmi_init(struct oprofile_operations *ops)
799 784
800void op_nmi_exit(void) 785void op_nmi_exit(void)
801{ 786{
802 exit_sysfs(); 787 exit_suspend_resume();
803} 788}
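
The new 'extra' control above whitelists only the invert, edge-detect and counter-mask fields of the event-select MSR before ORing them into the control value. As an illustrative use (not from this patch): counting only cycles in which an event fires at least twice means programming CMASK=2, i.e. bits 31-24:

unsigned long extra = 2UL << 24;        /* CMASK = 2 */

/* only INV, EDGE and CMASK survive the sanitizing in
 * op_x86_get_ctrl() above: */
extra &= ARCH_PERFMON_EVENTSEL_INV |
         ARCH_PERFMON_EVENTSEL_EDGE |
         ARCH_PERFMON_EVENTSEL_CMASK;
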
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h
index e28398df0df2..0b7b7b179cbe 100644
--- a/arch/x86/oprofile/op_counter.h
+++ b/arch/x86/oprofile/op_counter.h
@@ -22,6 +22,7 @@ struct op_counter_config {
22 unsigned long kernel; 22 unsigned long kernel;
23 unsigned long user; 23 unsigned long user;
24 unsigned long unit_mask; 24 unsigned long unit_mask;
25 unsigned long extra;
25}; 26};
26 27
27extern struct op_counter_config counter_config[]; 28extern struct op_counter_config counter_config[];
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index dc701ea58546..e70be38ce039 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -74,6 +74,7 @@
74 compatible = "intel,ce4100-pci", "pci"; 74 compatible = "intel,ce4100-pci", "pci";
75 device_type = "pci"; 75 device_type = "pci";
76 bus-range = <1 1>; 76 bus-range = <1 1>;
77 reg = <0x0800 0x0 0x0 0x0 0x0>;
77 ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>; 78 ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>;
78 79
79 interrupt-parent = <&ioapic2>; 80 interrupt-parent = <&ioapic2>;
@@ -346,7 +347,7 @@
346 "pciclass0c03"; 347 "pciclass0c03";
347 348
348 reg = <0x16800 0x0 0x0 0x0 0x0>; 349 reg = <0x16800 0x0 0x0 0x0 0x0>;
349 interrupts = <22 3>; 350 interrupts = <22 1>;
350 }; 351 };
351 352
352 usb@d,1 { 353 usb@d,1 {
@@ -356,7 +357,7 @@
356 "pciclass0c03"; 357 "pciclass0c03";
357 358
358 reg = <0x16900 0x0 0x0 0x0 0x0>; 359 reg = <0x16900 0x0 0x0 0x0 0x0>;
359 interrupts = <22 3>; 360 interrupts = <22 1>;
360 }; 361 };
361 362
362 sata@e,0 { 363 sata@e,0 {
@@ -366,7 +367,7 @@
366 "pciclass0106"; 367 "pciclass0106";
367 368
368 reg = <0x17000 0x0 0x0 0x0 0x0>; 369 reg = <0x17000 0x0 0x0 0x0 0x0>;
369 interrupts = <23 3>; 370 interrupts = <23 1>;
370 }; 371 };
371 372
372 flash@f,0 { 373 flash@f,0 {
@@ -412,6 +413,7 @@
412 #address-cells = <2>; 413 #address-cells = <2>;
413 #size-cells = <1>; 414 #size-cells = <1>;
414 compatible = "isa"; 415 compatible = "isa";
416 reg = <0xf800 0x0 0x0 0x0 0x0>;
415 ranges = <1 0 0 0 0 0x100>; 417 ranges = <1 0 0 0 0 0x100>;
416 418
417 rtc@70 { 419 rtc@70 {
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 5c0207bf959b..275dbc19e2cf 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -97,11 +97,11 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
97 pentry->freq_hz, pentry->irq); 97 pentry->freq_hz, pentry->irq);
98 if (!pentry->irq) 98 if (!pentry->irq)
99 continue; 99 continue;
100 mp_irq.type = MP_IOAPIC; 100 mp_irq.type = MP_INTSRC;
101 mp_irq.irqtype = mp_INT; 101 mp_irq.irqtype = mp_INT;
102/* triggering mode edge bit 2-3, active high polarity bit 0-1 */ 102/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
103 mp_irq.irqflag = 5; 103 mp_irq.irqflag = 5;
104 mp_irq.srcbus = 0; 104 mp_irq.srcbus = MP_BUS_ISA;
105 mp_irq.srcbusirq = pentry->irq; /* IRQ */ 105 mp_irq.srcbusirq = pentry->irq; /* IRQ */
106 mp_irq.dstapic = MP_APIC_ALL; 106 mp_irq.dstapic = MP_APIC_ALL;
107 mp_irq.dstirq = pentry->irq; 107 mp_irq.dstirq = pentry->irq;
@@ -168,10 +168,10 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
168 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { 168 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
169 pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n", 169 pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
170 totallen, (u32)pentry->phys_addr, pentry->irq); 170 totallen, (u32)pentry->phys_addr, pentry->irq);
171 mp_irq.type = MP_IOAPIC; 171 mp_irq.type = MP_INTSRC;
172 mp_irq.irqtype = mp_INT; 172 mp_irq.irqtype = mp_INT;
173 mp_irq.irqflag = 0xf; /* level trigger and active low */ 173 mp_irq.irqflag = 0xf; /* level trigger and active low */
174 mp_irq.srcbus = 0; 174 mp_irq.srcbus = MP_BUS_ISA;
175 mp_irq.srcbusirq = pentry->irq; /* IRQ */ 175 mp_irq.srcbusirq = pentry->irq; /* IRQ */
176 mp_irq.dstapic = MP_APIC_ALL; 176 mp_irq.dstapic = MP_APIC_ALL;
177 mp_irq.dstirq = pentry->irq; 177 mp_irq.dstirq = pentry->irq;
@@ -282,7 +282,7 @@ void __init x86_mrst_early_setup(void)
282 /* Avoid searching for BIOS MP tables */ 282 /* Avoid searching for BIOS MP tables */
283 x86_init.mpparse.find_smp_config = x86_init_noop; 283 x86_init.mpparse.find_smp_config = x86_init_noop;
284 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 284 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
285 285 set_bit(MP_BUS_ISA, mp_bus_not_pci);
286} 286}
287 287
288/* 288/*
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 04cf645feb92..73d70d65e76e 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -100,9 +100,11 @@ int vrtc_set_mmss(unsigned long nowtime)
100 100
101void __init mrst_rtc_init(void) 101void __init mrst_rtc_init(void)
102{ 102{
103 unsigned long vrtc_paddr = sfi_mrtc_array[0].phys_addr; 103 unsigned long vrtc_paddr;
104 104
105 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); 105 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
106
107 vrtc_paddr = sfi_mrtc_array[0].phys_addr;
106 if (!sfi_mrtc_num || !vrtc_paddr) 108 if (!sfi_mrtc_num || !vrtc_paddr)
107 return; 109 return;
108 110
diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c
index 127775696d6c..ab81fb271760 100644
--- a/arch/x86/platform/olpc/olpc-xo1.c
+++ b/arch/x86/platform/olpc/olpc-xo1.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/platform_device.h> 16#include <linux/platform_device.h>
17#include <linux/pm.h> 17#include <linux/pm.h>
18#include <linux/mfd/core.h>
18 19
19#include <asm/io.h> 20#include <asm/io.h>
20#include <asm/olpc.h> 21#include <asm/olpc.h>
@@ -56,25 +57,24 @@ static void xo1_power_off(void)
 static int __devinit olpc_xo1_probe(struct platform_device *pdev)
 {
 	struct resource *res;
+	int err;
 
 	/* don't run on non-XOs */
 	if (!machine_is_olpc())
 		return -ENODEV;
 
+	err = mfd_cell_enable(pdev);
+	if (err)
+		return err;
+
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
 	if (!res) {
 		dev_err(&pdev->dev, "can't fetch device resource info\n");
 		return -EIO;
 	}
-
-	if (!request_region(res->start, resource_size(res), DRV_NAME)) {
-		dev_err(&pdev->dev, "can't request region\n");
-		return -EIO;
-	}
-
 	if (strcmp(pdev->name, "cs5535-pms") == 0)
 		pms_base = res->start;
-	else if (strcmp(pdev->name, "cs5535-acpi") == 0)
+	else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
 		acpi_base = res->start;
 
 	/* If we have both addresses, we can override the poweroff hook */
@@ -88,14 +88,11 @@ static int __devinit olpc_xo1_probe(struct platform_device *pdev)
 
 static int __devexit olpc_xo1_remove(struct platform_device *pdev)
 {
-	struct resource *r;
-
-	r = platform_get_resource(pdev, IORESOURCE_IO, 0);
-	release_region(r->start, resource_size(r));
+	mfd_cell_disable(pdev);
 
 	if (strcmp(pdev->name, "cs5535-pms") == 0)
 		pms_base = 0;
-	else if (strcmp(pdev->name, "cs5535-acpi") == 0)
+	else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
 		acpi_base = 0;
 
 	pm_power_off = NULL;
@@ -113,7 +110,7 @@ static struct platform_driver cs5535_pms_drv = {
 
 static struct platform_driver cs5535_acpi_drv = {
 	.driver = {
-		.name = "cs5535-acpi",
+		.name = "olpc-xo1-pm-acpi",
 		.owner = THIS_MODULE,
 	},
 	.probe = olpc_xo1_probe,
@@ -143,7 +140,7 @@ static void __exit olpc_xo1_exit(void)
 
 MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:olpc-xo1");
+MODULE_ALIAS("platform:cs5535-pms");
 
 module_init(olpc_xo1_init);
 module_exit(olpc_xo1_exit);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index a7b38d35c29a..7cb6424317f6 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -11,6 +11,7 @@
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 
 #include <asm/mmu_context.h>
 #include <asm/uv/uv.h>
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index fe4cf8294878..c7abf13a213f 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -471,15 +471,7 @@ static unsigned int startup_piix4_master_irq(struct irq_data *data)
 {
 	legacy_pic->init(0);
 	enable_cobalt_irq(data);
-}
-
-static void end_piix4_master_irq(struct irq_data *data)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cobalt_lock, flags);
-	enable_cobalt_irq(data);
-	spin_unlock_irqrestore(&cobalt_lock, flags);
+	return 0;
 }
 
 static struct irq_chip piix4_master_irq_type = {
@@ -492,7 +484,7 @@ static void pii4_mask(struct irq_data *data) { }
 
 static struct irq_chip piix4_virtual_irq_type = {
 	.name = "PIIX4-virtual",
-	.mask = pii4_mask,
+	.irq_mask = pii4_mask,
 };
 
 /*
@@ -580,9 +572,9 @@ static struct irqaction cascade_action = {
 
 static inline void set_piix4_virtual_irq_type(void)
 {
-	piix4_virtual_irq_type.enable = i8259A_chip.unmask;
-	piix4_virtual_irq_type.disable = i8259A_chip.mask;
-	piix4_virtual_irq_type.unmask = i8259A_chip.unmask;
+	piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask;
+	piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask;
+	piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask;
 }
 
 static void __init visws_pre_intr_init(void)
@@ -599,7 +591,7 @@ static void __init visws_pre_intr_init(void)
 	else if (i == CO_IRQ_IDE0)
 		chip = &cobalt_irq_type;
 	else if (i == CO_IRQ_IDE1)
-		>chip = &cobalt_irq_type;
+		chip = &cobalt_irq_type;
 	else if (i == CO_IRQ_8259)
 		chip = &piix4_master_irq_type;
 	else if (i < CO_IRQ_APIC0)
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 36df991985b2..468d591dde31 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -417,24 +417,25 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 	return NULL;
 }
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-	struct mm_struct *mm = tsk->mm;
-
-	/* Check to see if this task was created in compat vdso mode */
+	/*
+	 * Check to see if the corresponding task was created in compat vdso
+	 * mode.
+	 */
 	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
 		return &gate_vma;
 	return NULL;
 }
 
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	const struct vm_area_struct *vma = get_gate_vma(task);
+	const struct vm_area_struct *vma = get_gate_vma(mm);
 
 	return vma && addr >= vma->vm_start && addr < vma->vm_end;
 }
 
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return 0;
 }
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1c7121ba18ff..5cc821cb2e09 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -39,6 +39,7 @@ config XEN_MAX_DOMAIN_MEMORY
 config XEN_SAVE_RESTORE
 	bool
 	depends on XEN
+	select HIBERNATE_CALLBACKS
 	default y
 
 config XEN_DEBUG_FS
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 49dbd78ec3cb..e3c6a06cf725 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -238,6 +238,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 static __init void xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
+	unsigned int xsave_mask;
 
 	cpuid_leaf1_edx_mask =
 		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
@@ -249,24 +250,16 @@ static __init void xen_init_cpuid_mask(void)
 	cpuid_leaf1_edx_mask &=
 		~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
 		  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
-
 	ax = 1;
-	cx = 0;
 	xen_cpuid(&ax, &bx, &cx, &dx);
 
-	/* cpuid claims we support xsave; try enabling it to see what happens */
-	if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
-		unsigned long cr4;
-
-		set_in_cr4(X86_CR4_OSXSAVE);
-
-		cr4 = read_cr4();
+	xsave_mask =
+		(1 << (X86_FEATURE_XSAVE % 32)) |
+		(1 << (X86_FEATURE_OSXSAVE % 32));
 
-		if ((cr4 & X86_CR4_OSXSAVE) == 0)
-			cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
-
-		clear_in_cr4(X86_CR4_OSXSAVE);
-	}
+	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
+	if ((cx & xsave_mask) != xsave_mask)
+		cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
 }
 
 static void xen_set_debugreg(int reg, unsigned long val)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c82df6c9c0f0..55c965b38c27 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -565,13 +565,13 @@ pte_t xen_make_pte_debug(pteval_t pte)
 	if (io_page &&
 	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
 		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-		WARN(addr != other_addr,
+		WARN_ONCE(addr != other_addr,
 			"0x%lx is using VM_IO, but it is 0x%lx!\n",
 			(unsigned long)addr, (unsigned long)other_addr);
 	} else {
 		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
 		other_addr = (_pte.pte & PTE_PFN_MASK);
-		WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+		WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
 			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
 			(unsigned long)addr);
 	}
@@ -1463,6 +1463,119 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 	return ret;
 }
 
+#ifdef CONFIG_X86_64
+static __initdata u64 __last_pgt_set_rw = 0;
+static __initdata u64 __pgt_buf_start = 0;
+static __initdata u64 __pgt_buf_end = 0;
+static __initdata u64 __pgt_buf_top = 0;
+/*
+ * As a consequence of the commit:
+ *
+ * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
+ * Author: Yinghai Lu <yinghai@kernel.org>
+ * Date: Fri Dec 17 16:58:28 2010 -0800
+ *
+ * x86-64, mm: Put early page table high
+ *
+ * at some point init_memory_mapping is going to reach the pagetable pages
+ * area and map those pages too (mapping them as normal memory that falls
+ * in the range of addresses passed to init_memory_mapping as argument).
+ * Some of those pages are already pagetable pages (they are in the range
+ * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
+ * everything is fine.
+ * Some of these pages are not pagetable pages yet (they fall in the range
+ * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
+ * are going to be mapped RW. When these pages become pagetable pages and
+ * are hooked into the pagetable, xen will find that the guest has already
+ * a RW mapping of them somewhere and fail the operation.
+ * The reason Xen requires pagetables to be RO is that the hypervisor needs
+ * to verify that the pagetables are valid before using them. The validation
+ * operations are called "pinning".
+ *
+ * In order to fix the issue we mark all the pages in the entire range
+ * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
+ * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
+ * init_memory_mapping. Hence the kernel is going to crash as soon as one
+ * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
+ * ranges are RO).
+ *
+ * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
+ * the init_memory_mapping has completed (in a perfect world we would
+ * call this function from init_memory_mapping, but lets ignore that).
+ *
+ * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
+ * end,top] have all changed to new values (b/c init_memory_mapping
+ * is called and setting up another new page-table). Hence, the first time
+ * we enter this function, we save away the pgt_buf_start value and update
+ * the pgt_buf_[end,top].
+ *
+ * When we detect that the "old" pgt_buf_start through pgt_buf_end
+ * PFNs have been reserved (so memblock_x86_reserve_range has been called),
+ * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
+ *
+ * And then we update those "old" pgt_buf_[end|top] with the new ones
+ * so that we can redo this on the next pagetable.
+ */
+static __init void mark_rw_past_pgt(void) {
+
+	if (pgt_buf_end > pgt_buf_start) {
+		u64 addr, size;
+
+		/* Save it away. */
+		if (!__pgt_buf_start) {
+			__pgt_buf_start = pgt_buf_start;
+			__pgt_buf_end = pgt_buf_end;
+			__pgt_buf_top = pgt_buf_top;
+			return;
+		}
+		/* If we get the range that starts at __pgt_buf_end that means
+		 * the range is reserved, and that in 'init_memory_mapping'
+		 * the 'memblock_x86_reserve_range' has been called with the
+		 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
+		 * pgt_buf_[start|end|top] refer now to a new pagetable.
+		 * Note: we are called _after_ the pgt_buf_[..] have been
+		 * updated.*/
+
+		addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
+						       &size, PAGE_SIZE);
+
+		/* Still not reserved, meaning 'memblock_x86_reserve_range'
+		 * hasn't been called yet. Update the _end and _top.*/
+		if (addr == PFN_PHYS(__pgt_buf_start)) {
+			__pgt_buf_end = pgt_buf_end;
+			__pgt_buf_top = pgt_buf_top;
+			return;
+		}
+
+		/* OK, the area is reserved, meaning it is time for us to
+		 * set RW for the old end->top PFNs. */
+
+		/* ..unless we had already done this. */
+		if (__pgt_buf_end == __last_pgt_set_rw)
+			return;
+
+		addr = PFN_PHYS(__pgt_buf_end);
+
+		/* set as RW the rest */
+		printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
+		       PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
+
+		while (addr < PFN_PHYS(__pgt_buf_top)) {
+			make_lowmem_page_readwrite(__va(addr));
+			addr += PAGE_SIZE;
+		}
+		/* And update everything so that we are ready for the next
+		 * pagetable (the one created for regions past 4GB) */
+		__last_pgt_set_rw = __pgt_buf_end;
+		__pgt_buf_start = pgt_buf_start;
+		__pgt_buf_end = pgt_buf_end;
+		__pgt_buf_top = pgt_buf_top;
+	}
+	return;
+}
+#else
+static __init void mark_rw_past_pgt(void) { }
+#endif
 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 #ifdef CONFIG_X86_64
@@ -1473,30 +1586,43 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 #endif
 }
 
+#ifdef CONFIG_X86_32
 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 {
-	unsigned long pfn = pte_pfn(pte);
-
-#ifdef CONFIG_X86_32
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
 	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
 		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
 			       pte_val_ma(pte));
-#endif
+
+	return pte;
+}
+#else /* CONFIG_X86_64 */
+static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
 
 	/*
+	 * A bit of optimization. We do not need to call the workaround
+	 * when xen_set_pte_init is called with a PTE with 0 as PFN.
+	 * That is b/c the pagetable at that point are just being populated
+	 * with empty values and we can save some cycles by not calling
+	 * the 'memblock' code.*/
+	if (pfn)
+		mark_rw_past_pgt();
+	/*
 	 * If the new pfn is within the range of the newly allocated
 	 * kernel pagetable, and it isn't being mapped into an
 	 * early_ioremap fixmap slot as a freshly allocated page, make sure
 	 * it is RO.
 	 */
 	if (((!is_early_ioremap_ptep(ptep) &&
-	      pfn >= pgt_buf_start && pfn < pgt_buf_end)) ||
+	      pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
 	    (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
 		pte = pte_wrprotect(pte);
 
 	return pte;
 }
+#endif /* CONFIG_X86_64 */
 
 /* Init-time set_pte while constructing initial pagetables, which
    doesn't allow RO pagetable pages to be remapped RW */
@@ -1992,6 +2118,8 @@ __init void xen_ident_map_ISA(void)
 
 static __init void xen_post_allocator_init(void)
 {
+	mark_rw_past_pgt();
+
 #ifdef CONFIG_XEN_DEBUG
 	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
 #endif
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 215a3ce61068..141eb0de8b06 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -497,7 +497,7 @@ static bool alloc_p2m(unsigned long pfn)
 	return true;
 }
 
-bool __early_alloc_p2m(unsigned long pfn)
+static bool __init __early_alloc_p2m(unsigned long pfn)
 {
 	unsigned topidx, mididx, idx;
 
@@ -530,7 +530,7 @@ bool __early_alloc_p2m(unsigned long pfn)
 	}
 	return idx != 0;
 }
-unsigned long set_phys_range_identity(unsigned long pfn_s,
+unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 				      unsigned long pfn_e)
 {
 	unsigned long pfn;
@@ -671,7 +671,9 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	page->private = mfn;
 	page->index = pfn_to_mfn(pfn);
 
-	__set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
+	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
+		return -ENOMEM;
+
 	if (!PageHighMem(page))
 		/* Just zap old mapping for now */
 		pte_clear(&init_mm, address, ptep);
@@ -709,7 +711,7 @@ int m2p_remove_override(struct page *page)
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_del(&page->lru);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
-	__set_phys_to_machine(pfn, page->index);
+	set_phys_to_machine(pfn, page->index);
 
 	if (!PageHighMem(page))
 		set_pte_at(&init_mm, address, ptep,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index fa0269a99377..90bac0aac3a5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -227,7 +227,7 @@ char * __init xen_memory_setup(void)
 
 	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
-	xen_extra_mem_start = mem_end;
+	xen_extra_mem_start = max((1ULL << 32), mem_end);
 	for (i = 0; i < memmap.nr_entries; i++) {
 		unsigned long long end;
 