path: root/arch/x86
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 30
-rw-r--r--  arch/x86/Kconfig.cpu | 3
-rw-r--r--  arch/x86/Makefile | 1
-rw-r--r--  arch/x86/boot/compressed/head_32.S | 3
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 3
-rw-r--r--  arch/x86/boot/compressed/vmlinux.lds.S | 6
-rw-r--r--  arch/x86/boot/setup.ld | 3
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 10
-rw-r--r--  arch/x86/ia32/ia32entry.S | 41
-rw-r--r--  arch/x86/include/asm/amd_iommu.h | 1
-rw-r--r--  arch/x86/include/asm/checksum_32.h | 3
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 30
-rw-r--r--  arch/x86/include/asm/desc.h | 2
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 10
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 1
-rw-r--r--  arch/x86/include/asm/mce.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 28
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 10
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 1
-rw-r--r--  arch/x86/include/asm/processor.h | 2
-rw-r--r--  arch/x86/include/asm/topology.h | 11
-rw-r--r--  arch/x86/include/asm/uv/uv_hub.h | 23
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 2
-rw-r--r--  arch/x86/kernel/acpi/realmode/wakeup.lds.S | 3
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 2
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 24
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 8
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 23
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 19
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 7
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 87
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 67
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 17
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 3
-rw-r--r--  arch/x86/kernel/crash_dump_32.c | 19
-rw-r--r--  arch/x86/kernel/e820.c | 4
-rw-r--r--  arch/x86/kernel/early_printk.c | 10
-rw-r--r--  arch/x86/kernel/efi.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 2
-rw-r--r--  arch/x86/kernel/head_64.S | 2
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c | 10
-rw-r--r--  arch/x86/kernel/irq.c | 4
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 6
-rw-r--r--  arch/x86/kernel/pci-dma.c | 10
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 1
-rw-r--r--  arch/x86/kernel/process_64.c | 5
-rw-r--r--  arch/x86/kernel/reboot.c | 9
-rw-r--r--  arch/x86/kernel/setup.c | 7
-rw-r--r--  arch/x86/kernel/time.c | 3
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 9
-rw-r--r--  arch/x86/kernel/trampoline.c | 12
-rw-r--r--  arch/x86/kernel/trampoline_64.S | 4
-rw-r--r--  arch/x86/kernel/traps.c | 6
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 2
-rw-r--r--  arch/x86/kernel/vmi_32.c | 2
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 82
-rw-r--r--  arch/x86/kvm/i8254.c | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 86
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 18
-rw-r--r--  arch/x86/kvm/svm.c | 25
-rw-r--r--  arch/x86/kvm/vmx.c | 2
-rw-r--r--  arch/x86/kvm/x86.c | 6
-rw-r--r--  arch/x86/lib/Makefile | 4
-rw-r--r--  arch/x86/lib/cmpxchg8b_emu.S | 57
-rw-r--r--  arch/x86/mm/Makefile | 3
-rw-r--r--  arch/x86/mm/init.c | 63
-rw-r--r--  arch/x86/mm/ioremap.c | 24
-rw-r--r--  arch/x86/mm/pat.c | 7
-rw-r--r--  arch/x86/mm/setup_nx.c | 69
-rw-r--r--  arch/x86/pci/i386.c | 2
-rw-r--r--  arch/x86/xen/debugfs.c | 2
-rw-r--r--  arch/x86/xen/enlighten.c | 24
77 files changed, 705 insertions, 397 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93698794aa3a..72ace9515a07 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT
 config HAVE_LATENCYTOP_SUPPORT
         def_bool y
 
-config FAST_CMPXCHG_LOCAL
-        bool
-        default y
-
 config MMU
         def_bool y
 
@@ -432,6 +428,17 @@ config X86_NUMAQ
           of Flat Logical. You will need a new lynxer.elf file to flash your
           firmware with - send email to <Martin.Bligh@us.ibm.com>.
 
+config X86_SUPPORTS_MEMORY_FAILURE
+        bool
+        # MCE code calls memory_failure():
+        depends on X86_MCE
+        # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
+        depends on !X86_NUMAQ
+        # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
+        depends on X86_64 || !SPARSEMEM
+        select ARCH_SUPPORTS_MEMORY_FAILURE
+        default y
+
 config X86_VISWS
         bool "SGI 320/540 (Visual Workstation)"
         depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
@@ -484,7 +491,7 @@ if PARAVIRT_GUEST
 source "arch/x86/xen/Kconfig"
 
 config VMI
-        bool "VMI Guest support"
+        bool "VMI Guest support (DEPRECATED)"
         select PARAVIRT
         depends on X86_32
         ---help---
@@ -493,6 +500,15 @@ config VMI
           at the moment), by linking the kernel to a GPL-ed ROM module
           provided by the hypervisor.
 
+          As of September 2009, VMware has started a phased retirement
+          of this feature from VMware's products. Please see
+          feature-removal-schedule.txt for details. If you are
+          planning to enable this option, please note that you cannot
+          live migrate a VMI enabled VM to a future VMware product,
+          which doesn't support VMI. So if you expect your kernel to
+          seamlessly migrate to newer VMware products, keep this
+          disabled.
+
 config KVM_CLOCK
         bool "KVM paravirtualized clock"
         select PARAVIRT
@@ -1427,12 +1443,8 @@ config SECCOMP
 
           If unsure, say Y. Only embedded should say N here.
 
-config CC_STACKPROTECTOR_ALL
-        bool
-
 config CC_STACKPROTECTOR
         bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
-        select CC_STACKPROTECTOR_ALL
         ---help---
           This option turns on the -fstack-protector GCC feature. This
           feature puts, at the beginning of functions, a canary value on
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b8a9f9..2649840d888f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
         def_bool y
-        depends on X86_PAE || X86_64
+        depends on !M386 && !M486
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
         int
         default "64" if X86_64
         default "6" if X86_32 && X86_P6_NOP
+        default "5" if X86_32 && X86_CMPXCHG64
         default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
         default "3"
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a012ee8ef803..d2d24c9ee64d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -76,7 +76,6 @@ ifdef CONFIG_CC_STACKPROTECTOR
         cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
         ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y)
                 stackp-y := -fstack-protector
-                stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
                 KBUILD_CFLAGS += $(stackp-y)
         else
                 $(warning stack protector enabled but no compiler support)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 75e4f001e706..f543b70ffae2 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -23,13 +23,14 @@
  */
         .text
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
 
-        .section ".text.head","ax",@progbits
+        __HEAD
 ENTRY(startup_32)
         cld
         /*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f62c284db9eb..077e1b69198e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -24,6 +24,7 @@
         .code32
         .text
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/pgtable_types.h>
@@ -33,7 +34,7 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 
-        .section ".text.head"
+        __HEAD
         .code32
 ENTRY(startup_32)
         cld
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index cc353e1b3ffd..f4193bb48782 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,3 +1,5 @@
1#include <asm-generic/vmlinux.lds.h>
2
1OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) 3OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
2 4
3#undef i386 5#undef i386
@@ -18,9 +20,9 @@ SECTIONS
18 * address 0. 20 * address 0.
19 */ 21 */
20 . = 0; 22 . = 0;
21 .text.head : { 23 .head.text : {
22 _head = . ; 24 _head = . ;
23 *(.text.head) 25 HEAD_TEXT
24 _ehead = . ; 26 _ehead = . ;
25 } 27 }
26 .rodata.compressed : { 28 .rodata.compressed : {
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 0f6ec455a2b1..03c0683636b6 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -53,6 +53,9 @@ SECTIONS
53 53
54 /DISCARD/ : { *(.note*) } 54 /DISCARD/ : { *(.note*) }
55 55
56 /*
57 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
58 */
56 . = ASSERT(_end <= 0x8000, "Setup too big!"); 59 . = ASSERT(_end <= 0x8000, "Setup too big!");
57 . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); 60 . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
58 /* Necessary for the very-old-loader check to work... */ 61 /* Necessary for the very-old-loader check to work... */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 585edebe12cf..49c552c060e9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
                 return -EINVAL;
         }
 
-        if (irq_fpu_usable())
+        if (!irq_fpu_usable())
                 err = crypto_aes_expand_key(ctx, in_key, key_len);
         else {
                 kernel_fpu_begin();
@@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
         struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-        if (irq_fpu_usable())
+        if (!irq_fpu_usable())
                 crypto_aes_encrypt_x86(ctx, dst, src);
         else {
                 kernel_fpu_begin();
@@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
         struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-        if (irq_fpu_usable())
+        if (!irq_fpu_usable())
                 crypto_aes_decrypt_x86(ctx, dst, src);
         else {
                 kernel_fpu_begin();
@@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req)
         struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
         struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 
-        if (irq_fpu_usable()) {
+        if (!irq_fpu_usable()) {
                 struct ablkcipher_request *cryptd_req =
                         ablkcipher_request_ctx(req);
                 memcpy(cryptd_req, req, sizeof(*req));
@@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req)
         struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
         struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 
-        if (irq_fpu_usable()) {
+        if (!irq_fpu_usable()) {
                 struct ablkcipher_request *cryptd_req =
                         ablkcipher_request_ctx(req);
                 memcpy(cryptd_req, req, sizeof(*req));
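Note: the aesni-intel hunks above flip the sense of every irq_fpu_usable() test. The FPU-backed AES-NI path is only legal when the FPU is usable in the current context, so the software (or cryptd-deferred) fallback must be taken when it is not; before this fix the conditions were inverted. A minimal user-space sketch of the corrected dispatch follows -- all helper names here are illustrative stand-ins, not the kernel API:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins; the real kernel helpers are quite different. */
static bool irq_fpu_usable(void)   { return true; }
static void kernel_fpu_begin(void) { }
static void kernel_fpu_end(void)   { }
static void aes_encrypt_aesni(void) { puts("AES-NI (FPU) path"); }
static void aes_encrypt_soft(void)  { puts("software fallback"); }

/*
 * Corrected dispatch: take the SIMD path only when the FPU is usable
 * in this context; otherwise fall back to the software implementation.
 */
static void do_encrypt(void)
{
        if (!irq_fpu_usable()) {
                aes_encrypt_soft();
        } else {
                kernel_fpu_begin();
                aes_encrypt_aesni();
                kernel_fpu_end();
        }
}

int main(void) { do_encrypt(); return 0; }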
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 74619c4f9fda..581b0568fe19 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
 #define __AUDIT_ARCH_LE 0x40000000
 
 #ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
+#define sysexit_audit ia32_ret_from_sys_call
+#define sysretl_audit ia32_ret_from_sys_call
 #endif
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
         .endm
 
         /* clobbers %eax */
-        .macro CLEAR_RREGS _r9=rax
+        .macro CLEAR_RREGS offset=0, _r9=rax
         xorl %eax,%eax
-        movq %rax,R11(%rsp)
-        movq %rax,R10(%rsp)
-        movq %\_r9,R9(%rsp)
-        movq %rax,R8(%rsp)
+        movq %rax,\offset+R11(%rsp)
+        movq %rax,\offset+R10(%rsp)
+        movq %\_r9,\offset+R9(%rsp)
+        movq %rax,\offset+R8(%rsp)
         .endm
 
         /*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
         movl RIP-R11(%rsp),%edx         /* User %eip */
         CFI_REGISTER rip,rdx
         RESTORE_ARGS 1,24,1,1,1,1
+        xorq %r8,%r8
+        xorq %r9,%r9
+        xorq %r10,%r10
+        xorq %r11,%r11
         popfq
         CFI_ADJUST_CFA_OFFSET -8
         /*CFI_RESTORE rflags*/
@@ -200,9 +204,9 @@ sysexit_from_sys_call:
         movl RDI-ARGOFFSET(%rsp),%r8d   /* reload 5th syscall arg */
         .endm
 
-        .macro auditsys_exit exit,ebpsave=RBP
+        .macro auditsys_exit exit
         testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
-        jnz int_ret_from_sys_call
+        jnz ia32_ret_from_sys_call
         TRACE_IRQS_ON
         sti
         movl %eax,%esi          /* second arg, syscall return value */
@@ -213,13 +217,13 @@ sysexit_from_sys_call:
         call audit_syscall_exit
         GET_THREAD_INFO(%r10)
         movl RAX-ARGOFFSET(%rsp),%eax   /* reload syscall return value */
-        movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
         movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
         cli
         TRACE_IRQS_OFF
         testl %edi,TI_flags(%r10)
-        jnz int_with_check
-        jmp \exit
+        jz \exit
+        CLEAR_RREGS -ARGOFFSET
+        jmp int_with_check
         .endm
 
 sysenter_auditsys:
@@ -329,6 +333,9 @@ sysretl_from_sys_call:
         CFI_REGISTER rip,rcx
         movl EFLAGS-ARGOFFSET(%rsp),%r11d
         /*CFI_REGISTER rflags,r11*/
+        xorq %r10,%r10
+        xorq %r9,%r9
+        xorq %r8,%r8
         TRACE_IRQS_ON
         movl RSP-ARGOFFSET(%rsp),%esp
         CFI_RESTORE rsp
@@ -343,7 +350,7 @@ cstar_auditsys:
         jmp cstar_dispatch
 
 sysretl_audit:
-        auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
+        auditsys_exit sysretl_from_sys_call
 #endif
 
 cstar_tracesys:
@@ -353,7 +360,7 @@ cstar_tracesys:
 #endif
         xchgl %r9d,%ebp
         SAVE_REST
-        CLEAR_RREGS r9
+        CLEAR_RREGS 0, r9
         movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
         movq %rsp,%rdi          /* &pt_regs -> arg1 */
         call syscall_trace_enter
@@ -425,6 +432,8 @@ ia32_do_call:
         call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
         movq %rax,RAX-ARGOFFSET(%rsp)
+ia32_ret_from_sys_call:
+        CLEAR_RREGS -ARGOFFSET
         jmp int_ret_from_sys_call
 
ia32_tracesys:
@@ -442,8 +451,8 @@ END(ia32_syscall)
 
ia32_badsys:
         movq $0,ORIG_RAX-ARGOFFSET(%rsp)
-        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
-        jmp int_ret_from_sys_call
+        movq $-ENOSYS,%rax
+        jmp ia32_sysret
 
quiet_ni_syscall:
         movq $-ENOSYS,%rax
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index ac95995b7bad..4b180897e6b5 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -31,6 +31,7 @@ extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
31extern void amd_iommu_flush_all_domains(void); 31extern void amd_iommu_flush_all_domains(void);
32extern void amd_iommu_flush_all_devices(void); 32extern void amd_iommu_flush_all_devices(void);
33extern void amd_iommu_shutdown(void); 33extern void amd_iommu_shutdown(void);
34extern void amd_iommu_apply_erratum_63(u16 devid);
34#else 35#else
35static inline int amd_iommu_init(void) { return -ENODEV; } 36static inline int amd_iommu_init(void) { return -ENODEV; }
36static inline void amd_iommu_detect(void) { } 37static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 7c5ef8b14d92..46fc474fd819 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
161 "adcl $0, %0 ;\n" 161 "adcl $0, %0 ;\n"
162 : "=&r" (sum) 162 : "=&r" (sum)
163 : "r" (saddr), "r" (daddr), 163 : "r" (saddr), "r" (daddr),
164 "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)); 164 "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)
165 : "memory");
165 166
166 return csum_fold(sum); 167 return csum_fold(sum);
167} 168}
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 82ceb788a981..ee1931be6593 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -312,19 +312,23 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
 
 extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
 
 #define cmpxchg64(ptr, o, n)                                            \
 ({                                                                      \
         __typeof__(*(ptr)) __ret;                                       \
-        if (likely(boot_cpu_data.x86 > 4))                              \
-                __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr),          \
-                                (unsigned long long)(o),                \
-                                (unsigned long long)(n));               \
-        else                                                            \
-                __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr),      \
-                                (unsigned long long)(o),                \
-                                (unsigned long long)(n));               \
-        __ret;                                                          \
-})
+        __typeof__(*(ptr)) __old = (o);                                 \
+        __typeof__(*(ptr)) __new = (n);                                 \
+        alternative_io("call cmpxchg8b_emu",                            \
+                       "lock; cmpxchg8b (%%esi)" ,                      \
+                       X86_FEATURE_CX8,                                 \
+                       "=A" (__ret),                                    \
+                       "S" ((ptr)), "0" (__old),                        \
+                       "b" ((unsigned int)__new),                       \
+                       "c" ((unsigned int)(__new>>32))                  \
+                       : "memory");                                     \
+        __ret; })
+
+
+
 #define cmpxchg64_local(ptr, o, n)                                      \
 ({                                                                      \
         __typeof__(*(ptr)) __ret;                                       \
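Note: the new cmpxchg64() above leans on the alternatives mechanism: the compiled-in default is a call to the out-of-line cmpxchg8b_emu helper (added in arch/x86/lib/cmpxchg8b_emu.S per the diffstat), and on CPUs advertising X86_FEATURE_CX8 the callsite is patched at boot into a native lock; cmpxchg8b. Both variants must provide the same 64-bit compare-and-swap semantics, sketched below in plain C purely for illustration -- the real emulation achieves atomicity by running with interrupts disabled, not like this:

#include <stdint.h>
#include <stdio.h>

/*
 * Semantics cmpxchg8b_emu must provide (non-atomic sketch): compare
 * the 64-bit value at ptr with 'old'; if equal, store 'new'; either
 * way return the value that was found there.
 */
static uint64_t cmpxchg64_sketch(volatile uint64_t *ptr,
                                 uint64_t old, uint64_t new)
{
        uint64_t cur = *ptr;

        if (cur == old)
                *ptr = new;
        return cur;
}

int main(void)
{
        uint64_t v = 1;

        printf("was %llu\n", (unsigned long long)cmpxchg64_sketch(&v, 1, 2));
        printf("now %llu\n", (unsigned long long)v);   /* prints 2 */
        return 0;
}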
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e8de2f6f5ca5..617bd56b3070 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -288,7 +288,7 @@ static inline void load_LDT(mm_context_t *pc)
288 288
289static inline unsigned long get_desc_base(const struct desc_struct *desc) 289static inline unsigned long get_desc_base(const struct desc_struct *desc)
290{ 290{
291 return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); 291 return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
292} 292}
293 293
294static inline void set_desc_base(struct desc_struct *desc, unsigned long base) 294static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 0ee770d23d0e..6a25d5d42836 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -14,6 +14,12 @@
14#include <asm/swiotlb.h> 14#include <asm/swiotlb.h>
15#include <asm-generic/dma-coherent.h> 15#include <asm-generic/dma-coherent.h>
16 16
17#ifdef CONFIG_ISA
18# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
19#else
20# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
21#endif
22
17extern dma_addr_t bad_dma_address; 23extern dma_addr_t bad_dma_address;
18extern int iommu_merge; 24extern int iommu_merge;
19extern struct device x86_dma_fallback_dev; 25extern struct device x86_dma_fallback_dev;
@@ -124,10 +130,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
124 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) 130 if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
125 return memory; 131 return memory;
126 132
127 if (!dev) { 133 if (!dev)
128 dev = &x86_dma_fallback_dev; 134 dev = &x86_dma_fallback_dev;
129 gfp |= GFP_DMA;
130 }
131 135
132 if (!is_device_dma_capable(dev)) 136 if (!is_device_dma_capable(dev))
133 return NULL; 137 return NULL;
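Note: ISA_DMA_BIT_MASK above encodes the classic ISA addressing limit -- such devices can only reach the low 16MB, i.e. a 24-bit mask. The mask arithmetic, using the same shape as the kernel's DMA_BIT_MASK() macro, can be checked in isolation:

#include <stdio.h>

/* Same shape as the kernel's DMA_BIT_MASK() macro. */
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
        printf("ISA (24-bit) mask: %#llx\n", DMA_BIT_MASK(24)); /* 0xffffff */
        printf("32-bit mask:       %#llx\n", DMA_BIT_MASK(32));
        return 0;
}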
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fad..d83892226f73 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
796#define KVM_ARCH_WANT_MMU_NOTIFIER 796#define KVM_ARCH_WANT_MMU_NOTIFIER
797int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 797int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
798int kvm_age_hva(struct kvm *kvm, unsigned long hva); 798int kvm_age_hva(struct kvm *kvm, unsigned long hva);
799void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
799int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 800int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
800int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 801int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
801int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 802int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64c5814..f1363b72364f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
 static inline void enable_p5_mce(void) {}
 #endif
 
+extern void (*x86_mce_decode_callback)(struct mce *m);
+
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8aebcc41041d..efb38994859c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
 
 static inline unsigned long __raw_local_save_flags(void)
 {
-        unsigned long f;
-
-        asm volatile(paravirt_alt(PARAVIRT_CALL)
-                     : "=a"(f)
-                     : paravirt_type(pv_irq_ops.save_fl),
-                       paravirt_clobber(CLBR_EAX)
-                     : "memory", "cc");
-        return f;
+        return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
 }
 
 static inline void raw_local_irq_restore(unsigned long f)
 {
-        asm volatile(paravirt_alt(PARAVIRT_CALL)
-                     : "=a"(f)
-                     : PV_FLAGS_ARG(f),
-                       paravirt_type(pv_irq_ops.restore_fl),
-                       paravirt_clobber(CLBR_EAX)
-                     : "memory", "cc");
+        PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
 }
 
 static inline void raw_local_irq_disable(void)
 {
-        asm volatile(paravirt_alt(PARAVIRT_CALL)
-                     :
-                     : paravirt_type(pv_irq_ops.irq_disable),
-                       paravirt_clobber(CLBR_EAX)
-                     : "memory", "eax", "cc");
+        PVOP_VCALLEE0(pv_irq_ops.irq_disable);
 }
 
 static inline void raw_local_irq_enable(void)
 {
-        asm volatile(paravirt_alt(PARAVIRT_CALL)
-                     :
-                     : paravirt_type(pv_irq_ops.irq_enable),
-                       paravirt_clobber(CLBR_EAX)
-                     : "memory", "eax", "cc");
+        PVOP_VCALLEE0(pv_irq_ops.irq_enable);
 }
 
 static inline unsigned long __raw_local_irq_save(void)
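Note: the paravirt.h hunk above replaces four open-coded asm thunks with the PVOP_CALLEE wrappers around pv_irq_ops. Stripped of the boot-time patching and clobber machinery, the underlying shape is just an ops structure of function pointers called through thin inline helpers; a toy user-space model of that indirection, with all values invented:

#include <stdio.h>

/*
 * Toy model of the pv_irq_ops indirection these macros wrap: a struct
 * of function pointers a backend fills in, called through thin inline
 * helpers. (The real macros additionally encode a lighter register
 * clobber contract so the callsites stay cheap.)
 */
struct pv_irq_ops {
        unsigned long (*save_fl)(void);
        void (*restore_fl)(unsigned long flags);
};

static unsigned long native_save_fl(void) { return 0x200; /* IF set */ }
static void native_restore_fl(unsigned long flags) { (void)flags; }

static struct pv_irq_ops pv_irq_ops = {
        .save_fl    = native_save_fl,
        .restore_fl = native_restore_fl,
};

static unsigned long raw_local_save_flags(void)
{
        return pv_irq_ops.save_fl();    /* kernel: PVOP_CALLEE0(...) */
}

int main(void)
{
        unsigned long f = raw_local_save_flags();

        pv_irq_ops.restore_fl(f);
        printf("flags=%#lx\n", f);
        return 0;
}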
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index dd0f5b32489d..9357473c8da0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void);
494#define EXTRA_CLOBBERS 494#define EXTRA_CLOBBERS
495#define VEXTRA_CLOBBERS 495#define VEXTRA_CLOBBERS
496#else /* CONFIG_X86_64 */ 496#else /* CONFIG_X86_64 */
497/* [re]ax isn't an arg, but the return val */
497#define PVOP_VCALL_ARGS \ 498#define PVOP_VCALL_ARGS \
498 unsigned long __edi = __edi, __esi = __esi, \ 499 unsigned long __edi = __edi, __esi = __esi, \
499 __edx = __edx, __ecx = __ecx 500 __edx = __edx, __ecx = __ecx, __eax = __eax
500#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax 501#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
501 502
502#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) 503#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
503#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) 504#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void);
509 "=c" (__ecx) 510 "=c" (__ecx)
510#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) 511#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
511 512
513/* void functions are still allowed [re]ax for scratch */
512#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) 514#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
513#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS 515#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
514 516
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void);
583 VEXTRA_CLOBBERS, \ 585 VEXTRA_CLOBBERS, \
584 pre, post, ##__VA_ARGS__) 586 pre, post, ##__VA_ARGS__)
585 587
586#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ 588#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
587 ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ 589 ____PVOP_VCALL(op.func, CLBR_RET_REG, \
588 PVOP_VCALLEE_CLOBBERS, , \ 590 PVOP_VCALLEE_CLOBBERS, , \
589 pre, post, ##__VA_ARGS__) 591 pre, post, ##__VA_ARGS__)
590 592
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b467bf3c680..d1f4a760be23 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -277,6 +277,7 @@ static inline pteval_t pte_flags(pte_t pte)
277typedef struct page *pgtable_t; 277typedef struct page *pgtable_t;
278 278
279extern pteval_t __supported_pte_mask; 279extern pteval_t __supported_pte_mask;
280extern void set_nx(void);
280extern int nx_enabled; 281extern int nx_enabled;
281 282
282#define pgprot_writecombine pgprot_writecombine 283#define pgprot_writecombine pgprot_writecombine
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c3429e8b2424..c9786480f0fe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -1000,7 +1000,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
1000#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) 1000#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
1001 1001
1002#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) 1002#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
1003#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ 1003extern unsigned long KSTK_ESP(struct task_struct *task);
1004#endif /* CONFIG_X86_64 */ 1004#endif /* CONFIG_X86_64 */
1005 1005
1006extern void start_thread(struct pt_regs *regs, unsigned long new_ip, 1006extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6f0695d744bf..40e37b10c6c0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,6 +143,7 @@ extern unsigned long node_remap_size[];
143 | 1*SD_BALANCE_FORK \ 143 | 1*SD_BALANCE_FORK \
144 | 0*SD_BALANCE_WAKE \ 144 | 0*SD_BALANCE_WAKE \
145 | 1*SD_WAKE_AFFINE \ 145 | 1*SD_WAKE_AFFINE \
146 | 0*SD_PREFER_LOCAL \
146 | 0*SD_SHARE_CPUPOWER \ 147 | 0*SD_SHARE_CPUPOWER \
147 | 0*SD_POWERSAVINGS_BALANCE \ 148 | 0*SD_POWERSAVINGS_BALANCE \
148 | 0*SD_SHARE_PKG_RESOURCES \ 149 | 0*SD_SHARE_PKG_RESOURCES \
@@ -165,21 +166,11 @@ static inline int numa_node_id(void)
165 return 0; 166 return 0;
166} 167}
167 168
168static inline int cpu_to_node(int cpu)
169{
170 return 0;
171}
172
173static inline int early_cpu_to_node(int cpu) 169static inline int early_cpu_to_node(int cpu)
174{ 170{
175 return 0; 171 return 0;
176} 172}
177 173
178static inline const struct cpumask *cpumask_of_node(int node)
179{
180 return cpu_online_mask;
181}
182
183static inline void setup_node_to_cpumask_map(void) { } 174static inline void setup_node_to_cpumask_map(void) { }
184 175
185#endif 176#endif
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 04eb6c958b9d..d1414af98559 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -19,6 +19,8 @@
19#include <asm/types.h> 19#include <asm/types.h>
20#include <asm/percpu.h> 20#include <asm/percpu.h>
21#include <asm/uv/uv_mmrs.h> 21#include <asm/uv/uv_mmrs.h>
22#include <asm/irq_vectors.h>
23#include <asm/io_apic.h>
22 24
23 25
24/* 26/*
@@ -114,7 +116,7 @@
114/* 116/*
115 * The largest possible NASID of a C or M brick (+ 2) 117 * The largest possible NASID of a C or M brick (+ 2)
116 */ 118 */
117#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) 119#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
118 120
119struct uv_scir_s { 121struct uv_scir_s {
120 struct timer_list timer; 122 struct timer_list timer;
@@ -230,6 +232,20 @@ static inline unsigned long uv_gpa(void *v)
230 return uv_soc_phys_ram_to_gpa(__pa(v)); 232 return uv_soc_phys_ram_to_gpa(__pa(v));
231} 233}
232 234
235/* gnode -> pnode */
236static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
237{
238 return gpa >> uv_hub_info->m_val;
239}
240
241/* gpa -> pnode */
242static inline int uv_gpa_to_pnode(unsigned long gpa)
243{
244 unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1;
245
246 return uv_gpa_to_gnode(gpa) & n_mask;
247}
248
233/* pnode, offset --> socket virtual */ 249/* pnode, offset --> socket virtual */
234static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) 250static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
235{ 251{
@@ -421,9 +437,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
421static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) 437static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
422{ 438{
423 unsigned long val; 439 unsigned long val;
440 unsigned long dmode = dest_Fixed;
441
442 if (vector == NMI_VECTOR)
443 dmode = dest_NMI;
424 444
425 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 445 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
426 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | 446 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
447 (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
427 (vector << UVH_IPI_INT_VECTOR_SHFT); 448 (vector << UVH_IPI_INT_VECTOR_SHFT);
428 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 449 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
429} 450}
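Note: the new uv_gpa_to_gnode()/uv_gpa_to_pnode() helpers above treat a UV global address as node-number bits sitting above an m_val-bit per-node offset, with the pnode being the low n_val bits of the gnode. A self-contained sketch of that arithmetic, with invented field widths (the real m_val/n_val come from uv_hub_info at runtime):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static const unsigned m_val = 40;  /* offset bits per node (assumed) */
static const unsigned n_val = 10;  /* node-number bits (assumed) */

static uint64_t gpa_to_gnode(uint64_t gpa)
{
        return gpa >> m_val;    /* shift out the per-node offset */
}

static unsigned gpa_to_pnode(uint64_t gpa)
{
        uint64_t n_mask = (1ULL << n_val) - 1;

        return (unsigned)(gpa_to_gnode(gpa) & n_mask);
}

int main(void)
{
        uint64_t gpa = (5ULL << m_val) | 0x1234;  /* node 5, offset 0x1234 */

        printf("gnode=%" PRIu64 " pnode=%u\n",
               gpa_to_gnode(gpa), gpa_to_pnode(gpa));
        return 0;
}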
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8c44c232efcb..59cdfa4686b2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
48 * P4, Core and beyond CPUs 48 * P4, Core and beyond CPUs
49 */ 49 */
50 if (c->x86_vendor == X86_VENDOR_INTEL && 50 if (c->x86_vendor == X86_VENDOR_INTEL &&
51 (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14))) 51 (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
52 flags->bm_control = 0; 52 flags->bm_control = 0;
53} 53}
54EXPORT_SYMBOL(acpi_processor_power_init_bm_check); 54EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 7da00b799cda..060fff8f5c5b 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -57,5 +57,8 @@ SECTIONS
57 *(.note*) 57 *(.note*)
58 } 58 }
59 59
60 /*
61 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
62 */
60 . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); 63 . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
61} 64}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 98f230f6a28d..0285521e0a99 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1220,6 +1220,8 @@ static void __detach_device(struct protection_domain *domain, u16 devid)
1220 amd_iommu_dev_table[devid].data[1] = 0; 1220 amd_iommu_dev_table[devid].data[1] = 0;
1221 amd_iommu_dev_table[devid].data[2] = 0; 1221 amd_iommu_dev_table[devid].data[2] = 0;
1222 1222
1223 amd_iommu_apply_erratum_63(devid);
1224
1223 /* decrease reference counter */ 1225 /* decrease reference counter */
1224 domain->dev_cnt -= 1; 1226 domain->dev_cnt -= 1;
1225 1227
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index b4b61d462dcc..c20001e4f556 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -240,7 +240,7 @@ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
240 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); 240 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
241} 241}
242 242
243static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) 243static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
244{ 244{
245 u32 ctrl; 245 u32 ctrl;
246 246
@@ -519,6 +519,26 @@ static void set_dev_entry_bit(u16 devid, u8 bit)
519 amd_iommu_dev_table[devid].data[i] |= (1 << _bit); 519 amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
520} 520}
521 521
522static int get_dev_entry_bit(u16 devid, u8 bit)
523{
524 int i = (bit >> 5) & 0x07;
525 int _bit = bit & 0x1f;
526
527 return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
528}
529
530
531void amd_iommu_apply_erratum_63(u16 devid)
532{
533 int sysmgt;
534
535 sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
536 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
537
538 if (sysmgt == 0x01)
539 set_dev_entry_bit(devid, DEV_ENTRY_IW);
540}
541
522/* Writes the specific IOMMU for a device into the rlookup table */ 542/* Writes the specific IOMMU for a device into the rlookup table */
523static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) 543static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
524{ 544{
@@ -547,6 +567,8 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
547 if (flags & ACPI_DEVFLAG_LINT1) 567 if (flags & ACPI_DEVFLAG_LINT1)
548 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); 568 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
549 569
570 amd_iommu_apply_erratum_63(devid);
571
550 set_iommu_for_device(iommu, devid); 572 set_iommu_for_device(iommu, devid);
551} 573}
552 574
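Note: get_dev_entry_bit()/set_dev_entry_bit() above index a flat bit number into a device-table entry made of 32-bit words: bit >> 5 (masked to the 8 words of an entry) selects the word, and bit & 0x1f selects the bit within it. A stand-alone model of that indexing, independent of the IOMMU structures:

#include <stdio.h>

/* 'data' models one device-table entry as eight 32-bit words. */
static unsigned int data[8];

static void set_bit_flat(unsigned char bit)
{
        int i = (bit >> 5) & 0x07;      /* which 32-bit word   */
        int b = bit & 0x1f;             /* which bit in word   */

        data[i] |= 1U << b;
}

static int get_bit_flat(unsigned char bit)
{
        int i = (bit >> 5) & 0x07;
        int b = bit & 0x1f;

        return (data[i] >> b) & 1;
}

int main(void)
{
        set_bit_flat(70);               /* word 2, bit 6 */
        printf("bit 70 = %d (word2=%#x)\n", get_bit_flat(70), data[2]);
        return 0;
}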
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f5f5886a6b53..326c25477d3d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -352,14 +352,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
352 352
353 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { 353 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
354 alias.v = uv_read_local_mmr(redir_addrs[i].alias); 354 alias.v = uv_read_local_mmr(redir_addrs[i].alias);
355 if (alias.s.base == 0) { 355 if (alias.s.enable && alias.s.base == 0) {
356 *size = (1UL << alias.s.m_alias); 356 *size = (1UL << alias.s.m_alias);
357 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); 357 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
358 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; 358 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
359 return; 359 return;
360 } 360 }
361 } 361 }
362 BUG(); 362 *base = *size = 0;
363} 363}
364 364
365enum map_type {map_wb, map_uc}; 365enum map_type {map_wb, map_uc};
@@ -619,12 +619,12 @@ void __init uv_system_init(void)
619 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 619 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
620 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; 620 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
621 uv_cpu_hub_info(cpu)->m_val = m_val; 621 uv_cpu_hub_info(cpu)->m_val = m_val;
622 uv_cpu_hub_info(cpu)->n_val = m_val; 622 uv_cpu_hub_info(cpu)->n_val = n_val;
623 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 623 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
624 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 624 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
625 uv_cpu_hub_info(cpu)->pnode = pnode; 625 uv_cpu_hub_info(cpu)->pnode = pnode;
626 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; 626 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
627 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 627 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
628 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 628 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
629 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 629 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
630 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 630 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
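Note: get_lowmem_redirect() above now insists that an alias is enabled before trusting its base == 0 match, and degrades to reporting an empty range instead of calling BUG() when nothing matches. The shape of that hardened probe, with invented field names and values:

#include <stdio.h>

struct alias { int enable; unsigned long base; unsigned m_alias; };

/*
 * Only trust an enabled alias; report a zero-sized range (instead of
 * crashing) when no redirect matches.
 */
static void get_lowmem_redirect(const struct alias *a, int n,
                                unsigned long *base, unsigned long *size)
{
        int i;

        for (i = 0; i < n; i++) {
                if (a[i].enable && a[i].base == 0) {
                        *size = 1UL << a[i].m_alias;
                        *base = 0x1000000UL;    /* stand-in dest_base */
                        return;
                }
        }
        *base = *size = 0;      /* was: BUG() */
}

int main(void)
{
        struct alias tab[] = { { 0, 0, 24 }, { 1, 0, 26 } };
        unsigned long base, size;

        get_lowmem_redirect(tab, 2, &base, &size);
        printf("base=%#lx size=%#lx\n", base, size);
        return 0;
}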
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 7d5c3b0ea8da..8b581d3905cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -526,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = {
 
 static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
 {
-        /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
+        /* Intel Xeon Processor 7100 Series Specification Update
+         * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
          * AL30: A Machine Check Exception (MCE) Occurring during an
          * Enhanced Intel SpeedStep Technology Ratio Change May Cause
-         * Both Processor Cores to Lock Up when HT is enabled*/
+         * Both Processor Cores to Lock Up. */
         if (c->x86_vendor == X86_VENDOR_INTEL) {
                 if ((c->x86 == 15) &&
                     (c->x86_model == 6) &&
-                    (c->x86_mask == 8) && smt_capable())
+                    (c->x86_mask == 8)) {
+                        printk(KERN_INFO "acpi-cpufreq: Intel(R) "
+                            "Xeon(R) 7100 Errata AL30, processors may "
+                            "lock up on frequency changes: disabling "
+                            "acpi-cpufreq.\n");
                         return -ENODEV;
+                    }
         }
         return 0;
 }
@@ -549,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
         unsigned int result = 0;
         struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
         struct acpi_processor_performance *perf;
+#ifdef CONFIG_SMP
+        static int blacklisted;
+#endif
 
         dprintk("acpi_cpufreq_cpu_init\n");
 
 #ifdef CONFIG_SMP
-        result = acpi_cpufreq_blacklist(c);
-        if (result)
-                return result;
+        if (blacklisted)
+                return blacklisted;
+        blacklisted = acpi_cpufreq_blacklist(c);
+        if (blacklisted)
+                return blacklisted;
 #endif
 
         data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
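Note: the init-path change above caches the blacklist verdict in a function-local static, so the (now noisy) errata check runs once rather than printing once per CPU. The same pattern in isolation:

#include <stdio.h>

/* Stand-in for the real check; -19 plays the role of -ENODEV. */
static int blacklist_check(void)
{
        puts("running expensive/noisy check");
        return -19;
}

/*
 * Run the check once, remember the verdict in a function-local
 * static, and short-circuit every later call.
 */
static int cpu_init(void)
{
        static int blacklisted;

        if (blacklisted)
                return blacklisted;
        blacklisted = blacklist_check();
        if (blacklisted)
                return blacklisted;
        /* ... rest of per-CPU init ... */
        return 0;
}

int main(void)
{
        printf("%d %d\n", cpu_init(), cpu_init()); /* check ran once */
        return 0;
}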
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index ce2ed3e4aad9..cabd2fa3fc93 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
813 memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); 813 memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
814 break; 814 break;
815 case 1 ... 15: 815 case 1 ... 15:
816 longhaul_version = TYPE_LONGHAUL_V1; 816 longhaul_version = TYPE_LONGHAUL_V2;
817 if (c->x86_mask < 8) { 817 if (c->x86_mask < 8) {
818 cpu_model = CPU_SAMUEL2; 818 cpu_model = CPU_SAMUEL2;
819 cpuname = "C3 'Samuel 2' [C5B]"; 819 cpuname = "C3 'Samuel 2' [C5B]";
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6394aa5c7985..3f12dabeab52 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data)
1022 * set it to 1 to avoid problems in the future. 1022 * set it to 1 to avoid problems in the future.
1023 * For all others it's a BIOS bug. 1023 * For all others it's a BIOS bug.
1024 */ 1024 */
1025 if (!boot_cpu_data.x86 == 0x11) 1025 if (boot_cpu_data.x86 != 0x11)
1026 printk(KERN_ERR FW_WARN PFX "Invalid zero transition " 1026 printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
1027 "latency\n"); 1027 "latency\n");
1028 max_latency = 1; 1028 max_latency = 1;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 6911e91fb4f6..3ae5a7a3a500 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void)
232 return 0; 232 return 0;
233} 233}
234 234
235struct get_freq_data { 235static void get_freq_data(void *_speed)
236 unsigned int speed;
237 unsigned int processor;
238};
239
240static void get_freq_data(void *_data)
241{ 236{
242 struct get_freq_data *data = _data; 237 unsigned int *speed = _speed;
243 238
244 data->speed = speedstep_get_frequency(data->processor); 239 *speed = speedstep_get_frequency(speedstep_processor);
245} 240}
246 241
247static unsigned int speedstep_get(unsigned int cpu) 242static unsigned int speedstep_get(unsigned int cpu)
248{ 243{
249 struct get_freq_data data = { .processor = cpu }; 244 unsigned int speed;
250 245
251 /* You're supposed to ensure CPU is online. */ 246 /* You're supposed to ensure CPU is online. */
252 if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) 247 if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
253 BUG(); 248 BUG();
254 249
255 dprintk("detected %u kHz as current frequency\n", data.speed); 250 dprintk("detected %u kHz as current frequency\n", speed);
256 return data.speed; 251 return speed;
257} 252}
258 253
259/** 254/**
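Note: the speedstep-ich rework above drops the one-field-plus-redundant-parameter struct: the cross-CPU callback now just fills in a single unsigned int through its void * argument, since the processor type is already known globally (speedstep_processor). The simplified calling shape, modeled in plain C with the cross-call replaced by a direct call:

#include <stdio.h>

static unsigned int current_khz(void) { return 1400000; } /* stand-in */

/*
 * Callback signature matches what smp_call_function_single() expects:
 * one opaque pointer, here pointing at the single value to fill in.
 */
static void get_freq_data(void *_speed)
{
        unsigned int *speed = _speed;

        *speed = current_khz();
}

int main(void)
{
        unsigned int speed;

        get_freq_data(&speed); /* kernel: smp_call_function_single(cpu, ...) */
        printf("%u kHz\n", speed);
        return 0;
}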
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7029f0e2acad..472763d92098 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -98,8 +98,9 @@ static struct notifier_block mce_raise_nb = {
 };
 
 /* Inject mce on current CPU */
-static int raise_local(struct mce *m)
+static int raise_local(void)
 {
+        struct mce *m = &__get_cpu_var(injectm);
         int context = MCJ_CTX(m->inject_flags);
         int ret = 0;
         int cpu = m->extcpu;
@@ -167,12 +168,12 @@ static void raise_mce(struct mce *m)
                         }
                         cpu_relax();
                 }
-                raise_local(m);
+                raise_local();
                 put_cpu();
                 put_online_cpus();
         } else
 #endif
-                raise_local(m);
+                raise_local();
 }
 
 /* Error injection interface */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f5aab26320e..721a77ca8115 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
+static void default_decode_mce(struct mce *m)
+{
+        pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+        pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
+
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,49 +177,46 @@ void mce_log(struct mce *mce)
         set_bit(0, &mce_need_notify);
 }
 
-void __weak decode_mce(struct mce *m)
-{
-        return;
-}
-
 static void print_mce(struct mce *m)
 {
-        printk(KERN_EMERG
-               "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+        pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
                m->extcpu, m->mcgstatus, m->bank, m->status);
+
         if (m->ip) {
-                printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
-                       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
-                       m->cs, m->ip);
+                pr_emerg("RIP%s %02x:<%016Lx> ",
+                        !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+                                m->cs, m->ip);
+
                 if (m->cs == __KERNEL_CS)
                         print_symbol("{%s}", m->ip);
-                printk(KERN_CONT "\n");
+                pr_cont("\n");
         }
-        printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+        pr_emerg("TSC %llx ", m->tsc);
         if (m->addr)
-                printk(KERN_CONT "ADDR %llx ", m->addr);
+                pr_cont("ADDR %llx ", m->addr);
         if (m->misc)
-                printk(KERN_CONT "MISC %llx ", m->misc);
-        printk(KERN_CONT "\n");
-        printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
-               m->cpuvendor, m->cpuid, m->time, m->socketid,
-               m->apicid);
+                pr_cont("MISC %llx ", m->misc);
 
-        decode_mce(m);
+        pr_cont("\n");
+        pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+                m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+
+        /*
+         * Print out human-readable details about the MCE error,
+         * (if the CPU has an implementation for that):
+         */
+        x86_mce_decode_callback(m);
 }
 
 static void print_mce_head(void)
 {
-        printk(KERN_EMERG "\nHARDWARE ERROR\n");
+        pr_emerg("\nHARDWARE ERROR\n");
 }
 
 static void print_mce_tail(void)
 {
-        printk(KERN_EMERG "This is not a software problem!\n"
-#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
-               "Run through mcelog --ascii to decode and contact your hardware vendor\n"
-#endif
-               );
+        pr_emerg("This is not a software problem!\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -221,6 +230,7 @@ static atomic_t mce_fake_paniced;
 static void wait_for_panic(void)
 {
         long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
         preempt_disable();
         local_irq_enable();
         while (timeout-- > 0)
@@ -288,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 static int msr_to_offset(u32 msr)
 {
         unsigned bank = __get_cpu_var(injectm.bank);
+
         if (msr == rip_msr)
                 return offsetof(struct mce, ip);
         if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -305,13 +316,25 @@ static int msr_to_offset(u32 msr)
 static u64 mce_rdmsrl(u32 msr)
 {
         u64 v;
+
         if (__get_cpu_var(injectm).finished) {
                 int offset = msr_to_offset(msr);
+
                 if (offset < 0)
                         return 0;
                 return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
         }
-        rdmsrl(msr, v);
+
+        if (rdmsrl_safe(msr, &v)) {
+                WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+                /*
+                 * Return zero in case the access faulted. This should
+                 * not happen normally but can happen if the CPU does
+                 * something weird, or if the code is buggy.
+                 */
+                v = 0;
+        }
+
         return v;
 }
 
@@ -319,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
 {
         if (__get_cpu_var(injectm).finished) {
                 int offset = msr_to_offset(msr);
+
                 if (offset >= 0)
                         *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
                 return;
@@ -415,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
                 m->ip = mce_rdmsrl(rip_msr);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
+
 /*
  * Called after interrupts have been reenabled again
  * when a MCE happened during an interrupts off region
@@ -1172,6 +1196,7 @@ static int mce_banks_init(void)
                 return -ENOMEM;
         for (i = 0; i < banks; i++) {
                 struct mce_bank *b = &mce_banks[i];
+
                 b->ctl = -1ULL;
                 b->init = 1;
         }
@@ -1189,7 +1214,8 @@ static int __cpuinit mce_cap_init(void)
         rdmsrl(MSR_IA32_MCG_CAP, cap);
 
         b = cap & MCG_BANKCNT_MASK;
-        printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+        if (!banks)
+                printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
 
         if (b > MAX_NR_BANKS) {
                 printk(KERN_WARNING
@@ -1203,6 +1229,7 @@ static int __cpuinit mce_cap_init(void)
         banks = b;
         if (!mce_banks) {
                 int err = mce_banks_init();
+
                 if (err)
                         return err;
         }
@@ -1237,6 +1264,7 @@ static void mce_init(void)
 
         for (i = 0; i < banks; i++) {
                 struct mce_bank *b = &mce_banks[i];
+
                 if (!b->init)
                         continue;
                 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
@@ -1626,6 +1654,7 @@ static int mce_disable(void)
 
         for (i = 0; i < banks; i++) {
                 struct mce_bank *b = &mce_banks[i];
+
                 if (b->init)
                         wrmsrl(MSR_IA32_MCx_CTL(i), 0);
         }
@@ -1911,6 +1940,7 @@ static void mce_disable_cpu(void *h)
                 cmci_clear();
         for (i = 0; i < banks; i++) {
                 struct mce_bank *b = &mce_banks[i];
+
                 if (b->init)
                         wrmsrl(MSR_IA32_MCx_CTL(i), 0);
         }
@@ -1928,6 +1958,7 @@ static void mce_reenable_cpu(void *h)
                 cmci_reenable();
         for (i = 0; i < banks; i++) {
                 struct mce_bank *b = &mce_banks[i];
+
                 if (b->init)
                         wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
         }
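Note: the mce.c changes above replace the weak decode_mce() hook with a published function pointer, x86_mce_decode_callback, initialized to a safe default that an EDAC driver can later override. A toy user-space model of that registration pattern (struct and values invented for illustration):

#include <stdio.h>

struct mce { unsigned long long status; };

static void default_decode_mce(struct mce *m)
{
        (void)m;
        printf("no decoder registered; run through 'mcelog --ascii'\n");
}

/*
 * The hook: a function pointer with a safe default that a decoder
 * module can overwrite at load time.
 */
static void (*mce_decode_callback)(struct mce *m) = default_decode_mce;

static void edac_decode(struct mce *m)
{
        printf("decoded status %#llx\n", m->status);
}

int main(void)
{
        struct mce m = { .status = 0xb200000000000000ULL };

        mce_decode_callback(&m);            /* default path   */
        mce_decode_callback = edac_decode;  /* driver loads   */
        mce_decode_callback(&m);            /* decoded output */
        return 0;
}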
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 889f665fe93d..7c785634af2b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -8,6 +8,7 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/interrupt.h> 9#include <linux/interrupt.h>
10#include <linux/percpu.h> 10#include <linux/percpu.h>
11#include <linux/sched.h>
11#include <asm/apic.h> 12#include <asm/apic.h>
12#include <asm/processor.h> 13#include <asm/processor.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 63a56d147e4a..b3a1dba75330 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -34,20 +34,31 @@
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL          (300 * HZ)
 
-static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
-static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-static DEFINE_PER_CPU(bool, thermal_throttle_active);
+/*
+ * Current thermal throttling state:
+ */
+struct thermal_state {
+        bool                    is_throttled;
+
+        u64                     next_check;
+        unsigned long           throttle_count;
+        unsigned long           last_throttle_count;
+};
+
+static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 
 static atomic_t therm_throt_en = ATOMIC_INIT(0);
 
 #ifdef CONFIG_SYSFS
 #define define_therm_throt_sysdev_one_ro(_name)                         \
         static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
 
 #define define_therm_throt_sysdev_show_func(name)                       \
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,   \
-                                              struct sysdev_attribute *attr, \
-                                              char *buf)                \
+                                                                        \
+static ssize_t therm_throt_sysdev_show_##name(                          \
+                        struct sys_device *dev,                         \
+                        struct sysdev_attribute *attr,                  \
+                        char *buf)                                      \
 {                                                                       \
         unsigned int cpu = dev->id;                                     \
         ssize_t ret;                                                    \
@@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
         preempt_disable();      /* CPU hotplug */                       \
         if (cpu_online(cpu))                                            \
                 ret = sprintf(buf, "%lu\n",                             \
-                              per_cpu(thermal_throttle_##name, cpu));   \
+                              per_cpu(thermal_state, cpu).name);        \
         else                                                            \
                 ret = 0;                                                \
         preempt_enable();                                               \
@@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
         return ret;                                                     \
 }
 
-define_therm_throt_sysdev_show_func(count);
-define_therm_throt_sysdev_one_ro(count);
+define_therm_throt_sysdev_show_func(throttle_count);
+define_therm_throt_sysdev_one_ro(throttle_count);
 
 static struct attribute *thermal_throttle_attrs[] = {
-        &attr_count.attr,
+        &attr_throttle_count.attr,
         NULL
 };
 
@@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = {
  *          1 : Event should be logged further, and a message has been
  *              printed to the syslog.
  */
-static int therm_throt_process(int curr)
+static int therm_throt_process(bool is_throttled)
 {
-        unsigned int cpu = smp_processor_id();
-        __u64 tmp_jiffs = get_jiffies_64();
-        bool was_throttled = __get_cpu_var(thermal_throttle_active);
-        bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
+        struct thermal_state *state;
+        unsigned int this_cpu;
+        bool was_throttled;
+        u64 now;
+
+        this_cpu = smp_processor_id();
+        now = get_jiffies_64();
+        state = &per_cpu(thermal_state, this_cpu);
+
+        was_throttled = state->is_throttled;
+        state->is_throttled = is_throttled;
 
         if (is_throttled)
-                __get_cpu_var(thermal_throttle_count)++;
+                state->throttle_count++;
105 123
106 if (!(was_throttled ^ is_throttled) && 124 if (time_before64(now, state->next_check) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check))) 125 state->throttle_count != state->last_throttle_count)
108 return 0; 126 return 0;
109 127
110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 128 state->next_check = now + CHECK_INTERVAL;
129 state->last_throttle_count = state->throttle_count;
111 130
112 /* if we just entered the thermal event */ 131 /* if we just entered the thermal event */
113 if (is_throttled) { 132 if (is_throttled) {
114 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 133 printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);
115 "cpu clock throttled (total events = %lu)\n",
116 cpu, __get_cpu_var(thermal_throttle_count));
117 134
118 add_taint(TAINT_MACHINE_CHECK); 135 add_taint(TAINT_MACHINE_CHECK);
119 return 1; 136 return 1;
120 } 137 }
121 if (was_throttled) { 138 if (was_throttled) {
122 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); 139 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
123 return 1; 140 return 1;
124 } 141 }
125 142
@@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void)
213 __u64 msr_val; 230 __u64 msr_val;
214 231
215 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 232 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
216 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) 233 if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0))
217 mce_log_therm_throt_event(msr_val); 234 mce_log_therm_throt_event(msr_val);
218} 235}
219 236
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 315738c74aad..73c86db5acbe 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -846,7 +846,7 @@ int __init mtrr_cleanup(unsigned address_bits)
846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
847 847
848 range_sums = sum_ranges(range, nr_range); 848 range_sums = sum_ranges(range, nr_range);
849 printk(KERN_INFO "total RAM coverred: %ldM\n", 849 printk(KERN_INFO "total RAM covered: %ldM\n",
850 range_sums >> (20 - PAGE_SHIFT)); 850 range_sums >> (20 - PAGE_SHIFT));
851 851
852 if (mtrr_chunk_size && mtrr_gran_size) { 852 if (mtrr_chunk_size && mtrr_gran_size) {
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index f04e72527604..3c1b12d461d1 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
96 unsigned long long base, size; 96 unsigned long long base, size;
97 char *ptr; 97 char *ptr;
98 char line[LINE_SIZE]; 98 char line[LINE_SIZE];
99 int length;
99 size_t linelen; 100 size_t linelen;
100 101
101 if (!capable(CAP_SYS_ADMIN)) 102 if (!capable(CAP_SYS_ADMIN))
102 return -EPERM; 103 return -EPERM;
103 if (!len)
104 return -EINVAL;
105 104
106 memset(line, 0, LINE_SIZE); 105 memset(line, 0, LINE_SIZE);
107 if (len > LINE_SIZE) 106
108 len = LINE_SIZE; 107 length = len;
109 if (copy_from_user(line, buf, len - 1)) 108 length--;
109
110 if (length > LINE_SIZE - 1)
111 length = LINE_SIZE - 1;
112
113 if (length < 0)
114 return -EINVAL;
115
116 if (copy_from_user(line, buf, length))
110 return -EFAULT; 117 return -EFAULT;
111 118
112 linelen = strlen(line); 119 linelen = strlen(line);
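
The mtrr_write() change above replaces the old truncate-to-LINE_SIZE logic with a signed length that is decremented and clamped, so a zero-length write is rejected and at most LINE_SIZE - 1 bytes land in the NUL-filled buffer. A userspace sketch of the same clamping, with memcpy standing in for copy_from_user(); the dropped final byte is the command's trailing newline:

    #include <stdio.h>
    #include <string.h>

    #define LINE_SIZE 80

    static int clamped_copy(char line[LINE_SIZE], const char *buf, size_t len)
    {
        int length;

        memset(line, 0, LINE_SIZE);     /* buffer stays NUL-terminated */

        length = len;
        length--;                       /* drop the expected '\n' */

        if (length > LINE_SIZE - 1)
            length = LINE_SIZE - 1;     /* never overrun the buffer */
        if (length < 0)
            return -1;                  /* len == 0: -EINVAL in the kernel */

        memcpy(line, buf, length);
        return 0;
    }

    int main(void)
    {
        char line[LINE_SIZE];
        const char *cmd = "base=0xf8000000 size=0x400000 type=write-combining\n";

        if (clamped_copy(line, cmd, strlen(cmd)) == 0)
            printf("parsed: '%s'\n", line);
        return clamped_copy(line, "", 0) == -1 ? 0 : 1;
    }
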
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a3c7adb06b78..b5801c311846 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1790,6 +1790,9 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1790void set_perf_event_pending(void) 1790void set_perf_event_pending(void)
1791{ 1791{
1792#ifdef CONFIG_X86_LOCAL_APIC 1792#ifdef CONFIG_X86_LOCAL_APIC
1793 if (!x86_pmu.apic || !x86_pmu_initialized())
1794 return;
1795
1793 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1796 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1794#endif 1797#endif
1795} 1798}
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index f7cdb3b457aa..cd97ce18c29d 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -16,6 +16,22 @@ static void *kdump_buf_page;
16/* Stores the physical address of elf header of crash image. */ 16/* Stores the physical address of elf header of crash image. */
17unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; 17unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
18 18
19static inline bool is_crashed_pfn_valid(unsigned long pfn)
20{
21#ifndef CONFIG_X86_PAE
22 /*
 23 * A non-PAE kdump kernel executed from a PAE one will crop the high pte
 24 * bits and poke unwanted space, counting again from address 0; we
 25 * don't want that. The pte must fit into an unsigned long. In fact the
26 * test checks high 12 bits for being zero (pfn will be shifted left
27 * by PAGE_SHIFT).
28 */
29 return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn;
30#else
31 return true;
32#endif
33}
34
19/** 35/**
20 * copy_oldmem_page - copy one page from "oldmem" 36 * copy_oldmem_page - copy one page from "oldmem"
21 * @pfn: page frame number to be copied 37 * @pfn: page frame number to be copied
@@ -41,6 +57,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
41 if (!csize) 57 if (!csize)
42 return 0; 58 return 0;
43 59
60 if (!is_crashed_pfn_valid(pfn))
61 return -EFAULT;
62
44 vaddr = kmap_atomic_pfn(pfn, KM_PTE0); 63 vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
45 64
46 if (!userbuf) { 65 if (!userbuf) {
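
is_crashed_pfn_valid() above rejects any PFN whose physical address would not survive being packed into a non-PAE pte: shift it left by PAGE_SHIFT into a 32-bit word, shift back, and require the round trip to be lossless. A compact model of that check (plain C, standing in for the pte_pfn/pfn_pte round trip):

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    /*
     * On a non-PAE kernel the pte holds pfn << PAGE_SHIFT in a 32-bit
     * word, so a pfn with bits set above (32 - PAGE_SHIFT) would be
     * silently truncated; the round trip catches exactly that case.
     */
    static bool pfn_fits_pte(unsigned long long pfn)
    {
        unsigned int pte = (unsigned int)(pfn << PAGE_SHIFT);

        return (pte >> PAGE_SHIFT) == pfn;
    }

    int main(void)
    {
        printf("pfn 0x12345  ok: %d\n", pfn_fits_pte(0x12345ULL));  /* 1 */
        printf("pfn 0x100000 ok: %d\n", pfn_fits_pte(0x100000ULL)); /* 0 */
        return 0;
    }
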
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 85419bb7d4ab..d17d482a04f4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
1378 if (mb < 16) 1378 if (mb < 16)
1379 return 1024*1024; 1379 return 1024*1024;
1380 1380
1381 /* To 32MB for anything above that */ 1381 /* To 64MB for anything above that */
1382 return 32*1024*1024; 1382 return 64*1024*1024;
1383} 1383}
1384 1384
1385#define MAX_RESOURCE_SIZE ((resource_size_t)-1) 1385#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 2acfd3fdc0cc..b9c830c12b4a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -178,6 +178,11 @@ asmlinkage void early_printk(const char *fmt, ...)
178 178
179static inline void early_console_register(struct console *con, int keep_early) 179static inline void early_console_register(struct console *con, int keep_early)
180{ 180{
181 if (early_console->index != -1) {
182 printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
183 con->name);
184 return;
185 }
181 early_console = con; 186 early_console = con;
182 if (keep_early) 187 if (keep_early)
183 early_console->flags &= ~CON_BOOT; 188 early_console->flags &= ~CON_BOOT;
@@ -201,8 +206,11 @@ static int __init setup_early_printk(char *buf)
201 206
202 while (*buf != '\0') { 207 while (*buf != '\0') {
203 if (!strncmp(buf, "serial", 6)) { 208 if (!strncmp(buf, "serial", 6)) {
204 early_serial_init(buf + 6); 209 buf += 6;
210 early_serial_init(buf);
205 early_console_register(&early_serial_console, keep); 211 early_console_register(&early_serial_console, keep);
212 if (!strncmp(buf, ",ttyS", 5))
213 buf += 5;
206 } 214 }
207 if (!strncmp(buf, "ttyS", 4)) { 215 if (!strncmp(buf, "ttyS", 4)) {
208 early_serial_init(buf + 4); 216 early_serial_init(buf + 4);
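
The setup_early_printk() hunk above advances buf past "serial" and swallows an optional ",ttyS", so that "earlyprintk=serial,ttyS0,115200" no longer reaches the separate "ttyS" branch and registers the console twice; early_console_register() additionally bails out with an error if an early console is already in use. A userspace sketch of the parsing flow; the init/register bodies are stubs, not the kernel functions:

    #include <stdio.h>
    #include <string.h>

    static void early_serial_init(const char *s)
    {
        printf("serial init: '%s'\n", s);
    }

    static void register_console_once(const char *name)
    {
        static int registered;

        if (registered) {               /* models the new index != -1 check */
            printf("ERROR: earlyprintk=%s already used\n", name);
            return;
        }
        registered = 1;
        printf("registered %s\n", name);
    }

    static void setup_early_printk(const char *arg)
    {
        const char *buf = arg;

        while (*buf != '\0') {
            if (!strncmp(buf, "serial", 6)) {
                buf += 6;
                early_serial_init(buf);
                register_console_once("serial");
                if (!strncmp(buf, ",ttyS", 5))
                    buf += 5;   /* swallow ",ttyS" so the branch below skips */
            }
            if (!strncmp(buf, "ttyS", 4)) {
                early_serial_init(buf + 4);
                register_console_once("ttyS");
            }
            buf++;
        }
    }

    int main(void)
    {
        setup_early_printk("serial,ttyS0,115200");
        return 0;
    }
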
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index ad5bd988fb79..cdcfb122f256 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -454,8 +454,10 @@ void __init efi_init(void)
454 if (add_efi_memmap) 454 if (add_efi_memmap)
455 do_add_efi_memmap(); 455 do_add_efi_memmap();
456 456
457#ifdef CONFIG_X86_32
457 x86_platform.get_wallclock = efi_get_time; 458 x86_platform.get_wallclock = efi_get_time;
458 x86_platform.set_wallclock = efi_set_rtc_mmss; 459 x86_platform.set_wallclock = efi_set_rtc_mmss;
460#endif
459 461
460 /* Setup for EFI runtime service */ 462 /* Setup for EFI runtime service */
461 reboot_type = BOOT_EFI; 463 reboot_type = BOOT_EFI;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 218aad7ee76e..050c278481b1 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
79 * any particular GDT layout, because we load our own as soon as we 79 * any particular GDT layout, because we load our own as soon as we
80 * can. 80 * can.
81 */ 81 */
82.section .text.head,"ax",@progbits 82__HEAD
83ENTRY(startup_32) 83ENTRY(startup_32)
84 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 84 /* test KEEP_SEGMENTS flag to see if the bootloader is asking
85 us to not reload segments */ 85 us to not reload segments */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d0bc0a13a437..780cd928fcd5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map)
40L3_START_KERNEL = pud_index(__START_KERNEL_map) 40L3_START_KERNEL = pud_index(__START_KERNEL_map)
41 41
42 .text 42 .text
43 .section .text.head 43 __HEAD
44 .code64 44 .code64
45 .globl startup_64 45 .globl startup_64
46startup_64: 46startup_64:
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 43cec6bdda63..9c3bd4a2050e 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -10,6 +10,16 @@
10EXPORT_SYMBOL(mcount); 10EXPORT_SYMBOL(mcount);
11#endif 11#endif
12 12
13/*
14 * Note, this is a prototype to get at the symbol for
 15 * the export, but don't use it from C code; it is used
16 * by assembly code and is not using C calling convention!
17 */
18#ifndef CONFIG_X86_CMPXCHG64
19extern void cmpxchg8b_emu(void);
20EXPORT_SYMBOL(cmpxchg8b_emu);
21#endif
22
13/* Networking helper routines. */ 23/* Networking helper routines. */
14EXPORT_SYMBOL(csum_partial_copy_generic); 24EXPORT_SYMBOL(csum_partial_copy_generic);
15 25
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 74656d1d4e30..04bbd5278568 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
63 for_each_online_cpu(j) 63 for_each_online_cpu(j)
64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
65 seq_printf(p, " Spurious interrupts\n"); 65 seq_printf(p, " Spurious interrupts\n");
66 seq_printf(p, "%*s: ", prec, "CNT"); 66 seq_printf(p, "%*s: ", prec, "PMI");
67 for_each_online_cpu(j) 67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance counter interrupts\n"); 69 seq_printf(p, " Performance monitoring interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND"); 70 seq_printf(p, "%*s: ", prec, "PND");
71 for_each_online_cpu(j) 71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); 72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 366baa179913..f4c538b681ca 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -317,6 +317,12 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
317 return UCODE_NFOUND; 317 return UCODE_NFOUND;
318 } 318 }
319 319
320 if (*(u32 *)firmware->data != UCODE_MAGIC) {
321 printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n",
322 *(u32 *)firmware->data);
323 return UCODE_ERROR;
324 }
325
320 ret = generic_load_microcode(cpu, firmware->data, firmware->size); 326 ret = generic_load_microcode(cpu, firmware->data, firmware->size);
321 327
322 release_firmware(firmware); 328 release_firmware(firmware);
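
request_microcode_fw() above now validates the container magic before handing the blob to generic_load_microcode(). The sketch below shows the same reject-early check; the magic value is the "DMA" container signature per the AMD microcode format, and memcpy sidesteps the unaligned cast the kernel's x86-only code can afford:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define UCODE_MAGIC 0x00414d44u     /* "DMA\0", little-endian */

    static int check_container(const uint8_t *data, size_t size)
    {
        uint32_t magic;

        if (size < sizeof(magic))
            return -1;
        memcpy(&magic, data, sizeof(magic));
        if (magic != UCODE_MAGIC) {
            fprintf(stderr, "invalid UCODE_MAGIC (0x%08x)\n", magic);
            return -1;                  /* UCODE_ERROR in the kernel */
        }
        return 0;
    }

    int main(void)
    {
        const uint8_t good[8] = { 0x44, 0x4d, 0x41, 0x00 };
        const uint8_t bad[8]  = { 0xde, 0xad, 0xbe, 0xef };

        return check_container(good, sizeof(good)) == 0 &&
               check_container(bad, sizeof(bad)) != 0 ? 0 : 1;
    }
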
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 64b838eac18c..a6e804d16c35 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
35 35
36/* 36/*
37 * This variable becomes 1 if iommu=pt is passed on the kernel command line. 37 * This variable becomes 1 if iommu=pt is passed on the kernel command line.
38 * If this variable is 1, IOMMU implementations do no DMA ranslation for 38 * If this variable is 1, IOMMU implementations do no DMA translation for
 39 * devices and allow every device to access the whole physical memory. This is 39 * devices and allow every device to access the whole physical memory. This is
 40 * useful if a user wants to use an IOMMU only for KVM device assignment to 40 * useful if a user wants to use an IOMMU only for KVM device assignment to
41 * guests and not for driver dma translation. 41 * guests and not for driver dma translation.
@@ -45,12 +45,10 @@ int iommu_pass_through __read_mostly;
45dma_addr_t bad_dma_address __read_mostly = 0; 45dma_addr_t bad_dma_address __read_mostly = 0;
46EXPORT_SYMBOL(bad_dma_address); 46EXPORT_SYMBOL(bad_dma_address);
47 47
48/* Dummy device used for NULL arguments (normally ISA). Better would 48/* Dummy device used for NULL arguments (normally ISA). */
49 be probably a smaller DMA mask, but this is bug-to-bug compatible
50 to older i386. */
51struct device x86_dma_fallback_dev = { 49struct device x86_dma_fallback_dev = {
52 .init_name = "fallback device", 50 .init_name = "fallback device",
53 .coherent_dma_mask = DMA_BIT_MASK(32), 51 .coherent_dma_mask = ISA_DMA_BIT_MASK,
54 .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, 52 .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
55}; 53};
56EXPORT_SYMBOL(x86_dma_fallback_dev); 54EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -311,7 +309,7 @@ void pci_iommu_shutdown(void)
311 amd_iommu_shutdown(); 309 amd_iommu_shutdown();
312} 310}
313/* Must execute after PCI subsystem */ 311/* Must execute after PCI subsystem */
314fs_initcall(pci_iommu_init); 312rootfs_initcall(pci_iommu_init);
315 313
316#ifdef CONFIG_PCI 314#ifdef CONFIG_PCI
317/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ 315/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 98a827ee9ed7..a7f1b64f86e0 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -16,6 +16,7 @@
16#include <linux/agp_backend.h> 16#include <linux/agp_backend.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/sched.h>
19#include <linux/string.h> 20#include <linux/string.h>
20#include <linux/spinlock.h> 21#include <linux/spinlock.h>
21#include <linux/pci.h> 22#include <linux/pci.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ad535b683170..eb62cbcaa490 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -664,3 +664,8 @@ long sys_arch_prctl(int code, unsigned long addr)
664 return do_arch_prctl(current, code, addr); 664 return do_arch_prctl(current, code, addr);
665} 665}
666 666
667unsigned long KSTK_ESP(struct task_struct *task)
668{
669 return (test_tsk_thread_flag(task, TIF_IA32)) ?
670 (task_pt_regs(task)->sp) : ((task)->thread.usersp);
671}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 27349f92a6d7..f93078746e00 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/efi.h> 5#include <linux/efi.h>
6#include <linux/dmi.h> 6#include <linux/dmi.h>
7#include <linux/sched.h>
7#include <linux/tboot.h> 8#include <linux/tboot.h>
8#include <acpi/reboot.h> 9#include <acpi/reboot.h>
9#include <asm/io.h> 10#include <asm/io.h>
@@ -435,6 +436,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
435 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), 436 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
436 }, 437 },
437 }, 438 },
439 { /* Handle problems with rebooting on Apple Macmini3,1 */
440 .callback = set_pci_reboot,
441 .ident = "Apple Macmini3,1",
442 .matches = {
443 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
444 DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
445 },
446 },
438 { } 447 { }
439}; 448};
440 449
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e09f0e2c14b5..2a34f9c5be21 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -660,6 +660,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
660 }, 660 },
661 }, 661 },
662 { 662 {
663 .callback = dmi_low_memory_corruption,
664 .ident = "Phoenix/MSC BIOS",
665 .matches = {
666 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
667 },
668 },
669 {
663 /* 670 /*
664 * AMI BIOS with low memory corruption was found on Intel DG45ID board. 671 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
 665 * It has different DMI_BIOS_VENDOR = "Intel Corp.", for now we will 672 * It has different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dcb00d278512..be2573448ed9 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
38#ifdef CONFIG_FRAME_POINTER 38#ifdef CONFIG_FRAME_POINTER
39 return *(unsigned long *)(regs->bp + sizeof(long)); 39 return *(unsigned long *)(regs->bp + sizeof(long));
40#else 40#else
41 unsigned long *sp = (unsigned long *)regs->sp; 41 unsigned long *sp =
42 (unsigned long *)kernel_stack_pointer(regs);
42 /* 43 /*
43 * Return address is either directly at stack pointer 44 * Return address is either directly at stack pointer
44 * or above a saved flags. Eflags has bits 22-31 zero, 45 * or above a saved flags. Eflags has bits 22-31 zero,
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 503c1f2e8835..1740c85e24bb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -23,8 +23,6 @@
23static struct bau_control **uv_bau_table_bases __read_mostly; 23static struct bau_control **uv_bau_table_bases __read_mostly;
24static int uv_bau_retry_limit __read_mostly; 24static int uv_bau_retry_limit __read_mostly;
25 25
26/* position of pnode (which is nasid>>1): */
27static int uv_nshift __read_mostly;
28/* base pnode in this partition */ 26/* base pnode in this partition */
29static int uv_partition_base_pnode __read_mostly; 27static int uv_partition_base_pnode __read_mostly;
30 28
@@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode)
723 BUG_ON(!adp); 721 BUG_ON(!adp);
724 722
725 pa = uv_gpa(adp); /* need the real nasid*/ 723 pa = uv_gpa(adp); /* need the real nasid*/
726 n = pa >> uv_nshift; 724 n = uv_gpa_to_pnode(pa);
727 m = pa & uv_mmask; 725 m = pa & uv_mmask;
728 726
729 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, 727 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
@@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
778 * need the pnode of where the memory was really allocated 776 * need the pnode of where the memory was really allocated
779 */ 777 */
780 pa = uv_gpa(pqp); 778 pa = uv_gpa(pqp);
781 pn = pa >> uv_nshift; 779 pn = uv_gpa_to_pnode(pa);
782 uv_write_global_mmr64(pnode, 780 uv_write_global_mmr64(pnode,
783 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, 781 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
784 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | 782 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
@@ -843,8 +841,7 @@ static int __init uv_bau_init(void)
843 GFP_KERNEL, cpu_to_node(cur_cpu)); 841 GFP_KERNEL, cpu_to_node(cur_cpu));
844 842
845 uv_bau_retry_limit = 1; 843 uv_bau_retry_limit = 1;
846 uv_nshift = uv_hub_info->n_val; 844 uv_mmask = (1UL << uv_hub_info->m_val) - 1;
847 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
848 nblades = uv_num_possible_blades(); 845 nblades = uv_num_possible_blades();
849 846
850 uv_bau_table_bases = (struct bau_control **) 847 uv_bau_table_bases = (struct bau_control **)
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 699f7eeb896a..cd022121cab6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -3,8 +3,16 @@
3#include <asm/trampoline.h> 3#include <asm/trampoline.h>
4#include <asm/e820.h> 4#include <asm/e820.h>
5 5
6#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
7#define __trampinit
8#define __trampinitdata
9#else
10#define __trampinit __cpuinit
11#define __trampinitdata __cpuinitdata
12#endif
13
6/* ready for x86_64 and x86 */ 14/* ready for x86_64 and x86 */
7unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE); 15unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
8 16
9void __init reserve_trampoline_memory(void) 17void __init reserve_trampoline_memory(void)
10{ 18{
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
26 * bootstrap into the page concerned. The caller 34 * bootstrap into the page concerned. The caller
27 * has made sure it's suitably aligned. 35 * has made sure it's suitably aligned.
28 */ 36 */
29unsigned long __cpuinit setup_trampoline(void) 37unsigned long __trampinit setup_trampoline(void)
30{ 38{
31 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); 39 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
32 return virt_to_phys(trampoline_base); 40 return virt_to_phys(trampoline_base);
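
The __trampinit/__trampinitdata macros above select section placement at build time: when 64-bit ACPI sleep support may re-enter the trampoline after boot, the data must not live in memory that is freed along with the __cpuinit sections. A userspace sketch of the pattern; RESUME_NEEDS_TRAMPOLINE stands in for the CONFIG_X86_64 && CONFIG_ACPI_SLEEP test, and the section name is made up for illustration:

    #include <stdio.h>

    #ifdef RESUME_NEEDS_TRAMPOLINE
    #define __trampinitdata             /* keep: the resume path reuses it */
    #else
    #define __trampinitdata __attribute__((section(".init.data.discard")))
    #endif

    static unsigned char trampoline_copy[64] __trampinitdata;

    int main(void)
    {
        trampoline_copy[0] = 0x90;      /* touch it so it is emitted */
        printf("trampoline buffer at %p\n", (void *)trampoline_copy);
        return 0;
    }
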
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 596d54c660a5..3af2dff58b21 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -32,8 +32,12 @@
32#include <asm/segment.h> 32#include <asm/segment.h>
33#include <asm/processor-flags.h> 33#include <asm/processor-flags.h>
34 34
35#ifdef CONFIG_ACPI_SLEEP
36.section .rodata, "a", @progbits
37#else
35/* We can free up the trampoline after bootup if cpu hotplug is not supported. */ 38/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
36__CPUINITRODATA 39__CPUINITRODATA
40#endif
37.code16 41.code16
38 42
39ENTRY(trampoline_data) 43ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a665c71352b8..7e37dcee0cc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,11 +72,9 @@ char ignore_fpu_irq;
72 72
73/* 73/*
74 * The IDT has to be page-aligned to simplify the Pentium 74 * The IDT has to be page-aligned to simplify the Pentium
75 * F0 0F bug workaround.. We have a special link segment 75 * F0 0F bug workaround.
76 * for this.
77 */ 76 */
78gate_desc idt_table[NR_VECTORS] 77gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
79 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
80#endif 78#endif
81 79
82DECLARE_BITMAP(used_vectors, NR_VECTORS); 80DECLARE_BITMAP(used_vectors, NR_VECTORS);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 027b5b498993..f37930954d15 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
114 return; 114 return;
115 115
116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { 116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
117 pr_info("Skipping synchronization checks as TSC is reliable.\n"); 117 printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
118 return; 118 return;
119 } 119 }
120 120
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 31e6f6cfe53e..d430e4c30193 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
648 648
649 pv_info.paravirt_enabled = 1; 649 pv_info.paravirt_enabled = 1;
650 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; 650 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
651 pv_info.name = "vmi"; 651 pv_info.name = "vmi [deprecated]";
652 652
653 pv_init_ops.patch = vmi_patch; 653 pv_init_ops.patch = vmi_patch;
654 654
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a46acccec38a..3c68fe2d46cf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,17 +65,11 @@ SECTIONS
65#endif 65#endif
66 66
67 /* Text and read-only data */ 67 /* Text and read-only data */
68
69 /* bootstrapping code */
70 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
71 _text = .;
72 *(.text.head)
73 } :text = 0x9090
74
75 /* The rest of the text */
76 .text : AT(ADDR(.text) - LOAD_OFFSET) { 68 .text : AT(ADDR(.text) - LOAD_OFFSET) {
69 _text = .;
70 /* bootstrapping code */
71 HEAD_TEXT
77#ifdef CONFIG_X86_32 72#ifdef CONFIG_X86_32
78 /* not really needed, already page aligned */
79 . = ALIGN(PAGE_SIZE); 73 . = ALIGN(PAGE_SIZE);
80 *(.text.page_aligned) 74 *(.text.page_aligned)
81#endif 75#endif
@@ -94,13 +88,7 @@ SECTIONS
94 88
95 NOTES :text :note 89 NOTES :text :note
96 90
97 /* Exception table */ 91 EXCEPTION_TABLE(16) :text = 0x9090
98 . = ALIGN(16);
99 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
100 __start___ex_table = .;
101 *(__ex_table)
102 __stop___ex_table = .;
103 } :text = 0x9090
104 92
105 RO_DATA(PAGE_SIZE) 93 RO_DATA(PAGE_SIZE)
106 94
@@ -118,7 +106,6 @@ SECTIONS
118#endif 106#endif
119 107
120 PAGE_ALIGNED_DATA(PAGE_SIZE) 108 PAGE_ALIGNED_DATA(PAGE_SIZE)
121 *(.data.idt)
122 109
123 CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) 110 CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
124 111
@@ -135,24 +122,21 @@ SECTIONS
135#ifdef CONFIG_X86_64 122#ifdef CONFIG_X86_64
136 123
137#define VSYSCALL_ADDR (-10*1024*1024) 124#define VSYSCALL_ADDR (-10*1024*1024)
138#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
139 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
140#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
141 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
142 125
143#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) 126#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
144#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) 127#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
145 128
146#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) 129#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
147#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) 130#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
148 131
132 . = ALIGN(4096);
133 __vsyscall_0 = .;
134
149 . = VSYSCALL_ADDR; 135 . = VSYSCALL_ADDR;
150 .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { 136 .vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
151 *(.vsyscall_0) 137 *(.vsyscall_0)
152 } :user 138 } :user
153 139
154 __vsyscall_0 = VSYSCALL_VIRT_ADDR;
155
156 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 140 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
157 .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { 141 .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
158 *(.vsyscall_fn) 142 *(.vsyscall_fn)
@@ -192,11 +176,9 @@ SECTIONS
192 *(.vsyscall_3) 176 *(.vsyscall_3)
193 } 177 }
194 178
195 . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; 179 . = __vsyscall_0 + PAGE_SIZE;
196 180
197#undef VSYSCALL_ADDR 181#undef VSYSCALL_ADDR
198#undef VSYSCALL_PHYS_ADDR
199#undef VSYSCALL_VIRT_ADDR
200#undef VLOAD_OFFSET 182#undef VLOAD_OFFSET
201#undef VLOAD 183#undef VLOAD
202#undef VVIRT_OFFSET 184#undef VVIRT_OFFSET
@@ -219,36 +201,12 @@ SECTIONS
219 PERCPU_VADDR(0, :percpu) 201 PERCPU_VADDR(0, :percpu)
220#endif 202#endif
221 203
222 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 204 INIT_TEXT_SECTION(PAGE_SIZE)
223 _sinittext = .;
224 INIT_TEXT
225 _einittext = .;
226 }
227#ifdef CONFIG_X86_64 205#ifdef CONFIG_X86_64
228 :init 206 :init
229#endif 207#endif
230 208
231 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { 209 INIT_DATA_SECTION(16)
232 INIT_DATA
233 }
234
235 . = ALIGN(16);
236 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
237 __setup_start = .;
238 *(.init.setup)
239 __setup_end = .;
240 }
241 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
242 __initcall_start = .;
243 INITCALLS
244 __initcall_end = .;
245 }
246
247 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
248 __con_initcall_start = .;
249 *(.con_initcall.init)
250 __con_initcall_end = .;
251 }
252 210
253 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { 211 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
254 __x86_cpu_dev_start = .; 212 __x86_cpu_dev_start = .;
@@ -256,8 +214,6 @@ SECTIONS
256 __x86_cpu_dev_end = .; 214 __x86_cpu_dev_end = .;
257 } 215 }
258 216
259 SECURITY_INIT
260
261 . = ALIGN(8); 217 . = ALIGN(8);
262 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 218 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
263 __parainstructions = .; 219 __parainstructions = .;
@@ -288,15 +244,6 @@ SECTIONS
288 EXIT_DATA 244 EXIT_DATA
289 } 245 }
290 246
291#ifdef CONFIG_BLK_DEV_INITRD
292 . = ALIGN(PAGE_SIZE);
293 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
294 __initramfs_start = .;
295 *(.init.ramfs)
296 __initramfs_end = .;
297 }
298#endif
299
300#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 247#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
301 PERCPU(PAGE_SIZE) 248 PERCPU(PAGE_SIZE)
302#endif 249#endif
@@ -358,6 +305,9 @@ SECTIONS
358 305
359 306
360#ifdef CONFIG_X86_32 307#ifdef CONFIG_X86_32
308/*
309 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
310 */
361. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), 311. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
362 "kernel image bigger than KERNEL_IMAGE_SIZE"); 312 "kernel image bigger than KERNEL_IMAGE_SIZE");
363#else 313#else
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523b4901..144e7f60b5e2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
116 * itself with the initial count and continues counting 116 * itself with the initial count and continues counting
117 * from there. 117 * from there.
118 */ 118 */
119 remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); 119 remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
120 elapsed = ps->pit_timer.period - ktime_to_ns(remaining); 120 elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
121 elapsed = mod_64(elapsed, ps->pit_timer.period); 121 elapsed = mod_64(elapsed, ps->pit_timer.period);
122 122
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceba7eb2..23c217692ea9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
521 if (apic_get_reg(apic, APIC_TMICT) == 0) 521 if (apic_get_reg(apic, APIC_TMICT) == 0)
522 return 0; 522 return 0;
523 523
524 remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); 524 remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
525 if (ktime_to_ns(remaining) < 0) 525 if (ktime_to_ns(remaining) < 0)
526 remaining = ktime_set(0, 0); 526 remaining = ktime_set(0, 0);
527 527
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
664{ 664{
665 ktime_t now = apic->lapic_timer.timer.base->get_time(); 665 ktime_t now = apic->lapic_timer.timer.base->get_time();
666 666
667 apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * 667 apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
668 APIC_BUS_CYCLE_NS * apic->divide_count; 668 APIC_BUS_CYCLE_NS * apic->divide_count;
669 atomic_set(&apic->lapic_timer.pending, 0); 669 atomic_set(&apic->lapic_timer.pending, 0);
670 670
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae9f453..818b92ad82cf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
156#define CREATE_TRACE_POINTS 156#define CREATE_TRACE_POINTS
157#include "mmutrace.h" 157#include "mmutrace.h"
158 158
159#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
160
159#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 161#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
160 162
161struct kvm_rmap_desc { 163struct kvm_rmap_desc {
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
634 if (*spte & shadow_accessed_mask) 636 if (*spte & shadow_accessed_mask)
635 kvm_set_pfn_accessed(pfn); 637 kvm_set_pfn_accessed(pfn);
636 if (is_writeble_pte(*spte)) 638 if (is_writeble_pte(*spte))
637 kvm_release_pfn_dirty(pfn); 639 kvm_set_pfn_dirty(pfn);
638 else
639 kvm_release_pfn_clean(pfn);
640 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); 640 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
641 if (!*rmapp) { 641 if (!*rmapp) {
642 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 642 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
748 return write_protected; 748 return write_protected;
749} 749}
750 750
751static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) 751static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
752 unsigned long data)
752{ 753{
753 u64 *spte; 754 u64 *spte;
754 int need_tlb_flush = 0; 755 int need_tlb_flush = 0;
@@ -763,8 +764,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
763 return need_tlb_flush; 764 return need_tlb_flush;
764} 765}
765 766
767static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
768 unsigned long data)
769{
770 int need_flush = 0;
771 u64 *spte, new_spte;
772 pte_t *ptep = (pte_t *)data;
773 pfn_t new_pfn;
774
775 WARN_ON(pte_huge(*ptep));
776 new_pfn = pte_pfn(*ptep);
777 spte = rmap_next(kvm, rmapp, NULL);
778 while (spte) {
779 BUG_ON(!is_shadow_present_pte(*spte));
780 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
781 need_flush = 1;
782 if (pte_write(*ptep)) {
783 rmap_remove(kvm, spte);
784 __set_spte(spte, shadow_trap_nonpresent_pte);
785 spte = rmap_next(kvm, rmapp, NULL);
786 } else {
787 new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
788 new_spte |= (u64)new_pfn << PAGE_SHIFT;
789
790 new_spte &= ~PT_WRITABLE_MASK;
791 new_spte &= ~SPTE_HOST_WRITEABLE;
792 if (is_writeble_pte(*spte))
793 kvm_set_pfn_dirty(spte_to_pfn(*spte));
794 __set_spte(spte, new_spte);
795 spte = rmap_next(kvm, rmapp, spte);
796 }
797 }
798 if (need_flush)
799 kvm_flush_remote_tlbs(kvm);
800
801 return 0;
802}
803
766static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 804static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
767 int (*handler)(struct kvm *kvm, unsigned long *rmapp)) 805 unsigned long data,
806 int (*handler)(struct kvm *kvm, unsigned long *rmapp,
807 unsigned long data))
768{ 808{
769 int i, j; 809 int i, j;
770 int retval = 0; 810 int retval = 0;
@@ -786,13 +826,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
786 if (hva >= start && hva < end) { 826 if (hva >= start && hva < end) {
787 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; 827 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
788 828
789 retval |= handler(kvm, &memslot->rmap[gfn_offset]); 829 retval |= handler(kvm, &memslot->rmap[gfn_offset],
830 data);
790 831
791 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 832 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
792 int idx = gfn_offset; 833 int idx = gfn_offset;
793 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 834 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
794 retval |= handler(kvm, 835 retval |= handler(kvm,
795 &memslot->lpage_info[j][idx].rmap_pde); 836 &memslot->lpage_info[j][idx].rmap_pde,
837 data);
796 } 838 }
797 } 839 }
798 } 840 }
@@ -802,10 +844,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
802 844
803int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 845int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
804{ 846{
805 return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 847 return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
806} 848}
807 849
808static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) 850void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
851{
852 kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
853}
854
855static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
856 unsigned long data)
809{ 857{
810 u64 *spte; 858 u64 *spte;
811 int young = 0; 859 int young = 0;
@@ -841,13 +889,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
841 gfn = unalias_gfn(vcpu->kvm, gfn); 889 gfn = unalias_gfn(vcpu->kvm, gfn);
842 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 890 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
843 891
844 kvm_unmap_rmapp(vcpu->kvm, rmapp); 892 kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
845 kvm_flush_remote_tlbs(vcpu->kvm); 893 kvm_flush_remote_tlbs(vcpu->kvm);
846} 894}
847 895
848int kvm_age_hva(struct kvm *kvm, unsigned long hva) 896int kvm_age_hva(struct kvm *kvm, unsigned long hva)
849{ 897{
850 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 898 return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
851} 899}
852 900
853#ifdef MMU_DEBUG 901#ifdef MMU_DEBUG
@@ -1756,7 +1804,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1756 unsigned pte_access, int user_fault, 1804 unsigned pte_access, int user_fault,
1757 int write_fault, int dirty, int level, 1805 int write_fault, int dirty, int level,
1758 gfn_t gfn, pfn_t pfn, bool speculative, 1806 gfn_t gfn, pfn_t pfn, bool speculative,
1759 bool can_unsync) 1807 bool can_unsync, bool reset_host_protection)
1760{ 1808{
1761 u64 spte; 1809 u64 spte;
1762 int ret = 0; 1810 int ret = 0;
@@ -1783,6 +1831,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1783 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, 1831 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
1784 kvm_is_mmio_pfn(pfn)); 1832 kvm_is_mmio_pfn(pfn));
1785 1833
1834 if (reset_host_protection)
1835 spte |= SPTE_HOST_WRITEABLE;
1836
1786 spte |= (u64)pfn << PAGE_SHIFT; 1837 spte |= (u64)pfn << PAGE_SHIFT;
1787 1838
1788 if ((pte_access & ACC_WRITE_MASK) 1839 if ((pte_access & ACC_WRITE_MASK)
@@ -1828,7 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1828 unsigned pt_access, unsigned pte_access, 1879 unsigned pt_access, unsigned pte_access,
1829 int user_fault, int write_fault, int dirty, 1880 int user_fault, int write_fault, int dirty,
1830 int *ptwrite, int level, gfn_t gfn, 1881 int *ptwrite, int level, gfn_t gfn,
1831 pfn_t pfn, bool speculative) 1882 pfn_t pfn, bool speculative,
1883 bool reset_host_protection)
1832{ 1884{
1833 int was_rmapped = 0; 1885 int was_rmapped = 0;
1834 int was_writeble = is_writeble_pte(*sptep); 1886 int was_writeble = is_writeble_pte(*sptep);
@@ -1860,7 +1912,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1860 } 1912 }
1861 1913
1862 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, 1914 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
1863 dirty, level, gfn, pfn, speculative, true)) { 1915 dirty, level, gfn, pfn, speculative, true,
1916 reset_host_protection)) {
1864 if (write_fault) 1917 if (write_fault)
1865 *ptwrite = 1; 1918 *ptwrite = 1;
1866 kvm_x86_ops->tlb_flush(vcpu); 1919 kvm_x86_ops->tlb_flush(vcpu);
@@ -1877,8 +1930,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1877 page_header_update_slot(vcpu->kvm, sptep, gfn); 1930 page_header_update_slot(vcpu->kvm, sptep, gfn);
1878 if (!was_rmapped) { 1931 if (!was_rmapped) {
1879 rmap_count = rmap_add(vcpu, sptep, gfn); 1932 rmap_count = rmap_add(vcpu, sptep, gfn);
1880 if (!is_rmap_spte(*sptep)) 1933 kvm_release_pfn_clean(pfn);
1881 kvm_release_pfn_clean(pfn);
1882 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 1934 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1883 rmap_recycle(vcpu, sptep, gfn); 1935 rmap_recycle(vcpu, sptep, gfn);
1884 } else { 1936 } else {
@@ -1909,7 +1961,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1909 if (iterator.level == level) { 1961 if (iterator.level == level) {
1910 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 1962 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
1911 0, write, 1, &pt_write, 1963 0, write, 1, &pt_write,
1912 level, gfn, pfn, false); 1964 level, gfn, pfn, false, true);
1913 ++vcpu->stat.pf_fixed; 1965 ++vcpu->stat.pf_fixed;
1914 break; 1966 break;
1915 } 1967 }
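
The mmu.c refactoring above threads an opaque unsigned long data from kvm_handle_hva() into every rmap handler, so the new kvm_set_pte_rmapp() can receive a pte pointer while kvm_unmap_rmapp() and kvm_age_rmapp() simply pass 0. A stripped-down sketch of that callback-plus-opaque-data shape, with simplified types rather than the KVM structures:

    #include <stdio.h>

    typedef int (*rmap_handler)(unsigned long *rmapp, unsigned long data);

    static int unmap_handler(unsigned long *rmapp, unsigned long data)
    {
        (void)data;                     /* unused: callers pass 0 */
        *rmapp = 0;
        return 1;                       /* "tlb flush needed" */
    }

    static int set_pte_handler(unsigned long *rmapp, unsigned long data)
    {
        unsigned long *new_pte = (unsigned long *)data;

        *rmapp = *new_pte;              /* install the new mapping */
        return 0;
    }

    /* One walker serves every handler, like kvm_handle_hva() above. */
    static int handle_hva(unsigned long *slots, int n,
                          unsigned long data, rmap_handler handler)
    {
        int i, retval = 0;

        for (i = 0; i < n; i++)
            retval |= handler(&slots[i], data);
        return retval;
    }

    int main(void)
    {
        unsigned long slots[4] = { 1, 2, 3, 4 };
        unsigned long pte = 0x1000;

        handle_hva(slots, 4, (unsigned long)&pte, set_pte_handler);
        printf("slot0 after set_pte: %#lx\n", slots[0]);
        return handle_hva(slots, 4, 0, unmap_handler) ? 0 : 1;
    }
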
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c12d22..72558f8ff3f5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
273 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) 273 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
274 return; 274 return;
275 kvm_get_pfn(pfn); 275 kvm_get_pfn(pfn);
276 /*
 277 * we call mmu_set_spte() with reset_host_protection = true because
278 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
279 */
276 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 280 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
277 gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, 281 gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
278 gpte_to_gfn(gpte), pfn, true); 282 gpte_to_gfn(gpte), pfn, true, true);
279} 283}
280 284
281/* 285/*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
308 user_fault, write_fault, 312 user_fault, write_fault,
309 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 313 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
310 ptwrite, level, 314 ptwrite, level,
311 gw->gfn, pfn, false); 315 gw->gfn, pfn, false, true);
312 break; 316 break;
313 } 317 }
314 318
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
558static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 562static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
559{ 563{
560 int i, offset, nr_present; 564 int i, offset, nr_present;
565 bool reset_host_protection;
561 566
562 offset = nr_present = 0; 567 offset = nr_present = 0;
563 568
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
595 600
596 nr_present++; 601 nr_present++;
597 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 602 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
603 if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
604 pte_access &= ~ACC_WRITE_MASK;
605 reset_host_protection = 0;
606 } else {
607 reset_host_protection = 1;
608 }
598 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 609 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
599 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, 610 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
600 spte_to_pfn(sp->spt[i]), true, false); 611 spte_to_pfn(sp->spt[i]), true, false,
612 reset_host_protection);
601 } 613 }
602 614
603 return !nr_present; 615 return !nr_present;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 944cc9c04b3c..c17404add91f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
767 rdtscll(tsc_this); 767 rdtscll(tsc_this);
768 delta = vcpu->arch.host_tsc - tsc_this; 768 delta = vcpu->arch.host_tsc - tsc_this;
769 svm->vmcb->control.tsc_offset += delta; 769 svm->vmcb->control.tsc_offset += delta;
770 if (is_nested(svm))
771 svm->nested.hsave->control.tsc_offset += delta;
770 vcpu->cpu = cpu; 772 vcpu->cpu = cpu;
771 kvm_migrate_timers(vcpu); 773 kvm_migrate_timers(vcpu);
772 svm->asid_generation = 0; 774 svm->asid_generation = 0;
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2057 2059
2058 switch (ecx) { 2060 switch (ecx) {
2059 case MSR_IA32_TSC: { 2061 case MSR_IA32_TSC: {
2060 u64 tsc; 2062 u64 tsc_offset;
2063
2064 if (is_nested(svm))
2065 tsc_offset = svm->nested.hsave->control.tsc_offset;
2066 else
2067 tsc_offset = svm->vmcb->control.tsc_offset;
2061 2068
2062 rdtscll(tsc); 2069 *data = tsc_offset + native_read_tsc();
2063 *data = svm->vmcb->control.tsc_offset + tsc;
2064 break; 2070 break;
2065 } 2071 }
2066 case MSR_K6_STAR: 2072 case MSR_K6_STAR:
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2146 2152
2147 switch (ecx) { 2153 switch (ecx) {
2148 case MSR_IA32_TSC: { 2154 case MSR_IA32_TSC: {
2149 u64 tsc; 2155 u64 tsc_offset = data - native_read_tsc();
2156 u64 g_tsc_offset = 0;
2157
2158 if (is_nested(svm)) {
2159 g_tsc_offset = svm->vmcb->control.tsc_offset -
2160 svm->nested.hsave->control.tsc_offset;
2161 svm->nested.hsave->control.tsc_offset = tsc_offset;
2162 }
2163
2164 svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
2150 2165
2151 rdtscll(tsc);
2152 svm->vmcb->control.tsc_offset = data - tsc;
2153 break; 2166 break;
2154 } 2167 }
2155 case MSR_K6_STAR: 2168 case MSR_K6_STAR:
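
In the svm.c hunks above the guest reads the host TSC plus an offset, and a nested guest has its own offset with L1's saved in hsave; a guest write to MSR_IA32_TSC now recomputes the active offset while preserving the L1/L2 delta. A small model of that bookkeeping, with plain integers standing in for the vmcb/hsave fields:

    #include <stdint.h>
    #include <stdio.h>

    struct vcpu {
        int     nested;         /* running the nested (L2) guest? */
        int64_t vmcb_offset;    /* offset applied while the guest runs */
        int64_t hsave_offset;   /* L1's own offset, saved across vmrun */
    };

    static uint64_t guest_rdtsc(const struct vcpu *v, uint64_t host_tsc)
    {
        return host_tsc + (uint64_t)v->vmcb_offset;
    }

    static void guest_write_tsc(struct vcpu *v, uint64_t data, uint64_t host_tsc)
    {
        int64_t tsc_offset = (int64_t)(data - host_tsc);
        int64_t g_tsc_offset = 0;

        if (v->nested) {                /* keep the L1/L2 delta intact */
            g_tsc_offset = v->vmcb_offset - v->hsave_offset;
            v->hsave_offset = tsc_offset;
        }
        v->vmcb_offset = tsc_offset + g_tsc_offset;
    }

    int main(void)
    {
        struct vcpu v = { 1, -500, -800 };  /* L2 runs 300 ahead of L1 */
        uint64_t host = 1000000;

        guest_write_tsc(&v, 42, host);      /* L1 sets its TSC to 42 */
        printf("L2 now reads: %llu\n",      /* 342: 42 plus the delta */
               (unsigned long long)guest_rdtsc(&v, host));
        return 0;
    }
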
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3812014bd0b..ed53b42caba1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
709 if (vcpu->cpu != cpu) { 709 if (vcpu->cpu != cpu) {
710 vcpu_clear(vmx); 710 vcpu_clear(vmx);
711 kvm_migrate_timers(vcpu); 711 kvm_migrate_timers(vcpu);
712 vpid_sync_vcpu_all(vmx); 712 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
713 local_irq_disable(); 713 local_irq_disable();
714 list_add(&vmx->local_vcpus_link, 714 list_add(&vmx->local_vcpus_link,
715 &per_cpu(vcpus_on_cpu, cpu)); 715 &per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be451ee44249..ae07d261527c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1591 1591
1592 if (cpuid->nent < 1) 1592 if (cpuid->nent < 1)
1593 goto out; 1593 goto out;
1594 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1595 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
1594 r = -ENOMEM; 1596 r = -ENOMEM;
1595 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 1597 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
1596 if (!cpuid_entries) 1598 if (!cpuid_entries)
@@ -1690,7 +1692,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
1690 unsigned bank_num = mcg_cap & 0xff, bank; 1692 unsigned bank_num = mcg_cap & 0xff, bank;
1691 1693
1692 r = -EINVAL; 1694 r = -EINVAL;
1693 if (!bank_num) 1695 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
1694 goto out; 1696 goto out;
1695 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) 1697 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
1696 goto out; 1698 goto out;
@@ -4049,7 +4051,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4049 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4051 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
4050} 4052}
4051 4053
4052static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, 4054static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
4053 struct desc_struct *seg_desc) 4055 struct desc_struct *seg_desc)
4054{ 4056{
4055 u32 base_addr = get_desc_base(seg_desc); 4057 u32 base_addr = get_desc_base(seg_desc);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 9e609206fac9..85f5db95c60f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,9 @@ ifeq ($(CONFIG_X86_32),y)
16 lib-y += checksum_32.o 16 lib-y += checksum_32.o
17 lib-y += strstr_32.o 17 lib-y += strstr_32.o
18 lib-y += semaphore_32.o string_32.o 18 lib-y += semaphore_32.o string_32.o
19 19ifneq ($(CONFIG_X86_CMPXCHG64),y)
20 lib-y += cmpxchg8b_emu.o
21endif
20 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o 22 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
21else 23else
22 obj-y += io_64.o iomap_copy_64.o 24 obj-y += io_64.o iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
new file mode 100644
index 000000000000..828cb710dec2
--- /dev/null
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -0,0 +1,57 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; version 2
5 * of the License.
6 *
7 */
8
9#include <linux/linkage.h>
10#include <asm/alternative-asm.h>
11#include <asm/frame.h>
12#include <asm/dwarf2.h>
13
14
15.text
16
17/*
18 * Inputs:
19 * %esi : memory location to compare
20 * %eax : low 32 bits of old value
21 * %edx : high 32 bits of old value
22 * %ebx : low 32 bits of new value
23 * %ecx : high 32 bits of new value
24 */
25ENTRY(cmpxchg8b_emu)
26CFI_STARTPROC
27
28#
29# Emulate 'cmpxchg8b (%esi)' on UP except we don't
30# set the whole ZF thing (caller will just compare
31# eax:edx with the expected value)
32#
33cmpxchg8b_emu:
34 pushfl
35 cli
36
37 cmpl (%esi), %eax
38 jne not_same
39 cmpl 4(%esi), %edx
40 jne half_same
41
42 movl %ebx, (%esi)
43 movl %ecx, 4(%esi)
44
45 popfl
46 ret
47
48 not_same:
49 movl (%esi), %eax
50 half_same:
51 movl 4(%esi), %edx
52
53 popfl
54 ret
55
56CFI_ENDPROC
57ENDPROC(cmpxchg8b_emu)
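
cmpxchg8b_emu above is only safe because it runs on a UP processor: masking interrupts around the compare-and-store is enough, and no lock prefix exists to lean on. A C model of the same semantics; the irq_save/irq_restore stubs stand in for the pushfl/cli ... popfl pair, since there is nothing to mask in userspace:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned long irq_save(void) { return 0; }
    static void irq_restore(unsigned long flags) { (void)flags; }

    static uint64_t cmpxchg8b_emu(uint64_t *ptr, uint64_t old, uint64_t new)
    {
        unsigned long flags = irq_save();
        uint64_t cur = *ptr;

        if (cur == old)
            *ptr = new;         /* matched: install ecx:ebx */
        irq_restore(flags);
        return cur;             /* caller compares this against 'old',
                                   mirroring "we don't set ZF" above */
    }

    int main(void)
    {
        uint64_t v = 0x1122334455667788ULL;

        cmpxchg8b_emu(&v, 0x1122334455667788ULL, 0xdeadbeefcafef00dULL);
        printf("%#llx\n", (unsigned long long)v);   /* swapped */
        return 0;
    }
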
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9b5a9f59a478..06630d26e56d 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,9 +1,10 @@
1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o physaddr.o gup.o 2 pat.o pgtable.o physaddr.o gup.o setup_nx.o
3 3
4# Make sure __phys_addr has no stackprotector 4# Make sure __phys_addr has no stackprotector
5nostackp := $(call cc-option, -fno-stack-protector) 5nostackp := $(call cc-option, -fno-stack-protector)
6CFLAGS_physaddr.o := $(nostackp) 6CFLAGS_physaddr.o := $(nostackp)
7CFLAGS_setup_nx.o := $(nostackp)
7 8
8obj-$(CONFIG_SMP) += tlb.o 9obj-$(CONFIG_SMP) += tlb.o
9 10
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 0607119cef94..73ffd5536f62 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,69 +28,6 @@ int direct_gbpages
28#endif 28#endif
29; 29;
30 30
31int nx_enabled;
32
33#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
34static int disable_nx __cpuinitdata;
35
36/*
37 * noexec = on|off
38 *
39 * Control non-executable mappings for processes.
40 *
41 * on Enable
42 * off Disable
43 */
44static int __init noexec_setup(char *str)
45{
46 if (!str)
47 return -EINVAL;
48 if (!strncmp(str, "on", 2)) {
49 __supported_pte_mask |= _PAGE_NX;
50 disable_nx = 0;
51 } else if (!strncmp(str, "off", 3)) {
52 disable_nx = 1;
53 __supported_pte_mask &= ~_PAGE_NX;
54 }
55 return 0;
56}
57early_param("noexec", noexec_setup);
58#endif
59
60#ifdef CONFIG_X86_PAE
61static void __init set_nx(void)
62{
63 unsigned int v[4], l, h;
64
65 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
66 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
67
68 if ((v[3] & (1 << 20)) && !disable_nx) {
69 rdmsr(MSR_EFER, l, h);
70 l |= EFER_NX;
71 wrmsr(MSR_EFER, l, h);
72 nx_enabled = 1;
73 __supported_pte_mask |= _PAGE_NX;
74 }
75 }
76}
77#else
78static inline void set_nx(void)
79{
80}
81#endif
82
83#ifdef CONFIG_X86_64
84void __cpuinit check_efer(void)
85{
86 unsigned long efer;
87
88 rdmsrl(MSR_EFER, efer);
89 if (!(efer & EFER_NX) || disable_nx)
90 __supported_pte_mask &= ~_PAGE_NX;
91}
92#endif
93
94static void __init find_early_table_space(unsigned long end, int use_pse, 31static void __init find_early_table_space(unsigned long end, int use_pse,
95 int use_gbpages) 32 int use_gbpages)
96{ 33{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 334e63ca7b2b..2feb9bdedaaf 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -170,8 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
170 (unsigned long long)phys_addr, 170 (unsigned long long)phys_addr,
171 (unsigned long long)(phys_addr + size), 171 (unsigned long long)(phys_addr + size),
172 prot_val, new_prot_val); 172 prot_val, new_prot_val);
173 free_memtype(phys_addr, phys_addr + size); 173 goto err_free_memtype;
174 return NULL;
175 } 174 }
176 prot_val = new_prot_val; 175 prot_val = new_prot_val;
177 } 176 }
@@ -197,26 +196,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	 */
 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
 	if (!area)
-		return NULL;
+		goto err_free_memtype;
 	area->phys_addr = phys_addr;
 	vaddr = (unsigned long) area->addr;
 
-	if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
-		free_memtype(phys_addr, phys_addr + size);
-		free_vm_area(area);
-		return NULL;
-	}
+	if (kernel_map_sync_memtype(phys_addr, size, prot_val))
+		goto err_free_area;
 
-	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
-		free_memtype(phys_addr, phys_addr + size);
-		free_vm_area(area);
-		return NULL;
-	}
+	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
+		goto err_free_area;
 
 	ret_addr = (void __iomem *) (vaddr + offset);
 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 
 	return ret_addr;
+err_free_area:
+	free_vm_area(area);
+err_free_memtype:
+	free_memtype(phys_addr, phys_addr + size);
+	return NULL;
 }
 
 /**
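
The rewrite above replaces three duplicated free_memtype()/free_vm_area() error paths with two fall-through labels ordered inversely to resource acquisition, so each failure site jumps to exactly the cleanup it needs. A minimal self-contained sketch of the same pattern, using invented resources rather than the kernel APIs:

	#include <stdlib.h>
	#include <string.h>

	/* Hypothetical two-resource setup: each failure jumps to the label
	 * that unwinds only what has been acquired so far, and the labels
	 * fall through in reverse order of acquisition. */
	static char *make_buffer(const char *name)
	{
		char *copy, *buf;

		copy = strdup(name);
		if (!copy)
			return NULL;		/* nothing to unwind yet */

		buf = malloc(4096);
		if (!buf)
			goto err_free_copy;	/* only 'copy' exists */

		if (strlen(copy) >= 4096)
			goto err_free_buf;	/* both allocations exist */

		strcpy(buf, copy);
		free(copy);
		return buf;

	err_free_buf:
		free(buf);
	err_free_copy:
		free(copy);
		return NULL;
	}
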
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7257cf3decf9..e78cd0ec2bcf 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -81,6 +81,7 @@ enum {
 void pat_init(void)
 {
 	u64 pat;
+	bool boot_cpu = !boot_pat_state;
 
 	if (!pat_enabled)
 		return;
@@ -122,8 +123,10 @@ void pat_init(void)
 		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
-	printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
-	       smp_processor_id(), boot_pat_state, pat);
+
+	if (boot_cpu)
+		printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+		       smp_processor_id(), boot_pat_state, pat);
 }
 
 #undef PAT
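
pat_init() runs on every CPU, but boot_pat_state is a zero-initialized global that only the first caller fills from the MSR, so !boot_pat_state is true exactly once; sampling it into boot_cpu before the rdmsrl lets the printk fire once instead of once per CPU. A standalone sketch of the idiom, under the assumption that the saved hardware value is never legitimately zero:

	#include <stdio.h>

	static unsigned long long saved_state;	/* zero until first initialization */

	static unsigned long long read_hw_state(void)
	{
		return 0x7040600070406ULL;	/* stand-in for an MSR read */
	}

	static void per_cpu_init(int cpu)
	{
		int first_call = (saved_state == 0);	/* like !boot_pat_state */

		if (first_call)
			saved_state = read_hw_state();

		/* ... program the hardware on every CPU ... */

		if (first_call)		/* log once, not once per CPU */
			printf("initialized: cpu %d, old 0x%llx\n", cpu, saved_state);
	}

	int main(void)
	{
		for (int cpu = 0; cpu < 4; cpu++)
			per_cpu_init(cpu);
		return 0;
	}
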
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
new file mode 100644
index 000000000000..513d8ed5d2ec
--- /dev/null
+++ b/arch/x86/mm/setup_nx.c
@@ -0,0 +1,69 @@
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (!strncmp(str, "on", 2)) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str, "off", 3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+#else
+void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+	unsigned long efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if (!(efer & EFER_NX) || disable_nx)
+		__supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
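
set_nx() keys off CPUID leaf 0x80000001, EDX bit 20, the NX (no-execute page) feature flag, and only then sets EFER.NX via wrmsr. A userspace sketch of the same probe, using the GCC/Clang <cpuid.h> helper purely for illustration (the kernel uses its own cpuid() wrappers plus privileged MSR access, which userspace cannot do):

	#include <stdio.h>
	#include <cpuid.h>	/* GCC/Clang helper; returns 0 if the leaf is absent */

	int main(void)
	{
		unsigned int a, b, c, d;

		/* Leaf 0x80000001, EDX bit 20 advertises NX, as tested in set_nx(). */
		if (__get_cpuid(0x80000001, &a, &b, &c, &d))
			printf("NX supported: %s\n", (d & (1u << 20)) ? "yes" : "no");
		else
			printf("extended leaf 0x80000001 not available\n");
		return 0;
	}
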
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 52e62e57fedd..b22d13b0c71d 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -266,7 +266,7 @@ void pcibios_set_master(struct pci_dev *dev)
 	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
 }
 
-static struct vm_operations_struct pci_mmap_ops = {
+static const struct vm_operations_struct pci_mmap_ops = {
 	.access = generic_access_phys,
 };
 
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index b53225d2cac3..e133ce25e290 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -100,7 +100,7 @@ static int xen_array_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations u32_array_fops = {
+static const struct file_operations u32_array_fops = {
 	.owner	= THIS_MODULE,
 	.open	= u32_array_open,
 	.release= xen_array_release,
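
Both of these one-word hunks const-qualify operations tables whose function pointers never change after initialization. The compiler then places the table in read-only data, so an accidental (or malicious) write to a callback pointer faults instead of silently redirecting control flow. A generic sketch of the pattern:

	#include <stdio.h>

	struct widget_ops {			/* table of immutable callbacks */
		int (*open)(void);
		void (*close)(void);
	};

	static int widget_open(void)   { puts("open");  return 0; }
	static void widget_close(void) { puts("close"); }

	/* const places the table in read-only data; re-pointing the
	 * callbacks at runtime is a compile error or a fault. */
	static const struct widget_ops widget_ops = {
		.open  = widget_open,
		.close = widget_close,
	};

	int main(void)
	{
		if (widget_ops.open() == 0)
			widget_ops.close();
		return 0;
	}
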
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 544eb7496531..dfbf70e65860 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -178,6 +178,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		      unsigned int *cx, unsigned int *dx)
 {
+	unsigned maskebx = ~0;
 	unsigned maskecx = ~0;
 	unsigned maskedx = ~0;
 
@@ -185,9 +186,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	 * Mask out inconvenient features, to try and disable as many
 	 * unsupported kernel subsystems as possible.
 	 */
-	if (*ax == 1) {
+	switch (*ax) {
+	case 1:
 		maskecx = cpuid_leaf1_ecx_mask;
 		maskedx = cpuid_leaf1_edx_mask;
+		break;
+
+	case 0xb:
+		/* Suppress extended topology stuff */
+		maskebx = 0;
+		break;
 	}
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
@@ -197,6 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
197 "=d" (*dx) 205 "=d" (*dx)
198 : "0" (*ax), "2" (*cx)); 206 : "0" (*ax), "2" (*cx));
199 207
208 *bx &= maskebx;
200 *cx &= maskecx; 209 *cx &= maskecx;
201 *dx &= maskedx; 210 *dx &= maskedx;
202} 211}
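
xen_cpuid() post-filters what the hypervisor-emulated cpuid instruction returns: every mask defaults to ~0, so leaves not named in the switch pass through unmodified, while leaf 0xb now reports EBX=0 to hide x2APIC extended topology from the guest. A userspace sketch of the filtering shape (__get_cpuid_count stands in for the XEN_EMULATE_PREFIX'd instruction, and only the 0xb case is modeled):

	#include <stdio.h>
	#include <cpuid.h>

	/* Masks default to all-ones, so only leaves named in the switch are
	 * altered; this mirrors the structure of xen_cpuid(). */
	static void filtered_cpuid(unsigned int leaf,
				   unsigned int *a, unsigned int *b,
				   unsigned int *c, unsigned int *d)
	{
		unsigned int maskebx = ~0u;

		switch (leaf) {
		case 0xb:
			maskebx = 0;	/* suppress extended topology */
			break;
		}

		if (!__get_cpuid_count(leaf, 0, a, b, c, d))
			*a = *b = *c = *d = 0;

		*b &= maskebx;
	}

	int main(void)
	{
		unsigned int a, b, c, d;

		filtered_cpuid(0xb, &a, &b, &c, &d);
		printf("leaf 0xb after masking: ebx=0x%x\n", b);
		return 0;
	}
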
@@ -1075,6 +1084,8 @@ asmlinkage void __init xen_start_kernel(void)
 	 * Set up some pagetable state before starting to set any ptes.
 	 */
 
+	xen_init_mmu_ops();
+
 	/* Prevent unwanted bits from being set in PTEs. */
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
 	if (!xen_initial_domain())
@@ -1082,6 +1093,11 @@ asmlinkage void __init xen_start_kernel(void)
 
 	__supported_pte_mask |= _PAGE_IOMAP;
 
+#ifdef CONFIG_X86_64
+	/* Work out if we support NX */
+	check_efer();
+#endif
+
 	xen_setup_features();
 
 	/* Get mfn list */
@@ -1094,7 +1110,6 @@ asmlinkage void __init xen_start_kernel(void)
 	 */
 	xen_setup_stackprotector();
 
-	xen_init_mmu_ops();
 	xen_init_irq_ops();
 	xen_init_cpuid_mask();
 
@@ -1123,11 +1138,6 @@ asmlinkage void __init xen_start_kernel(void)
 
 	pgd = (pgd_t *)xen_start_info->pt_base;
 
-#ifdef CONFIG_X86_64
-	/* Work out if we support NX */
-	check_efer();
-#endif
-
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
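
Taken together, the four xen_start_kernel() hunks are a pure reordering: xen_init_mmu_ops() now runs before any __supported_pte_mask edits, and on 64-bit check_efer() runs right after _PAGE_IOMAP is set rather than just before the pagetable is grabbed, so NX support is settled before the mask is first consumed. A tiny runnable illustration of why such probe-before-consume ordering matters (names and the bit chosen are invented for the sketch):

	#include <stdio.h>

	/* A capability probe must edit the mask before any consumer reads it;
	 * the bit and functions here are invented for illustration. */
	static unsigned long long supported_mask = 1ULL << 63;	/* optimistic default */

	static void probe_capability(int hw_has_nx)
	{
		if (!hw_has_nx)
			supported_mask &= ~(1ULL << 63);	/* retract unsupported bit */
	}

	static unsigned long long make_entry(unsigned long long bits)
	{
		return bits & supported_mask;		/* consumer of the mask */
	}

	int main(void)
	{
		/* Correct order: probe first, then build entries from the mask. */
		probe_capability(/* hw_has_nx = */ 0);
		printf("entry = 0x%llx\n", make_entry(~0ULL));	/* bit 63 cleared */
		return 0;
	}
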