aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig15
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/boot/setup.ld3
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c10
-rw-r--r--arch/x86/ia32/ia32entry.S36
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/paravirt.h28
-rw-r--r--arch/x86/include/asm/paravirt_types.h10
-rw-r--r--arch/x86/include/asm/topology.h1
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h23
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S3
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c61
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c17
-rw-r--r--arch/x86/kernel/e820.c4
-rw-r--r--arch/x86/kernel/early_printk.c5
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c2
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/pci-gart_64.c1
-rw-r--r--arch/x86/kernel/reboot.c1
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/tlb_uv.c9
-rw-r--r--arch/x86/kernel/trampoline.c12
-rw-r--r--arch/x86/kernel/trampoline_64.S4
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu.c86
-rw-r--r--arch/x86/kvm/paging_tmpl.h18
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/lib/Makefile6
-rw-r--r--arch/x86/xen/enlighten.c3
38 files changed, 289 insertions, 135 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8da93745c087..07e01149e3bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT
86config HAVE_LATENCYTOP_SUPPORT 86config HAVE_LATENCYTOP_SUPPORT
87 def_bool y 87 def_bool y
88 88
89config FAST_CMPXCHG_LOCAL
90 bool
91 default y
92
93config MMU 89config MMU
94 def_bool y 90 def_bool y
95 91
@@ -495,7 +491,7 @@ if PARAVIRT_GUEST
495source "arch/x86/xen/Kconfig" 491source "arch/x86/xen/Kconfig"
496 492
497config VMI 493config VMI
498 bool "VMI Guest support" 494 bool "VMI Guest support (DEPRECATED)"
499 select PARAVIRT 495 select PARAVIRT
500 depends on X86_32 496 depends on X86_32
501 ---help--- 497 ---help---
@@ -504,6 +500,15 @@ config VMI
504 at the moment), by linking the kernel to a GPL-ed ROM module 500 at the moment), by linking the kernel to a GPL-ed ROM module
505 provided by the hypervisor. 501 provided by the hypervisor.
506 502
503 As of September 2009, VMware has started a phased retirement
504 of this feature from VMware's products. Please see
505 feature-removal-schedule.txt for details. If you are
506 planning to enable this option, please note that you cannot
507 live migrate a VMI enabled VM to a future VMware product,
508 which doesn't support VMI. So if you expect your kernel to
509 seamlessly migrate to newer VMware products, keep this
510 disabled.
511
507config KVM_CLOCK 512config KVM_CLOCK
508 bool "KVM paravirtualized clock" 513 bool "KVM paravirtualized clock"
509 select PARAVIRT 514 select PARAVIRT
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b8a9f9..f2824fb8c79c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
400 400
401config X86_CMPXCHG64 401config X86_CMPXCHG64
402 def_bool y 402 def_bool y
403 depends on X86_PAE || X86_64 403 depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
404 404
405# this should be set for all -march=.. options where the compiler 405# this should be set for all -march=.. options where the compiler
406# generates cmov. 406# generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
412 int 412 int
413 default "64" if X86_64 413 default "64" if X86_64
414 default "6" if X86_32 && X86_P6_NOP 414 default "6" if X86_32 && X86_P6_NOP
415 default "5" if X86_32 && X86_CMPXCHG64
415 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) 416 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
416 default "3" 417 default "3"
417 418
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 0f6ec455a2b1..03c0683636b6 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -53,6 +53,9 @@ SECTIONS
53 53
54 /DISCARD/ : { *(.note*) } 54 /DISCARD/ : { *(.note*) }
55 55
56 /*
57 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
58 */
56 . = ASSERT(_end <= 0x8000, "Setup too big!"); 59 . = ASSERT(_end <= 0x8000, "Setup too big!");
57 . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); 60 . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
58 /* Necessary for the very-old-loader check to work... */ 61 /* Necessary for the very-old-loader check to work... */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 585edebe12cf..49c552c060e9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
82 return -EINVAL; 82 return -EINVAL;
83 } 83 }
84 84
85 if (irq_fpu_usable()) 85 if (!irq_fpu_usable())
86 err = crypto_aes_expand_key(ctx, in_key, key_len); 86 err = crypto_aes_expand_key(ctx, in_key, key_len);
87 else { 87 else {
88 kernel_fpu_begin(); 88 kernel_fpu_begin();
@@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
103{ 103{
104 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 104 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
105 105
106 if (irq_fpu_usable()) 106 if (!irq_fpu_usable())
107 crypto_aes_encrypt_x86(ctx, dst, src); 107 crypto_aes_encrypt_x86(ctx, dst, src);
108 else { 108 else {
109 kernel_fpu_begin(); 109 kernel_fpu_begin();
@@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
116{ 116{
117 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 117 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
118 118
119 if (irq_fpu_usable()) 119 if (!irq_fpu_usable())
120 crypto_aes_decrypt_x86(ctx, dst, src); 120 crypto_aes_decrypt_x86(ctx, dst, src);
121 else { 121 else {
122 kernel_fpu_begin(); 122 kernel_fpu_begin();
@@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req)
342 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); 342 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
343 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); 343 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
344 344
345 if (irq_fpu_usable()) { 345 if (!irq_fpu_usable()) {
346 struct ablkcipher_request *cryptd_req = 346 struct ablkcipher_request *cryptd_req =
347 ablkcipher_request_ctx(req); 347 ablkcipher_request_ctx(req);
348 memcpy(cryptd_req, req, sizeof(*req)); 348 memcpy(cryptd_req, req, sizeof(*req));
@@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req)
363 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); 363 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
364 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); 364 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
365 365
366 if (irq_fpu_usable()) { 366 if (!irq_fpu_usable()) {
367 struct ablkcipher_request *cryptd_req = 367 struct ablkcipher_request *cryptd_req =
368 ablkcipher_request_ctx(req); 368 ablkcipher_request_ctx(req);
369 memcpy(cryptd_req, req, sizeof(*req)); 369 memcpy(cryptd_req, req, sizeof(*req));
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 74619c4f9fda..1733f9f65e82 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
21#define __AUDIT_ARCH_LE 0x40000000 21#define __AUDIT_ARCH_LE 0x40000000
22 22
23#ifndef CONFIG_AUDITSYSCALL 23#ifndef CONFIG_AUDITSYSCALL
24#define sysexit_audit int_ret_from_sys_call 24#define sysexit_audit ia32_ret_from_sys_call
25#define sysretl_audit int_ret_from_sys_call 25#define sysretl_audit ia32_ret_from_sys_call
26#endif 26#endif
27 27
28#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) 28#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
39 .endm 39 .endm
40 40
41 /* clobbers %eax */ 41 /* clobbers %eax */
42 .macro CLEAR_RREGS _r9=rax 42 .macro CLEAR_RREGS offset=0, _r9=rax
43 xorl %eax,%eax 43 xorl %eax,%eax
44 movq %rax,R11(%rsp) 44 movq %rax,\offset+R11(%rsp)
45 movq %rax,R10(%rsp) 45 movq %rax,\offset+R10(%rsp)
46 movq %\_r9,R9(%rsp) 46 movq %\_r9,\offset+R9(%rsp)
47 movq %rax,R8(%rsp) 47 movq %rax,\offset+R8(%rsp)
48 .endm 48 .endm
49 49
50 /* 50 /*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
172 movl RIP-R11(%rsp),%edx /* User %eip */ 172 movl RIP-R11(%rsp),%edx /* User %eip */
173 CFI_REGISTER rip,rdx 173 CFI_REGISTER rip,rdx
174 RESTORE_ARGS 1,24,1,1,1,1 174 RESTORE_ARGS 1,24,1,1,1,1
175 xorq %r8,%r8
176 xorq %r9,%r9
177 xorq %r10,%r10
178 xorq %r11,%r11
175 popfq 179 popfq
176 CFI_ADJUST_CFA_OFFSET -8 180 CFI_ADJUST_CFA_OFFSET -8
177 /*CFI_RESTORE rflags*/ 181 /*CFI_RESTORE rflags*/
@@ -202,7 +206,7 @@ sysexit_from_sys_call:
202 206
203 .macro auditsys_exit exit,ebpsave=RBP 207 .macro auditsys_exit exit,ebpsave=RBP
204 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) 208 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
205 jnz int_ret_from_sys_call 209 jnz ia32_ret_from_sys_call
206 TRACE_IRQS_ON 210 TRACE_IRQS_ON
207 sti 211 sti
208 movl %eax,%esi /* second arg, syscall return value */ 212 movl %eax,%esi /* second arg, syscall return value */
@@ -218,8 +222,9 @@ sysexit_from_sys_call:
218 cli 222 cli
219 TRACE_IRQS_OFF 223 TRACE_IRQS_OFF
220 testl %edi,TI_flags(%r10) 224 testl %edi,TI_flags(%r10)
221 jnz int_with_check 225 jz \exit
222 jmp \exit 226 CLEAR_RREGS -ARGOFFSET
227 jmp int_with_check
223 .endm 228 .endm
224 229
225sysenter_auditsys: 230sysenter_auditsys:
@@ -329,6 +334,9 @@ sysretl_from_sys_call:
329 CFI_REGISTER rip,rcx 334 CFI_REGISTER rip,rcx
330 movl EFLAGS-ARGOFFSET(%rsp),%r11d 335 movl EFLAGS-ARGOFFSET(%rsp),%r11d
331 /*CFI_REGISTER rflags,r11*/ 336 /*CFI_REGISTER rflags,r11*/
337 xorq %r10,%r10
338 xorq %r9,%r9
339 xorq %r8,%r8
332 TRACE_IRQS_ON 340 TRACE_IRQS_ON
333 movl RSP-ARGOFFSET(%rsp),%esp 341 movl RSP-ARGOFFSET(%rsp),%esp
334 CFI_RESTORE rsp 342 CFI_RESTORE rsp
@@ -353,7 +361,7 @@ cstar_tracesys:
353#endif 361#endif
354 xchgl %r9d,%ebp 362 xchgl %r9d,%ebp
355 SAVE_REST 363 SAVE_REST
356 CLEAR_RREGS r9 364 CLEAR_RREGS 0, r9
357 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 365 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
358 movq %rsp,%rdi /* &pt_regs -> arg1 */ 366 movq %rsp,%rdi /* &pt_regs -> arg1 */
359 call syscall_trace_enter 367 call syscall_trace_enter
@@ -425,6 +433,8 @@ ia32_do_call:
425 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative 433 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
426ia32_sysret: 434ia32_sysret:
427 movq %rax,RAX-ARGOFFSET(%rsp) 435 movq %rax,RAX-ARGOFFSET(%rsp)
436ia32_ret_from_sys_call:
437 CLEAR_RREGS -ARGOFFSET
428 jmp int_ret_from_sys_call 438 jmp int_ret_from_sys_call
429 439
430ia32_tracesys: 440ia32_tracesys:
@@ -442,8 +452,8 @@ END(ia32_syscall)
442 452
443ia32_badsys: 453ia32_badsys:
444 movq $0,ORIG_RAX-ARGOFFSET(%rsp) 454 movq $0,ORIG_RAX-ARGOFFSET(%rsp)
445 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 455 movq $-ENOSYS,%rax
446 jmp int_ret_from_sys_call 456 jmp ia32_sysret
447 457
448quiet_ni_syscall: 458quiet_ni_syscall:
449 movq $-ENOSYS,%rax 459 movq $-ENOSYS,%rax
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fad..d83892226f73 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
796#define KVM_ARCH_WANT_MMU_NOTIFIER 796#define KVM_ARCH_WANT_MMU_NOTIFIER
797int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 797int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
798int kvm_age_hva(struct kvm *kvm, unsigned long hva); 798int kvm_age_hva(struct kvm *kvm, unsigned long hva);
799void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
799int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 800int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
800int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 801int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
801int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 802int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64c5814..f1363b72364f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
133static inline void enable_p5_mce(void) {} 133static inline void enable_p5_mce(void) {}
134#endif 134#endif
135 135
136extern void (*x86_mce_decode_callback)(struct mce *m);
137
136void mce_setup(struct mce *m); 138void mce_setup(struct mce *m);
137void mce_log(struct mce *m); 139void mce_log(struct mce *m);
138DECLARE_PER_CPU(struct sys_device, mce_dev); 140DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8aebcc41041d..efb38994859c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
840 840
841static inline unsigned long __raw_local_save_flags(void) 841static inline unsigned long __raw_local_save_flags(void)
842{ 842{
843 unsigned long f; 843 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
844
845 asm volatile(paravirt_alt(PARAVIRT_CALL)
846 : "=a"(f)
847 : paravirt_type(pv_irq_ops.save_fl),
848 paravirt_clobber(CLBR_EAX)
849 : "memory", "cc");
850 return f;
851} 844}
852 845
853static inline void raw_local_irq_restore(unsigned long f) 846static inline void raw_local_irq_restore(unsigned long f)
854{ 847{
855 asm volatile(paravirt_alt(PARAVIRT_CALL) 848 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
856 : "=a"(f)
857 : PV_FLAGS_ARG(f),
858 paravirt_type(pv_irq_ops.restore_fl),
859 paravirt_clobber(CLBR_EAX)
860 : "memory", "cc");
861} 849}
862 850
863static inline void raw_local_irq_disable(void) 851static inline void raw_local_irq_disable(void)
864{ 852{
865 asm volatile(paravirt_alt(PARAVIRT_CALL) 853 PVOP_VCALLEE0(pv_irq_ops.irq_disable);
866 :
867 : paravirt_type(pv_irq_ops.irq_disable),
868 paravirt_clobber(CLBR_EAX)
869 : "memory", "eax", "cc");
870} 854}
871 855
872static inline void raw_local_irq_enable(void) 856static inline void raw_local_irq_enable(void)
873{ 857{
874 asm volatile(paravirt_alt(PARAVIRT_CALL) 858 PVOP_VCALLEE0(pv_irq_ops.irq_enable);
875 :
876 : paravirt_type(pv_irq_ops.irq_enable),
877 paravirt_clobber(CLBR_EAX)
878 : "memory", "eax", "cc");
879} 859}
880 860
881static inline unsigned long __raw_local_irq_save(void) 861static inline unsigned long __raw_local_irq_save(void)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index dd0f5b32489d..9357473c8da0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void);
494#define EXTRA_CLOBBERS 494#define EXTRA_CLOBBERS
495#define VEXTRA_CLOBBERS 495#define VEXTRA_CLOBBERS
496#else /* CONFIG_X86_64 */ 496#else /* CONFIG_X86_64 */
497/* [re]ax isn't an arg, but the return val */
497#define PVOP_VCALL_ARGS \ 498#define PVOP_VCALL_ARGS \
498 unsigned long __edi = __edi, __esi = __esi, \ 499 unsigned long __edi = __edi, __esi = __esi, \
499 __edx = __edx, __ecx = __ecx 500 __edx = __edx, __ecx = __ecx, __eax = __eax
500#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax 501#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
501 502
502#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) 503#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
503#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) 504#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void);
509 "=c" (__ecx) 510 "=c" (__ecx)
510#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) 511#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
511 512
513/* void functions are still allowed [re]ax for scratch */
512#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) 514#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
513#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS 515#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
514 516
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void);
583 VEXTRA_CLOBBERS, \ 585 VEXTRA_CLOBBERS, \
584 pre, post, ##__VA_ARGS__) 586 pre, post, ##__VA_ARGS__)
585 587
586#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ 588#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
587 ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ 589 ____PVOP_VCALL(op.func, CLBR_RET_REG, \
588 PVOP_VCALLEE_CLOBBERS, , \ 590 PVOP_VCALLEE_CLOBBERS, , \
589 pre, post, ##__VA_ARGS__) 591 pre, post, ##__VA_ARGS__)
590 592
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 25a92842dd99..d823c245f63b 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,6 +143,7 @@ extern unsigned long node_remap_size[];
143 | 1*SD_BALANCE_FORK \ 143 | 1*SD_BALANCE_FORK \
144 | 0*SD_BALANCE_WAKE \ 144 | 0*SD_BALANCE_WAKE \
145 | 1*SD_WAKE_AFFINE \ 145 | 1*SD_WAKE_AFFINE \
146 | 1*SD_PREFER_LOCAL \
146 | 0*SD_SHARE_CPUPOWER \ 147 | 0*SD_SHARE_CPUPOWER \
147 | 0*SD_POWERSAVINGS_BALANCE \ 148 | 0*SD_POWERSAVINGS_BALANCE \
148 | 0*SD_SHARE_PKG_RESOURCES \ 149 | 0*SD_SHARE_PKG_RESOURCES \
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 04eb6c958b9d..d1414af98559 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -19,6 +19,8 @@
19#include <asm/types.h> 19#include <asm/types.h>
20#include <asm/percpu.h> 20#include <asm/percpu.h>
21#include <asm/uv/uv_mmrs.h> 21#include <asm/uv/uv_mmrs.h>
22#include <asm/irq_vectors.h>
23#include <asm/io_apic.h>
22 24
23 25
24/* 26/*
@@ -114,7 +116,7 @@
114/* 116/*
115 * The largest possible NASID of a C or M brick (+ 2) 117 * The largest possible NASID of a C or M brick (+ 2)
116 */ 118 */
117#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) 119#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
118 120
119struct uv_scir_s { 121struct uv_scir_s {
120 struct timer_list timer; 122 struct timer_list timer;
@@ -230,6 +232,20 @@ static inline unsigned long uv_gpa(void *v)
230 return uv_soc_phys_ram_to_gpa(__pa(v)); 232 return uv_soc_phys_ram_to_gpa(__pa(v));
231} 233}
232 234
235/* gnode -> pnode */
236static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
237{
238 return gpa >> uv_hub_info->m_val;
239}
240
241/* gpa -> pnode */
242static inline int uv_gpa_to_pnode(unsigned long gpa)
243{
244 unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1;
245
246 return uv_gpa_to_gnode(gpa) & n_mask;
247}
248
233/* pnode, offset --> socket virtual */ 249/* pnode, offset --> socket virtual */
234static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) 250static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
235{ 251{
@@ -421,9 +437,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
421static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) 437static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
422{ 438{
423 unsigned long val; 439 unsigned long val;
440 unsigned long dmode = dest_Fixed;
441
442 if (vector == NMI_VECTOR)
443 dmode = dest_NMI;
424 444
425 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 445 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
426 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | 446 ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
447 (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
427 (vector << UVH_IPI_INT_VECTOR_SHFT); 448 (vector << UVH_IPI_INT_VECTOR_SHFT);
428 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 449 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
429} 450}
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 7da00b799cda..060fff8f5c5b 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -57,5 +57,8 @@ SECTIONS
57 *(.note*) 57 *(.note*)
58 } 58 }
59 59
60 /*
61 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
62 */
60 . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); 63 . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
61} 64}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f5f5886a6b53..326c25477d3d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -352,14 +352,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
352 352
353 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { 353 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
354 alias.v = uv_read_local_mmr(redir_addrs[i].alias); 354 alias.v = uv_read_local_mmr(redir_addrs[i].alias);
355 if (alias.s.base == 0) { 355 if (alias.s.enable && alias.s.base == 0) {
356 *size = (1UL << alias.s.m_alias); 356 *size = (1UL << alias.s.m_alias);
357 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); 357 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
358 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; 358 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
359 return; 359 return;
360 } 360 }
361 } 361 }
362 BUG(); 362 *base = *size = 0;
363} 363}
364 364
365enum map_type {map_wb, map_uc}; 365enum map_type {map_wb, map_uc};
@@ -619,12 +619,12 @@ void __init uv_system_init(void)
619 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 619 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
620 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; 620 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
621 uv_cpu_hub_info(cpu)->m_val = m_val; 621 uv_cpu_hub_info(cpu)->m_val = m_val;
622 uv_cpu_hub_info(cpu)->n_val = m_val; 622 uv_cpu_hub_info(cpu)->n_val = n_val;
623 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 623 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
624 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 624 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
625 uv_cpu_hub_info(cpu)->pnode = pnode; 625 uv_cpu_hub_info(cpu)->pnode = pnode;
626 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; 626 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
627 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 627 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
628 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 628 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
629 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 629 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
630 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 630 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 183c3457d2f4..721a77ca8115 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
85static DEFINE_PER_CPU(struct mce, mces_seen); 85static DEFINE_PER_CPU(struct mce, mces_seen);
86static int cpu_missing; 86static int cpu_missing;
87 87
88static void default_decode_mce(struct mce *m)
89{
90 pr_emerg("No human readable MCE decoding support on this CPU type.\n");
91 pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
92}
93
94/*
95 * CPU/chipset specific EDAC code can register a callback here to print
96 * MCE errors in a human-readable form:
97 */
98void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
99EXPORT_SYMBOL(x86_mce_decode_callback);
88 100
89/* MCA banks polled by the period polling timer for corrected events */ 101/* MCA banks polled by the period polling timer for corrected events */
90DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 102DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,46 +177,46 @@ void mce_log(struct mce *mce)
165 set_bit(0, &mce_need_notify); 177 set_bit(0, &mce_need_notify);
166} 178}
167 179
168void __weak decode_mce(struct mce *m)
169{
170 return;
171}
172
173static void print_mce(struct mce *m) 180static void print_mce(struct mce *m)
174{ 181{
175 printk(KERN_EMERG 182 pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
176 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
177 m->extcpu, m->mcgstatus, m->bank, m->status); 183 m->extcpu, m->mcgstatus, m->bank, m->status);
184
178 if (m->ip) { 185 if (m->ip) {
179 printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", 186 pr_emerg("RIP%s %02x:<%016Lx> ",
180 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 187 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
181 m->cs, m->ip); 188 m->cs, m->ip);
189
182 if (m->cs == __KERNEL_CS) 190 if (m->cs == __KERNEL_CS)
183 print_symbol("{%s}", m->ip); 191 print_symbol("{%s}", m->ip);
184 printk(KERN_CONT "\n"); 192 pr_cont("\n");
185 } 193 }
186 printk(KERN_EMERG "TSC %llx ", m->tsc); 194
195 pr_emerg("TSC %llx ", m->tsc);
187 if (m->addr) 196 if (m->addr)
188 printk(KERN_CONT "ADDR %llx ", m->addr); 197 pr_cont("ADDR %llx ", m->addr);
189 if (m->misc) 198 if (m->misc)
190 printk(KERN_CONT "MISC %llx ", m->misc); 199 pr_cont("MISC %llx ", m->misc);
191 printk(KERN_CONT "\n");
192 printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
193 m->cpuvendor, m->cpuid, m->time, m->socketid,
194 m->apicid);
195 200
196 decode_mce(m); 201 pr_cont("\n");
202 pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
203 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
204
205 /*
206 * Print out human-readable details about the MCE error,
207 * (if the CPU has an implementation for that):
208 */
209 x86_mce_decode_callback(m);
197} 210}
198 211
199static void print_mce_head(void) 212static void print_mce_head(void)
200{ 213{
201 printk(KERN_EMERG "\nHARDWARE ERROR\n"); 214 pr_emerg("\nHARDWARE ERROR\n");
202} 215}
203 216
204static void print_mce_tail(void) 217static void print_mce_tail(void)
205{ 218{
206 printk(KERN_EMERG "This is not a software problem!\n" 219 pr_emerg("This is not a software problem!\n");
207 "Run through mcelog --ascii to decode and contact your hardware vendor\n");
208} 220}
209 221
210#define PANIC_TIMEOUT 5 /* 5 seconds */ 222#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced;
218static void wait_for_panic(void) 230static void wait_for_panic(void)
219{ 231{
220 long timeout = PANIC_TIMEOUT*USEC_PER_SEC; 232 long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
233
221 preempt_disable(); 234 preempt_disable();
222 local_irq_enable(); 235 local_irq_enable();
223 while (timeout-- > 0) 236 while (timeout-- > 0)
@@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
285static int msr_to_offset(u32 msr) 298static int msr_to_offset(u32 msr)
286{ 299{
287 unsigned bank = __get_cpu_var(injectm.bank); 300 unsigned bank = __get_cpu_var(injectm.bank);
301
288 if (msr == rip_msr) 302 if (msr == rip_msr)
289 return offsetof(struct mce, ip); 303 return offsetof(struct mce, ip);
290 if (msr == MSR_IA32_MCx_STATUS(bank)) 304 if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -1200,7 +1214,8 @@ static int __cpuinit mce_cap_init(void)
1200 rdmsrl(MSR_IA32_MCG_CAP, cap); 1214 rdmsrl(MSR_IA32_MCG_CAP, cap);
1201 1215
1202 b = cap & MCG_BANKCNT_MASK; 1216 b = cap & MCG_BANKCNT_MASK;
1203 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); 1217 if (!banks)
1218 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
1204 1219
1205 if (b > MAX_NR_BANKS) { 1220 if (b > MAX_NR_BANKS) {
1206 printk(KERN_WARNING 1221 printk(KERN_WARNING
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 889f665fe93d..7c785634af2b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -8,6 +8,7 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/interrupt.h> 9#include <linux/interrupt.h>
10#include <linux/percpu.h> 10#include <linux/percpu.h>
11#include <linux/sched.h>
11#include <asm/apic.h> 12#include <asm/apic.h>
12#include <asm/processor.h> 13#include <asm/processor.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index f04e72527604..3c1b12d461d1 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
96 unsigned long long base, size; 96 unsigned long long base, size;
97 char *ptr; 97 char *ptr;
98 char line[LINE_SIZE]; 98 char line[LINE_SIZE];
99 int length;
99 size_t linelen; 100 size_t linelen;
100 101
101 if (!capable(CAP_SYS_ADMIN)) 102 if (!capable(CAP_SYS_ADMIN))
102 return -EPERM; 103 return -EPERM;
103 if (!len)
104 return -EINVAL;
105 104
106 memset(line, 0, LINE_SIZE); 105 memset(line, 0, LINE_SIZE);
107 if (len > LINE_SIZE) 106
108 len = LINE_SIZE; 107 length = len;
109 if (copy_from_user(line, buf, len - 1)) 108 length--;
109
110 if (length > LINE_SIZE - 1)
111 length = LINE_SIZE - 1;
112
113 if (length < 0)
114 return -EINVAL;
115
116 if (copy_from_user(line, buf, length))
110 return -EFAULT; 117 return -EFAULT;
111 118
112 linelen = strlen(line); 119 linelen = strlen(line);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 85419bb7d4ab..d17d482a04f4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
1378 if (mb < 16) 1378 if (mb < 16)
1379 return 1024*1024; 1379 return 1024*1024;
1380 1380
1381 /* To 32MB for anything above that */ 1381 /* To 64MB for anything above that */
1382 return 32*1024*1024; 1382 return 64*1024*1024;
1383} 1383}
1384 1384
1385#define MAX_RESOURCE_SIZE ((resource_size_t)-1) 1385#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 41fd965c80c6..b9c830c12b4a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -206,8 +206,11 @@ static int __init setup_early_printk(char *buf)
206 206
207 while (*buf != '\0') { 207 while (*buf != '\0') {
208 if (!strncmp(buf, "serial", 6)) { 208 if (!strncmp(buf, "serial", 6)) {
209 early_serial_init(buf + 6); 209 buf += 6;
210 early_serial_init(buf);
210 early_console_register(&early_serial_console, keep); 211 early_console_register(&early_serial_console, keep);
212 if (!strncmp(buf, ",ttyS", 5))
213 buf += 5;
211 } 214 }
212 if (!strncmp(buf, "ttyS", 4)) { 215 if (!strncmp(buf, "ttyS", 4)) {
213 early_serial_init(buf + 4); 216 early_serial_init(buf + 4);
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 1736c5a725aa..9c3bd4a2050e 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -15,8 +15,10 @@ EXPORT_SYMBOL(mcount);
15 * the export, but dont use it from C code, it is used 15 * the export, but dont use it from C code, it is used
16 * by assembly code and is not using C calling convention! 16 * by assembly code and is not using C calling convention!
17 */ 17 */
18#ifndef CONFIG_X86_CMPXCHG64
18extern void cmpxchg8b_emu(void); 19extern void cmpxchg8b_emu(void);
19EXPORT_SYMBOL(cmpxchg8b_emu); 20EXPORT_SYMBOL(cmpxchg8b_emu);
21#endif
20 22
21/* Networking helper routines. */ 23/* Networking helper routines. */
22EXPORT_SYMBOL(csum_partial_copy_generic); 24EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 74656d1d4e30..04bbd5278568 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
63 for_each_online_cpu(j) 63 for_each_online_cpu(j)
64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); 64 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
65 seq_printf(p, " Spurious interrupts\n"); 65 seq_printf(p, " Spurious interrupts\n");
66 seq_printf(p, "%*s: ", prec, "CNT"); 66 seq_printf(p, "%*s: ", prec, "PMI");
67 for_each_online_cpu(j) 67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance counter interrupts\n"); 69 seq_printf(p, " Performance monitoring interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND"); 70 seq_printf(p, "%*s: ", prec, "PND");
71 for_each_online_cpu(j) 71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); 72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 64b838eac18c..b2a71dca5642 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
35 35
36/* 36/*
37 * This variable becomes 1 if iommu=pt is passed on the kernel command line. 37 * This variable becomes 1 if iommu=pt is passed on the kernel command line.
38 * If this variable is 1, IOMMU implementations do no DMA ranslation for 38 * If this variable is 1, IOMMU implementations do no DMA translation for
39 * devices and allow every device to access to whole physical memory. This is 39 * devices and allow every device to access to whole physical memory. This is
40 * useful if a user want to use an IOMMU only for KVM device assignment to 40 * useful if a user want to use an IOMMU only for KVM device assignment to
41 * guests and not for driver dma translation. 41 * guests and not for driver dma translation.
@@ -311,7 +311,7 @@ void pci_iommu_shutdown(void)
311 amd_iommu_shutdown(); 311 amd_iommu_shutdown();
312} 312}
313/* Must execute after PCI subsystem */ 313/* Must execute after PCI subsystem */
314fs_initcall(pci_iommu_init); 314rootfs_initcall(pci_iommu_init);
315 315
316#ifdef CONFIG_PCI 316#ifdef CONFIG_PCI
317/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ 317/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 98a827ee9ed7..a7f1b64f86e0 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -16,6 +16,7 @@
16#include <linux/agp_backend.h> 16#include <linux/agp_backend.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/sched.h>
19#include <linux/string.h> 20#include <linux/string.h>
20#include <linux/spinlock.h> 21#include <linux/spinlock.h>
21#include <linux/pci.h> 22#include <linux/pci.h>
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 27349f92a6d7..a1a3cdda06e1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/efi.h> 5#include <linux/efi.h>
6#include <linux/dmi.h> 6#include <linux/dmi.h>
7#include <linux/sched.h>
7#include <linux/tboot.h> 8#include <linux/tboot.h>
8#include <acpi/reboot.h> 9#include <acpi/reboot.h>
9#include <asm/io.h> 10#include <asm/io.h>
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dcb00d278512..be2573448ed9 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
38#ifdef CONFIG_FRAME_POINTER 38#ifdef CONFIG_FRAME_POINTER
39 return *(unsigned long *)(regs->bp + sizeof(long)); 39 return *(unsigned long *)(regs->bp + sizeof(long));
40#else 40#else
41 unsigned long *sp = (unsigned long *)regs->sp; 41 unsigned long *sp =
42 (unsigned long *)kernel_stack_pointer(regs);
42 /* 43 /*
43 * Return address is either directly at stack pointer 44 * Return address is either directly at stack pointer
44 * or above a saved flags. Eflags has bits 22-31 zero, 45 * or above a saved flags. Eflags has bits 22-31 zero,
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 503c1f2e8835..1740c85e24bb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -23,8 +23,6 @@
23static struct bau_control **uv_bau_table_bases __read_mostly; 23static struct bau_control **uv_bau_table_bases __read_mostly;
24static int uv_bau_retry_limit __read_mostly; 24static int uv_bau_retry_limit __read_mostly;
25 25
26/* position of pnode (which is nasid>>1): */
27static int uv_nshift __read_mostly;
28/* base pnode in this partition */ 26/* base pnode in this partition */
29static int uv_partition_base_pnode __read_mostly; 27static int uv_partition_base_pnode __read_mostly;
30 28
@@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode)
723 BUG_ON(!adp); 721 BUG_ON(!adp);
724 722
725 pa = uv_gpa(adp); /* need the real nasid*/ 723 pa = uv_gpa(adp); /* need the real nasid*/
726 n = pa >> uv_nshift; 724 n = uv_gpa_to_pnode(pa);
727 m = pa & uv_mmask; 725 m = pa & uv_mmask;
728 726
729 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, 727 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
@@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
778 * need the pnode of where the memory was really allocated 776 * need the pnode of where the memory was really allocated
779 */ 777 */
780 pa = uv_gpa(pqp); 778 pa = uv_gpa(pqp);
781 pn = pa >> uv_nshift; 779 pn = uv_gpa_to_pnode(pa);
782 uv_write_global_mmr64(pnode, 780 uv_write_global_mmr64(pnode,
783 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, 781 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
784 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | 782 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
@@ -843,8 +841,7 @@ static int __init uv_bau_init(void)
843 GFP_KERNEL, cpu_to_node(cur_cpu)); 841 GFP_KERNEL, cpu_to_node(cur_cpu));
844 842
845 uv_bau_retry_limit = 1; 843 uv_bau_retry_limit = 1;
846 uv_nshift = uv_hub_info->n_val; 844 uv_mmask = (1UL << uv_hub_info->m_val) - 1;
847 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
848 nblades = uv_num_possible_blades(); 845 nblades = uv_num_possible_blades();
849 846
850 uv_bau_table_bases = (struct bau_control **) 847 uv_bau_table_bases = (struct bau_control **)
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 699f7eeb896a..cd022121cab6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -3,8 +3,16 @@
3#include <asm/trampoline.h> 3#include <asm/trampoline.h>
4#include <asm/e820.h> 4#include <asm/e820.h>
5 5
6#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
7#define __trampinit
8#define __trampinitdata
9#else
10#define __trampinit __cpuinit
11#define __trampinitdata __cpuinitdata
12#endif
13
6/* ready for x86_64 and x86 */ 14/* ready for x86_64 and x86 */
7unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE); 15unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
8 16
9void __init reserve_trampoline_memory(void) 17void __init reserve_trampoline_memory(void)
10{ 18{
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
26 * bootstrap into the page concerned. The caller 34 * bootstrap into the page concerned. The caller
27 * has made sure it's suitably aligned. 35 * has made sure it's suitably aligned.
28 */ 36 */
29unsigned long __cpuinit setup_trampoline(void) 37unsigned long __trampinit setup_trampoline(void)
30{ 38{
31 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); 39 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
32 return virt_to_phys(trampoline_base); 40 return virt_to_phys(trampoline_base);
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 596d54c660a5..3af2dff58b21 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -32,8 +32,12 @@
32#include <asm/segment.h> 32#include <asm/segment.h>
33#include <asm/processor-flags.h> 33#include <asm/processor-flags.h>
34 34
35#ifdef CONFIG_ACPI_SLEEP
36.section .rodata, "a", @progbits
37#else
35/* We can free up the trampoline after bootup if cpu hotplug is not supported. */ 38/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
36__CPUINITRODATA 39__CPUINITRODATA
40#endif
37.code16 41.code16
38 42
39ENTRY(trampoline_data) 43ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 31e6f6cfe53e..d430e4c30193 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
648 648
649 pv_info.paravirt_enabled = 1; 649 pv_info.paravirt_enabled = 1;
650 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; 650 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
651 pv_info.name = "vmi"; 651 pv_info.name = "vmi [deprecated]";
652 652
653 pv_init_ops.patch = vmi_patch; 653 pv_init_ops.patch = vmi_patch;
654 654
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 92929fb3f9fa..3c68fe2d46cf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -305,6 +305,9 @@ SECTIONS
305 305
306 306
307#ifdef CONFIG_X86_32 307#ifdef CONFIG_X86_32
308/*
309 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
310 */
308. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), 311. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
309 "kernel image bigger than KERNEL_IMAGE_SIZE"); 312 "kernel image bigger than KERNEL_IMAGE_SIZE");
310#else 313#else
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523b4901..144e7f60b5e2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
116 * itself with the initial count and continues counting 116 * itself with the initial count and continues counting
117 * from there. 117 * from there.
118 */ 118 */
119 remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); 119 remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
120 elapsed = ps->pit_timer.period - ktime_to_ns(remaining); 120 elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
121 elapsed = mod_64(elapsed, ps->pit_timer.period); 121 elapsed = mod_64(elapsed, ps->pit_timer.period);
122 122
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceba7eb2..23c217692ea9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
521 if (apic_get_reg(apic, APIC_TMICT) == 0) 521 if (apic_get_reg(apic, APIC_TMICT) == 0)
522 return 0; 522 return 0;
523 523
524 remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); 524 remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
525 if (ktime_to_ns(remaining) < 0) 525 if (ktime_to_ns(remaining) < 0)
526 remaining = ktime_set(0, 0); 526 remaining = ktime_set(0, 0);
527 527
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
664{ 664{
665 ktime_t now = apic->lapic_timer.timer.base->get_time(); 665 ktime_t now = apic->lapic_timer.timer.base->get_time();
666 666
667 apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * 667 apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
668 APIC_BUS_CYCLE_NS * apic->divide_count; 668 APIC_BUS_CYCLE_NS * apic->divide_count;
669 atomic_set(&apic->lapic_timer.pending, 0); 669 atomic_set(&apic->lapic_timer.pending, 0);
670 670
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae9f453..818b92ad82cf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
156#define CREATE_TRACE_POINTS 156#define CREATE_TRACE_POINTS
157#include "mmutrace.h" 157#include "mmutrace.h"
158 158
159#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
160
159#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 161#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
160 162
161struct kvm_rmap_desc { 163struct kvm_rmap_desc {
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
634 if (*spte & shadow_accessed_mask) 636 if (*spte & shadow_accessed_mask)
635 kvm_set_pfn_accessed(pfn); 637 kvm_set_pfn_accessed(pfn);
636 if (is_writeble_pte(*spte)) 638 if (is_writeble_pte(*spte))
637 kvm_release_pfn_dirty(pfn); 639 kvm_set_pfn_dirty(pfn);
638 else
639 kvm_release_pfn_clean(pfn);
640 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); 640 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
641 if (!*rmapp) { 641 if (!*rmapp) {
642 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); 642 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
748 return write_protected; 748 return write_protected;
749} 749}
750 750
751static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) 751static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
752 unsigned long data)
752{ 753{
753 u64 *spte; 754 u64 *spte;
754 int need_tlb_flush = 0; 755 int need_tlb_flush = 0;
@@ -763,8 +764,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
763 return need_tlb_flush; 764 return need_tlb_flush;
764} 765}
765 766
767static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
768 unsigned long data)
769{
770 int need_flush = 0;
771 u64 *spte, new_spte;
772 pte_t *ptep = (pte_t *)data;
773 pfn_t new_pfn;
774
775 WARN_ON(pte_huge(*ptep));
776 new_pfn = pte_pfn(*ptep);
777 spte = rmap_next(kvm, rmapp, NULL);
778 while (spte) {
779 BUG_ON(!is_shadow_present_pte(*spte));
780 rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
781 need_flush = 1;
782 if (pte_write(*ptep)) {
783 rmap_remove(kvm, spte);
784 __set_spte(spte, shadow_trap_nonpresent_pte);
785 spte = rmap_next(kvm, rmapp, NULL);
786 } else {
787 new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
788 new_spte |= (u64)new_pfn << PAGE_SHIFT;
789
790 new_spte &= ~PT_WRITABLE_MASK;
791 new_spte &= ~SPTE_HOST_WRITEABLE;
792 if (is_writeble_pte(*spte))
793 kvm_set_pfn_dirty(spte_to_pfn(*spte));
794 __set_spte(spte, new_spte);
795 spte = rmap_next(kvm, rmapp, spte);
796 }
797 }
798 if (need_flush)
799 kvm_flush_remote_tlbs(kvm);
800
801 return 0;
802}
803
766static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 804static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
767 int (*handler)(struct kvm *kvm, unsigned long *rmapp)) 805 unsigned long data,
806 int (*handler)(struct kvm *kvm, unsigned long *rmapp,
807 unsigned long data))
768{ 808{
769 int i, j; 809 int i, j;
770 int retval = 0; 810 int retval = 0;
@@ -786,13 +826,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
786 if (hva >= start && hva < end) { 826 if (hva >= start && hva < end) {
787 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; 827 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
788 828
789 retval |= handler(kvm, &memslot->rmap[gfn_offset]); 829 retval |= handler(kvm, &memslot->rmap[gfn_offset],
830 data);
790 831
791 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 832 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
792 int idx = gfn_offset; 833 int idx = gfn_offset;
793 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 834 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
794 retval |= handler(kvm, 835 retval |= handler(kvm,
795 &memslot->lpage_info[j][idx].rmap_pde); 836 &memslot->lpage_info[j][idx].rmap_pde,
837 data);
796 } 838 }
797 } 839 }
798 } 840 }
@@ -802,10 +844,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
802 844
803int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 845int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
804{ 846{
805 return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 847 return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
806} 848}
807 849
808static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) 850void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
851{
852 kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
853}
854
855static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
856 unsigned long data)
809{ 857{
810 u64 *spte; 858 u64 *spte;
811 int young = 0; 859 int young = 0;
@@ -841,13 +889,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
841 gfn = unalias_gfn(vcpu->kvm, gfn); 889 gfn = unalias_gfn(vcpu->kvm, gfn);
842 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); 890 rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
843 891
844 kvm_unmap_rmapp(vcpu->kvm, rmapp); 892 kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
845 kvm_flush_remote_tlbs(vcpu->kvm); 893 kvm_flush_remote_tlbs(vcpu->kvm);
846} 894}
847 895
848int kvm_age_hva(struct kvm *kvm, unsigned long hva) 896int kvm_age_hva(struct kvm *kvm, unsigned long hva)
849{ 897{
850 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 898 return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
851} 899}
852 900
853#ifdef MMU_DEBUG 901#ifdef MMU_DEBUG
@@ -1756,7 +1804,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1756 unsigned pte_access, int user_fault, 1804 unsigned pte_access, int user_fault,
1757 int write_fault, int dirty, int level, 1805 int write_fault, int dirty, int level,
1758 gfn_t gfn, pfn_t pfn, bool speculative, 1806 gfn_t gfn, pfn_t pfn, bool speculative,
1759 bool can_unsync) 1807 bool can_unsync, bool reset_host_protection)
1760{ 1808{
1761 u64 spte; 1809 u64 spte;
1762 int ret = 0; 1810 int ret = 0;
@@ -1783,6 +1831,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1783 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, 1831 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
1784 kvm_is_mmio_pfn(pfn)); 1832 kvm_is_mmio_pfn(pfn));
1785 1833
1834 if (reset_host_protection)
1835 spte |= SPTE_HOST_WRITEABLE;
1836
1786 spte |= (u64)pfn << PAGE_SHIFT; 1837 spte |= (u64)pfn << PAGE_SHIFT;
1787 1838
1788 if ((pte_access & ACC_WRITE_MASK) 1839 if ((pte_access & ACC_WRITE_MASK)
@@ -1828,7 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1828 unsigned pt_access, unsigned pte_access, 1879 unsigned pt_access, unsigned pte_access,
1829 int user_fault, int write_fault, int dirty, 1880 int user_fault, int write_fault, int dirty,
1830 int *ptwrite, int level, gfn_t gfn, 1881 int *ptwrite, int level, gfn_t gfn,
1831 pfn_t pfn, bool speculative) 1882 pfn_t pfn, bool speculative,
1883 bool reset_host_protection)
1832{ 1884{
1833 int was_rmapped = 0; 1885 int was_rmapped = 0;
1834 int was_writeble = is_writeble_pte(*sptep); 1886 int was_writeble = is_writeble_pte(*sptep);
@@ -1860,7 +1912,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1860 } 1912 }
1861 1913
1862 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, 1914 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
1863 dirty, level, gfn, pfn, speculative, true)) { 1915 dirty, level, gfn, pfn, speculative, true,
1916 reset_host_protection)) {
1864 if (write_fault) 1917 if (write_fault)
1865 *ptwrite = 1; 1918 *ptwrite = 1;
1866 kvm_x86_ops->tlb_flush(vcpu); 1919 kvm_x86_ops->tlb_flush(vcpu);
@@ -1877,8 +1930,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1877 page_header_update_slot(vcpu->kvm, sptep, gfn); 1930 page_header_update_slot(vcpu->kvm, sptep, gfn);
1878 if (!was_rmapped) { 1931 if (!was_rmapped) {
1879 rmap_count = rmap_add(vcpu, sptep, gfn); 1932 rmap_count = rmap_add(vcpu, sptep, gfn);
1880 if (!is_rmap_spte(*sptep)) 1933 kvm_release_pfn_clean(pfn);
1881 kvm_release_pfn_clean(pfn);
1882 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 1934 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1883 rmap_recycle(vcpu, sptep, gfn); 1935 rmap_recycle(vcpu, sptep, gfn);
1884 } else { 1936 } else {
@@ -1909,7 +1961,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1909 if (iterator.level == level) { 1961 if (iterator.level == level) {
1910 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 1962 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
1911 0, write, 1, &pt_write, 1963 0, write, 1, &pt_write,
1912 level, gfn, pfn, false); 1964 level, gfn, pfn, false, true);
1913 ++vcpu->stat.pf_fixed; 1965 ++vcpu->stat.pf_fixed;
1914 break; 1966 break;
1915 } 1967 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c12d22..72558f8ff3f5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
273 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) 273 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
274 return; 274 return;
275 kvm_get_pfn(pfn); 275 kvm_get_pfn(pfn);
276 /*
277 * we call mmu_set_spte() with reset_host_protection = true beacuse that
278 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
279 */
276 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 280 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
277 gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, 281 gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
278 gpte_to_gfn(gpte), pfn, true); 282 gpte_to_gfn(gpte), pfn, true, true);
279} 283}
280 284
281/* 285/*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
308 user_fault, write_fault, 312 user_fault, write_fault,
309 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 313 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
310 ptwrite, level, 314 ptwrite, level,
311 gw->gfn, pfn, false); 315 gw->gfn, pfn, false, true);
312 break; 316 break;
313 } 317 }
314 318
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
558static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 562static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
559{ 563{
560 int i, offset, nr_present; 564 int i, offset, nr_present;
565 bool reset_host_protection;
561 566
562 offset = nr_present = 0; 567 offset = nr_present = 0;
563 568
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
595 600
596 nr_present++; 601 nr_present++;
597 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 602 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
603 if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
604 pte_access &= ~ACC_WRITE_MASK;
605 reset_host_protection = 0;
606 } else {
607 reset_host_protection = 1;
608 }
598 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 609 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
599 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, 610 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
600 spte_to_pfn(sp->spt[i]), true, false); 611 spte_to_pfn(sp->spt[i]), true, false,
612 reset_host_protection);
601 } 613 }
602 614
603 return !nr_present; 615 return !nr_present;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 944cc9c04b3c..c17404add91f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
767 rdtscll(tsc_this); 767 rdtscll(tsc_this);
768 delta = vcpu->arch.host_tsc - tsc_this; 768 delta = vcpu->arch.host_tsc - tsc_this;
769 svm->vmcb->control.tsc_offset += delta; 769 svm->vmcb->control.tsc_offset += delta;
770 if (is_nested(svm))
771 svm->nested.hsave->control.tsc_offset += delta;
770 vcpu->cpu = cpu; 772 vcpu->cpu = cpu;
771 kvm_migrate_timers(vcpu); 773 kvm_migrate_timers(vcpu);
772 svm->asid_generation = 0; 774 svm->asid_generation = 0;
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2057 2059
2058 switch (ecx) { 2060 switch (ecx) {
2059 case MSR_IA32_TSC: { 2061 case MSR_IA32_TSC: {
2060 u64 tsc; 2062 u64 tsc_offset;
2063
2064 if (is_nested(svm))
2065 tsc_offset = svm->nested.hsave->control.tsc_offset;
2066 else
2067 tsc_offset = svm->vmcb->control.tsc_offset;
2061 2068
2062 rdtscll(tsc); 2069 *data = tsc_offset + native_read_tsc();
2063 *data = svm->vmcb->control.tsc_offset + tsc;
2064 break; 2070 break;
2065 } 2071 }
2066 case MSR_K6_STAR: 2072 case MSR_K6_STAR:
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2146 2152
2147 switch (ecx) { 2153 switch (ecx) {
2148 case MSR_IA32_TSC: { 2154 case MSR_IA32_TSC: {
2149 u64 tsc; 2155 u64 tsc_offset = data - native_read_tsc();
2156 u64 g_tsc_offset = 0;
2157
2158 if (is_nested(svm)) {
2159 g_tsc_offset = svm->vmcb->control.tsc_offset -
2160 svm->nested.hsave->control.tsc_offset;
2161 svm->nested.hsave->control.tsc_offset = tsc_offset;
2162 }
2163
2164 svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
2150 2165
2151 rdtscll(tsc);
2152 svm->vmcb->control.tsc_offset = data - tsc;
2153 break; 2166 break;
2154 } 2167 }
2155 case MSR_K6_STAR: 2168 case MSR_K6_STAR:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3812014bd0b..ed53b42caba1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
709 if (vcpu->cpu != cpu) { 709 if (vcpu->cpu != cpu) {
710 vcpu_clear(vmx); 710 vcpu_clear(vmx);
711 kvm_migrate_timers(vcpu); 711 kvm_migrate_timers(vcpu);
712 vpid_sync_vcpu_all(vmx); 712 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
713 local_irq_disable(); 713 local_irq_disable();
714 list_add(&vmx->local_vcpus_link, 714 list_add(&vmx->local_vcpus_link,
715 &per_cpu(vcpus_on_cpu, cpu)); 715 &per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be451ee44249..9b9695322f56 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1591 1591
1592 if (cpuid->nent < 1) 1592 if (cpuid->nent < 1)
1593 goto out; 1593 goto out;
1594 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1595 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
1594 r = -ENOMEM; 1596 r = -ENOMEM;
1595 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 1597 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
1596 if (!cpuid_entries) 1598 if (!cpuid_entries)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 3e549b8ec8c9..85f5db95c60f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -15,8 +15,10 @@ ifeq ($(CONFIG_X86_32),y)
15 obj-y += atomic64_32.o 15 obj-y += atomic64_32.o
16 lib-y += checksum_32.o 16 lib-y += checksum_32.o
17 lib-y += strstr_32.o 17 lib-y += strstr_32.o
18 lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o 18 lib-y += semaphore_32.o string_32.o
19 19ifneq ($(CONFIG_X86_CMPXCHG64),y)
20 lib-y += cmpxchg8b_emu.o
21endif
20 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o 22 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
21else 23else
22 obj-y += io_64.o iomap_copy_64.o 24 obj-y += io_64.o iomap_copy_64.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3439616d69f1..23a4d80fb39e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1075,6 +1075,8 @@ asmlinkage void __init xen_start_kernel(void)
1075 * Set up some pagetable state before starting to set any ptes. 1075 * Set up some pagetable state before starting to set any ptes.
1076 */ 1076 */
1077 1077
1078 xen_init_mmu_ops();
1079
1078 /* Prevent unwanted bits from being set in PTEs. */ 1080 /* Prevent unwanted bits from being set in PTEs. */
1079 __supported_pte_mask &= ~_PAGE_GLOBAL; 1081 __supported_pte_mask &= ~_PAGE_GLOBAL;
1080 if (!xen_initial_domain()) 1082 if (!xen_initial_domain())
@@ -1099,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void)
1099 */ 1101 */
1100 xen_setup_stackprotector(); 1102 xen_setup_stackprotector();
1101 1103
1102 xen_init_mmu_ops();
1103 xen_init_irq_ops(); 1104 xen_init_irq_ops();
1104 xen_init_cpuid_mask(); 1105 xen_init_cpuid_mask();
1105 1106