Diffstat (limited to 'arch/x86')
38 files changed, 289 insertions, 135 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8da93745c087..07e01149e3bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT
 config HAVE_LATENCYTOP_SUPPORT
 	def_bool y
 
-config FAST_CMPXCHG_LOCAL
-	bool
-	default y
-
 config MMU
 	def_bool y
 
@@ -495,7 +491,7 @@ if PARAVIRT_GUEST
 source "arch/x86/xen/Kconfig"
 
 config VMI
-	bool "VMI Guest support"
+	bool "VMI Guest support (DEPRECATED)"
 	select PARAVIRT
 	depends on X86_32
 	---help---
@@ -504,6 +500,15 @@ config VMI
 	  at the moment), by linking the kernel to a GPL-ed ROM module
 	  provided by the hypervisor.
 
+	  As of September 2009, VMware has started a phased retirement
+	  of this feature from VMware's products. Please see
+	  feature-removal-schedule.txt for details. If you are
+	  planning to enable this option, please note that you cannot
+	  live migrate a VMI enabled VM to a future VMware product,
+	  which doesn't support VMI. So if you expect your kernel to
+	  seamlessly migrate to newer VMware products, keep this
+	  disabled.
+
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b8a9f9..f2824fb8c79c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
 	def_bool y
-	depends on X86_PAE || X86_64
+	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
 	int
 	default "64" if X86_64
 	default "6" if X86_32 && X86_P6_NOP
+	default "5" if X86_32 && X86_CMPXCHG64
 	default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
 	default "3"
 
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 0f6ec455a2b1..03c0683636b6 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -53,6 +53,9 @@ SECTIONS
 
 	/DISCARD/ : { *(.note*) }
 
+	/*
+	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+	 */
 	. = ASSERT(_end <= 0x8000, "Setup too big!");
 	. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
 	/* Necessary for the very-old-loader check to work... */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 585edebe12cf..49c552c060e9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
 		return -EINVAL;
 	}
 
-	if (irq_fpu_usable())
+	if (!irq_fpu_usable())
 		err = crypto_aes_expand_key(ctx, in_key, key_len);
 	else {
 		kernel_fpu_begin();
@@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-	if (irq_fpu_usable())
+	if (!irq_fpu_usable())
 		crypto_aes_encrypt_x86(ctx, dst, src);
 	else {
 		kernel_fpu_begin();
@@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-	if (irq_fpu_usable())
+	if (!irq_fpu_usable())
 		crypto_aes_decrypt_x86(ctx, dst, src);
 	else {
 		kernel_fpu_begin();
@@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req)
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 
-	if (irq_fpu_usable()) {
+	if (!irq_fpu_usable()) {
 		struct ablkcipher_request *cryptd_req =
 			ablkcipher_request_ctx(req);
 		memcpy(cryptd_req, req, sizeof(*req));
@@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req)
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 
-	if (irq_fpu_usable()) {
+	if (!irq_fpu_usable()) {
 		struct ablkcipher_request *cryptd_req =
 			ablkcipher_request_ctx(req);
 		memcpy(cryptd_req, req, sizeof(*req));
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 74619c4f9fda..1733f9f65e82 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
 #define __AUDIT_ARCH_LE 0x40000000
 
 #ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
+#define sysexit_audit ia32_ret_from_sys_call
+#define sysretl_audit ia32_ret_from_sys_call
 #endif
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
 	.endm
 
 	/* clobbers %eax */
-	.macro CLEAR_RREGS _r9=rax
+	.macro CLEAR_RREGS offset=0, _r9=rax
 	xorl %eax,%eax
-	movq %rax,R11(%rsp)
-	movq %rax,R10(%rsp)
-	movq %\_r9,R9(%rsp)
-	movq %rax,R8(%rsp)
+	movq %rax,\offset+R11(%rsp)
+	movq %rax,\offset+R10(%rsp)
+	movq %\_r9,\offset+R9(%rsp)
+	movq %rax,\offset+R8(%rsp)
 	.endm
 
 	/*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
 	movl RIP-R11(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 1,24,1,1,1,1
+	xorq %r8,%r8
+	xorq %r9,%r9
+	xorq %r10,%r10
+	xorq %r11,%r11
 	popfq
 	CFI_ADJUST_CFA_OFFSET -8
 	/*CFI_RESTORE rflags*/
@@ -202,7 +206,7 @@ sysexit_from_sys_call:
 
 	.macro auditsys_exit exit,ebpsave=RBP
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
-	jnz int_ret_from_sys_call
+	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	sti
 	movl %eax,%esi		/* second arg, syscall return value */
@@ -218,8 +222,9 @@ sysexit_from_sys_call:
 	cli
 	TRACE_IRQS_OFF
 	testl %edi,TI_flags(%r10)
-	jnz int_with_check
-	jmp \exit
+	jz \exit
+	CLEAR_RREGS -ARGOFFSET
+	jmp int_with_check
 	.endm
 
 sysenter_auditsys:
@@ -329,6 +334,9 @@ sysretl_from_sys_call:
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
+	xorq %r10,%r10
+	xorq %r9,%r9
+	xorq %r8,%r8
 	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
@@ -353,7 +361,7 @@ cstar_tracesys:
 #endif
 	xchgl %r9d,%ebp
 	SAVE_REST
-	CLEAR_RREGS r9
+	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
@@ -425,6 +433,8 @@ ia32_do_call:
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
+ia32_ret_from_sys_call:
+	CLEAR_RREGS -ARGOFFSET
 	jmp int_ret_from_sys_call
 
 ia32_tracesys:
@@ -442,8 +452,8 @@ END(ia32_syscall)
 
 ia32_badsys:
 	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
-	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
-	jmp int_ret_from_sys_call
+	movq $-ENOSYS,%rax
+	jmp ia32_sysret
 
 quiet_ni_syscall:
 	movq $-ENOSYS,%rax
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fad..d83892226f73 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64c5814..f1363b72364f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
 static inline void enable_p5_mce(void) {}
 #endif
 
+extern void (*x86_mce_decode_callback)(struct mce *m);
+
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8aebcc41041d..efb38994859c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
 
 static inline unsigned long __raw_local_save_flags(void)
 {
-	unsigned long f;
-
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     : "=a"(f)
-		     : paravirt_type(pv_irq_ops.save_fl),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc");
-	return f;
+	return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
 }
 
 static inline void raw_local_irq_restore(unsigned long f)
 {
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     : "=a"(f)
-		     : PV_FLAGS_ARG(f),
-		       paravirt_type(pv_irq_ops.restore_fl),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc");
+	PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
 }
 
 static inline void raw_local_irq_disable(void)
 {
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     :
-		     : paravirt_type(pv_irq_ops.irq_disable),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc");
+	PVOP_VCALLEE0(pv_irq_ops.irq_disable);
 }
 
 static inline void raw_local_irq_enable(void)
 {
-	asm volatile(paravirt_alt(PARAVIRT_CALL)
-		     :
-		     : paravirt_type(pv_irq_ops.irq_enable),
-		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc");
+	PVOP_VCALLEE0(pv_irq_ops.irq_enable);
 }
 
 static inline unsigned long __raw_local_irq_save(void)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index dd0f5b32489d..9357473c8da0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void);
 #define EXTRA_CLOBBERS
 #define VEXTRA_CLOBBERS
 #else /* CONFIG_X86_64 */
+/* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS \
 	unsigned long __edi = __edi, __esi = __esi, \
-		__edx = __edx, __ecx = __ecx
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
+		__edx = __edx, __ecx = __ecx, __eax = __eax
+#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
 #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void);
 	"=c" (__ecx)
 #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
 
+/* void functions are still allowed [re]ax for scratch */
 #define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
 #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
 
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void);
 		      VEXTRA_CLOBBERS, \
 		      pre, post, ##__VA_ARGS__)
 
-#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
-	____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
+	____PVOP_VCALL(op.func, CLBR_RET_REG, \
 		      PVOP_VCALLEE_CLOBBERS, , \
 		      pre, post, ##__VA_ARGS__)
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 25a92842dd99..d823c245f63b 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,6 +143,7 @@ extern unsigned long node_remap_size[];
 				| 1*SD_BALANCE_FORK \
 				| 0*SD_BALANCE_WAKE \
 				| 1*SD_WAKE_AFFINE \
+				| 1*SD_PREFER_LOCAL \
 				| 0*SD_SHARE_CPUPOWER \
 				| 0*SD_POWERSAVINGS_BALANCE \
 				| 0*SD_SHARE_PKG_RESOURCES \
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 04eb6c958b9d..d1414af98559 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -19,6 +19,8 @@
 #include <asm/types.h>
 #include <asm/percpu.h>
 #include <asm/uv/uv_mmrs.h>
+#include <asm/irq_vectors.h>
+#include <asm/io_apic.h>
 
 
 /*
@@ -114,7 +116,7 @@
 /*
  * The largest possible NASID of a C or M brick (+ 2)
  */
-#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2)
+#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
 
 struct uv_scir_s {
 	struct timer_list timer;
@@ -230,6 +232,20 @@ static inline unsigned long uv_gpa(void *v)
 	return uv_soc_phys_ram_to_gpa(__pa(v));
 }
 
+/* gnode -> pnode */
+static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
+{
+	return gpa >> uv_hub_info->m_val;
+}
+
+/* gpa -> pnode */
+static inline int uv_gpa_to_pnode(unsigned long gpa)
+{
+	unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1;
+
+	return uv_gpa_to_gnode(gpa) & n_mask;
+}
+
 /* pnode, offset --> socket virtual */
 static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
 {
@@ -421,9 +437,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
 static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
 {
 	unsigned long val;
+	unsigned long dmode = dest_Fixed;
+
+	if (vector == NMI_VECTOR)
+		dmode = dest_NMI;
 
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 			((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
+			(dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
 			(vector << UVH_IPI_INT_VECTOR_SHFT);
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 7da00b799cda..060fff8f5c5b 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -57,5 +57,8 @@ SECTIONS
 		*(.note*)
 	}
 
+	/*
+	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+	 */
 	. = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
 }
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f5f5886a6b53..326c25477d3d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -352,14 +352,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 
 	for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
 		alias.v = uv_read_local_mmr(redir_addrs[i].alias);
-		if (alias.s.base == 0) {
+		if (alias.s.enable && alias.s.base == 0) {
 			*size = (1UL << alias.s.m_alias);
 			redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
 			*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
 			return;
 		}
 	}
-	BUG();
+	*base = *size = 0;
 }
 
 enum map_type {map_wb, map_uc};
@@ -619,12 +619,12 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
 		uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
 		uv_cpu_hub_info(cpu)->m_val = m_val;
-		uv_cpu_hub_info(cpu)->n_val = m_val;
+		uv_cpu_hub_info(cpu)->n_val = n_val;
 		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
 		uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
 		uv_cpu_hub_info(cpu)->pnode = pnode;
 		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
-		uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
+		uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 183c3457d2f4..721a77ca8115 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
+static void default_decode_mce(struct mce *m)
+{
+	pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+	pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
 
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,46 +177,46 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
-void __weak decode_mce(struct mce *m)
-{
-	return;
-}
-
 static void print_mce(struct mce *m)
 {
-	printk(KERN_EMERG
-	       "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+	pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
+
 	if (m->ip) {
-		printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
+		pr_emerg("RIP%s %02x:<%016Lx> ",
 		       !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
 		       m->cs, m->ip);
+
 		if (m->cs == __KERNEL_CS)
 			print_symbol("{%s}", m->ip);
-		printk(KERN_CONT "\n");
+		pr_cont("\n");
 	}
-	printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+	pr_emerg("TSC %llx ", m->tsc);
 	if (m->addr)
-		printk(KERN_CONT "ADDR %llx ", m->addr);
+		pr_cont("ADDR %llx ", m->addr);
 	if (m->misc)
-		printk(KERN_CONT "MISC %llx ", m->misc);
-	printk(KERN_CONT "\n");
-	printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
-	       m->cpuvendor, m->cpuid, m->time, m->socketid,
-	       m->apicid);
+		pr_cont("MISC %llx ", m->misc);
 
-	decode_mce(m);
+	pr_cont("\n");
+	pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+
+	/*
+	 * Print out human-readable details about the MCE error,
+	 * (if the CPU has an implementation for that):
+	 */
+	x86_mce_decode_callback(m);
 }
 
 static void print_mce_head(void)
 {
-	printk(KERN_EMERG "\nHARDWARE ERROR\n");
+	pr_emerg("\nHARDWARE ERROR\n");
 }
 
 static void print_mce_tail(void)
 {
-	printk(KERN_EMERG "This is not a software problem!\n"
-	       "Run through mcelog --ascii to decode and contact your hardware vendor\n");
+	pr_emerg("This is not a software problem!\n");
 }
 
@@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced;
 static void wait_for_panic(void)
 {
 	long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
 	preempt_disable();
 	local_irq_enable();
 	while (timeout-- > 0)
@@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 static int msr_to_offset(u32 msr)
 {
 	unsigned bank = __get_cpu_var(injectm.bank);
+
 	if (msr == rip_msr)
 		return offsetof(struct mce, ip);
 	if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -1200,7 +1214,8 @@ static int __cpuinit mce_cap_init(void)
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 
 	b = cap & MCG_BANKCNT_MASK;
-	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+	if (!banks)
+		printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
 
 	if (b > MAX_NR_BANKS) {
 		printk(KERN_WARNING
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 889f665fe93d..7c785634af2b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/sched.h>
 #include <asm/apic.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index f04e72527604..3c1b12d461d1 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 	unsigned long long base, size;
 	char *ptr;
 	char line[LINE_SIZE];
+	int length;
 	size_t linelen;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (!len)
-		return -EINVAL;
 
 	memset(line, 0, LINE_SIZE);
-	if (len > LINE_SIZE)
-		len = LINE_SIZE;
-	if (copy_from_user(line, buf, len - 1))
+
+	length = len;
+	length--;
+
+	if (length > LINE_SIZE - 1)
+		length = LINE_SIZE - 1;
+
+	if (length < 0)
+		return -EINVAL;
+
+	if (copy_from_user(line, buf, length))
 		return -EFAULT;
 
 	linelen = strlen(line);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 85419bb7d4ab..d17d482a04f4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
 	if (mb < 16)
 		return 1024*1024;
 
-	/* To 32MB for anything above that */
-	return 32*1024*1024;
+	/* To 64MB for anything above that */
+	return 64*1024*1024;
 }
 
 #define MAX_RESOURCE_SIZE ((resource_size_t)-1)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 41fd965c80c6..b9c830c12b4a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -206,8 +206,11 @@ static int __init setup_early_printk(char *buf)
 
 	while (*buf != '\0') {
 		if (!strncmp(buf, "serial", 6)) {
-			early_serial_init(buf + 6);
+			buf += 6;
+			early_serial_init(buf);
 			early_console_register(&early_serial_console, keep);
+			if (!strncmp(buf, ",ttyS", 5))
+				buf += 5;
 		}
 		if (!strncmp(buf, "ttyS", 4)) {
 			early_serial_init(buf + 4);
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 1736c5a725aa..9c3bd4a2050e 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -15,8 +15,10 @@ EXPORT_SYMBOL(mcount);
  * the export, but dont use it from C code, it is used
  * by assembly code and is not using C calling convention!
  */
+#ifndef CONFIG_X86_CMPXCHG64
 extern void cmpxchg8b_emu(void);
 EXPORT_SYMBOL(cmpxchg8b_emu);
+#endif
 
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 74656d1d4e30..04bbd5278568 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
-	seq_printf(p, "%*s: ", prec, "CNT");
+	seq_printf(p, "%*s: ", prec, "PMI");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
-	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "  Performance monitoring interrupts\n");
 	seq_printf(p, "%*s: ", prec, "PND");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 64b838eac18c..b2a71dca5642 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
 
 /*
  * This variable becomes 1 if iommu=pt is passed on the kernel command line.
- * If this variable is 1, IOMMU implementations do no DMA ranslation for
+ * If this variable is 1, IOMMU implementations do no DMA translation for
  * devices and allow every device to access to whole physical memory. This is
  * useful if a user want to use an IOMMU only for KVM device assignment to
  * guests and not for driver dma translation.
@@ -311,7 +311,7 @@ void pci_iommu_shutdown(void)
 		amd_iommu_shutdown();
 }
 /* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
 
 #ifdef CONFIG_PCI
 /* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 98a827ee9ed7..a7f1b64f86e0 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -16,6 +16,7 @@
 #include <linux/agp_backend.h>
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 27349f92a6d7..a1a3cdda06e1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
 #include <linux/pm.h>
 #include <linux/efi.h>
 #include <linux/dmi.h>
+#include <linux/sched.h>
 #include <linux/tboot.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dcb00d278512..be2573448ed9 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
 #ifdef CONFIG_FRAME_POINTER
 	return *(unsigned long *)(regs->bp + sizeof(long));
 #else
-	unsigned long *sp = (unsigned long *)regs->sp;
+	unsigned long *sp =
+		(unsigned long *)kernel_stack_pointer(regs);
 	/*
 	 * Return address is either directly at stack pointer
 	 * or above a saved flags. Eflags has bits 22-31 zero,
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 503c1f2e8835..1740c85e24bb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -23,8 +23,6 @@
 static struct bau_control **uv_bau_table_bases __read_mostly;
 static int uv_bau_retry_limit __read_mostly;
 
-/* position of pnode (which is nasid>>1): */
-static int uv_nshift __read_mostly;
 /* base pnode in this partition */
 static int uv_partition_base_pnode __read_mostly;
 
@@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode)
 	BUG_ON(!adp);
 
 	pa = uv_gpa(adp); /* need the real nasid*/
-	n = pa >> uv_nshift;
+	n = uv_gpa_to_pnode(pa);
 	m = pa & uv_mmask;
 
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
@@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
 	 * need the pnode of where the memory was really allocated
 	 */
 	pa = uv_gpa(pqp);
-	pn = pa >> uv_nshift;
+	pn = uv_gpa_to_pnode(pa);
 	uv_write_global_mmr64(pnode,
 			      UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
 			      ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
@@ -843,8 +841,7 @@ static int __init uv_bau_init(void)
 			GFP_KERNEL, cpu_to_node(cur_cpu));
 
 	uv_bau_retry_limit = 1;
-	uv_nshift = uv_hub_info->n_val;
-	uv_mmask = (1UL << uv_hub_info->n_val) - 1;
+	uv_mmask = (1UL << uv_hub_info->m_val) - 1;
 	nblades = uv_num_possible_blades();
 
 	uv_bau_table_bases = (struct bau_control **)
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 699f7eeb896a..cd022121cab6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -3,8 +3,16 @@
 #include <asm/trampoline.h>
 #include <asm/e820.h>
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
+#define __trampinit
+#define __trampinitdata
+#else
+#define __trampinit __cpuinit
+#define __trampinitdata __cpuinitdata
+#endif
+
 /* ready for x86_64 and x86 */
-unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
 
 void __init reserve_trampoline_memory(void)
 {
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
  * bootstrap into the page concerned. The caller
  * has made sure it's suitably aligned.
  */
-unsigned long __cpuinit setup_trampoline(void)
+unsigned long __trampinit setup_trampoline(void)
 {
 	memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
 	return virt_to_phys(trampoline_base);
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 596d54c660a5..3af2dff58b21 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -32,8 +32,12 @@
 #include <asm/segment.h>
 #include <asm/processor-flags.h>
 
+#ifdef CONFIG_ACPI_SLEEP
+.section .rodata, "a", @progbits
+#else
 /* We can free up the trampoline after bootup if cpu hotplug is not supported. */
 __CPUINITRODATA
+#endif
 .code16
 
 ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 31e6f6cfe53e..d430e4c30193 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
 
 	pv_info.paravirt_enabled = 1;
 	pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
-	pv_info.name = "vmi";
+	pv_info.name = "vmi [deprecated]";
 
 	pv_init_ops.patch = vmi_patch;
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 92929fb3f9fa..3c68fe2d46cf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -305,6 +305,9 @@ SECTIONS
 
 
 #ifdef CONFIG_X86_32
+/*
+ * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+ */
 . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
 	   "kernel image bigger than KERNEL_IMAGE_SIZE");
 #else
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523b4901..144e7f60b5e2 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	 * itself with the initial count and continues counting
 	 * from there.
 	 */
-	remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
+	remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
 	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
 	elapsed = mod_64(elapsed, ps->pit_timer.period);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceba7eb2..23c217692ea9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
+	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = ktime_set(0, 0);
 
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now = apic->lapic_timer.timer.base->get_time();
 
-	apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+	apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
 		    APIC_BUS_CYCLE_NS * apic->divide_count;
 	atomic_set(&apic->lapic_timer.pending, 0);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca41ae9f453..818b92ad82cf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644); | |||
156 | #define CREATE_TRACE_POINTS | 156 | #define CREATE_TRACE_POINTS |
157 | #include "mmutrace.h" | 157 | #include "mmutrace.h" |
158 | 158 | ||
159 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | ||
160 | |||
159 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 161 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
160 | 162 | ||
161 | struct kvm_rmap_desc { | 163 | struct kvm_rmap_desc { |
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
634 | if (*spte & shadow_accessed_mask) | 636 | if (*spte & shadow_accessed_mask) |
635 | kvm_set_pfn_accessed(pfn); | 637 | kvm_set_pfn_accessed(pfn); |
636 | if (is_writeble_pte(*spte)) | 638 | if (is_writeble_pte(*spte)) |
637 | kvm_release_pfn_dirty(pfn); | 639 | kvm_set_pfn_dirty(pfn); |
638 | else | ||
639 | kvm_release_pfn_clean(pfn); | ||
640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | 640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); |
641 | if (!*rmapp) { | 641 | if (!*rmapp) { |
642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
@@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
748 | return write_protected; | 748 | return write_protected; |
749 | } | 749 | } |
750 | 750 | ||
751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
752 | unsigned long data) | ||
752 | { | 753 | { |
753 | u64 *spte; | 754 | u64 *spte; |
754 | int need_tlb_flush = 0; | 755 | int need_tlb_flush = 0; |
@@ -763,8 +764,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | |||
763 | return need_tlb_flush; | 764 | return need_tlb_flush; |
764 | } | 765 | } |
765 | 766 | ||
767 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
768 | unsigned long data) | ||
769 | { | ||
770 | int need_flush = 0; | ||
771 | u64 *spte, new_spte; | ||
772 | pte_t *ptep = (pte_t *)data; | ||
773 | pfn_t new_pfn; | ||
774 | |||
775 | WARN_ON(pte_huge(*ptep)); | ||
776 | new_pfn = pte_pfn(*ptep); | ||
777 | spte = rmap_next(kvm, rmapp, NULL); | ||
778 | while (spte) { | ||
779 | BUG_ON(!is_shadow_present_pte(*spte)); | ||
780 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | ||
781 | need_flush = 1; | ||
782 | if (pte_write(*ptep)) { | ||
783 | rmap_remove(kvm, spte); | ||
784 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
785 | spte = rmap_next(kvm, rmapp, NULL); | ||
786 | } else { | ||
787 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | ||
788 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | ||
789 | |||
790 | new_spte &= ~PT_WRITABLE_MASK; | ||
791 | new_spte &= ~SPTE_HOST_WRITEABLE; | ||
792 | if (is_writeble_pte(*spte)) | ||
793 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | ||
794 | __set_spte(spte, new_spte); | ||
795 | spte = rmap_next(kvm, rmapp, spte); | ||
796 | } | ||
797 | } | ||
798 | if (need_flush) | ||
799 | kvm_flush_remote_tlbs(kvm); | ||
800 | |||
801 | return 0; | ||
802 | } | ||
803 | |||
766 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | 804 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, |
767 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | 805 | unsigned long data, |
806 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
807 | unsigned long data)) | ||
768 | { | 808 | { |
769 | int i, j; | 809 | int i, j; |
770 | int retval = 0; | 810 | int retval = 0; |
@@ -786,13 +826,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
786 | if (hva >= start && hva < end) { | 826 | if (hva >= start && hva < end) { |
787 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 827 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
788 | 828 | ||
789 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | 829 | retval |= handler(kvm, &memslot->rmap[gfn_offset], |
830 | data); | ||
790 | 831 | ||
791 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 832 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
792 | int idx = gfn_offset; | 833 | int idx = gfn_offset; |
793 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 834 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); |
794 | retval |= handler(kvm, | 835 | retval |= handler(kvm, |
795 | &memslot->lpage_info[j][idx].rmap_pde); | 836 | &memslot->lpage_info[j][idx].rmap_pde, |
837 | data); | ||
796 | } | 838 | } |
797 | } | 839 | } |
798 | } | 840 | } |
@@ -802,10 +844,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
802 | 844 | ||
803 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 845 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
804 | { | 846 | { |
805 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | 847 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); |
806 | } | 848 | } |
807 | 849 | ||
808 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | 850 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) |
851 | { | ||
852 | kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); | ||
853 | } | ||
854 | |||
855 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
856 | unsigned long data) | ||
809 | { | 857 | { |
810 | u64 *spte; | 858 | u64 *spte; |
811 | int young = 0; | 859 | int young = 0; |
@@ -841,13 +889,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
841 | gfn = unalias_gfn(vcpu->kvm, gfn); | 889 | gfn = unalias_gfn(vcpu->kvm, gfn); |
842 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 890 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
843 | 891 | ||
844 | kvm_unmap_rmapp(vcpu->kvm, rmapp); | 892 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); |
845 | kvm_flush_remote_tlbs(vcpu->kvm); | 893 | kvm_flush_remote_tlbs(vcpu->kvm); |
846 | } | 894 | } |
847 | 895 | ||
848 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 896 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) |
849 | { | 897 | { |
850 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | 898 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); |
851 | } | 899 | } |
852 | 900 | ||
853 | #ifdef MMU_DEBUG | 901 | #ifdef MMU_DEBUG |
@@ -1756,7 +1804,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1756 | unsigned pte_access, int user_fault, | 1804 | unsigned pte_access, int user_fault, |
1757 | int write_fault, int dirty, int level, | 1805 | int write_fault, int dirty, int level, |
1758 | gfn_t gfn, pfn_t pfn, bool speculative, | 1806 | gfn_t gfn, pfn_t pfn, bool speculative, |
1759 | bool can_unsync) | 1807 | bool can_unsync, bool reset_host_protection) |
1760 | { | 1808 | { |
1761 | u64 spte; | 1809 | u64 spte; |
1762 | int ret = 0; | 1810 | int ret = 0; |
@@ -1783,6 +1831,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1783 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, | 1831 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1784 | kvm_is_mmio_pfn(pfn)); | 1832 | kvm_is_mmio_pfn(pfn)); |
1785 | 1833 | ||
1834 | if (reset_host_protection) | ||
1835 | spte |= SPTE_HOST_WRITEABLE; | ||
1836 | |||
1786 | spte |= (u64)pfn << PAGE_SHIFT; | 1837 | spte |= (u64)pfn << PAGE_SHIFT; |
1787 | 1838 | ||
1788 | if ((pte_access & ACC_WRITE_MASK) | 1839 | if ((pte_access & ACC_WRITE_MASK) |
@@ -1828,7 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1828 | unsigned pt_access, unsigned pte_access, | 1879 | unsigned pt_access, unsigned pte_access, |
1829 | int user_fault, int write_fault, int dirty, | 1880 | int user_fault, int write_fault, int dirty, |
1830 | int *ptwrite, int level, gfn_t gfn, | 1881 | int *ptwrite, int level, gfn_t gfn, |
1831 | pfn_t pfn, bool speculative) | 1882 | pfn_t pfn, bool speculative, |
1883 | bool reset_host_protection) | ||
1832 | { | 1884 | { |
1833 | int was_rmapped = 0; | 1885 | int was_rmapped = 0; |
1834 | int was_writeble = is_writeble_pte(*sptep); | 1886 | int was_writeble = is_writeble_pte(*sptep); |
@@ -1860,7 +1912,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1860 | } | 1912 | } |
1861 | 1913 | ||
1862 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 1914 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
1863 | dirty, level, gfn, pfn, speculative, true)) { | 1915 | dirty, level, gfn, pfn, speculative, true, |
1916 | reset_host_protection)) { | ||
1864 | if (write_fault) | 1917 | if (write_fault) |
1865 | *ptwrite = 1; | 1918 | *ptwrite = 1; |
1866 | kvm_x86_ops->tlb_flush(vcpu); | 1919 | kvm_x86_ops->tlb_flush(vcpu); |
@@ -1877,8 +1930,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1877 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 1930 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
1878 | if (!was_rmapped) { | 1931 | if (!was_rmapped) { |
1879 | rmap_count = rmap_add(vcpu, sptep, gfn); | 1932 | rmap_count = rmap_add(vcpu, sptep, gfn); |
1880 | if (!is_rmap_spte(*sptep)) | 1933 | kvm_release_pfn_clean(pfn); |
1881 | kvm_release_pfn_clean(pfn); | ||
1882 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 1934 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1883 | rmap_recycle(vcpu, sptep, gfn); | 1935 | rmap_recycle(vcpu, sptep, gfn); |
1884 | } else { | 1936 | } else { |
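The last hunk also drops the is_rmap_spte() guard around the pfn release: every caller of mmu_set_spte() takes a pfn reference before calling in, so exactly one kvm_release_pfn_clean() must follow, whether or not a fresh rmap entry was created. The intended pairing, with an illustrative (not real) helper name:

static void install_spte_sketch(struct kvm_vcpu *vcpu, u64 *sptep,
				gfn_t gfn, pfn_t pfn)
{
	/* pfn arrives here with a reference held by the caller ...  */
	/* ... spte and (if new) rmap entry are installed, then:     */
	kvm_release_pfn_clean(pfn);	/* drop it unconditionally    */
}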
@@ -1909,7 +1961,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1909 | if (iterator.level == level) { | 1961 | if (iterator.level == level) { |
1910 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 1962 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
1911 | 0, write, 1, &pt_write, | 1963 | 0, write, 1, &pt_write, |
1912 | level, gfn, pfn, false); | 1964 | level, gfn, pfn, false, true); |
1913 | ++vcpu->stat.pf_fixed; | 1965 | ++vcpu->stat.pf_fixed; |
1914 | break; | 1966 | break; |
1915 | } | 1967 | } |
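__direct_map() hard-codes reset_host_protection = true: on this path the pfn was just faulted in writably on the host side, so stamping SPTE_HOST_WRITEABLE is always legitimate. Condensed sketch, with gfn_to_pfn() shown as the representative pfn source and the level walk elided:

static int direct_map_sketch(struct kvm_vcpu *vcpu, gfn_t gfn, int write)
{
	pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);	/* writable host ref */
	int pt_write = 0;

	/* walk down to the target level, then map with the final     */
	/* 'true' being reset_host_protection:                        */
	/*   mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 0, write, 1, */
	/*                &pt_write, level, gfn, pfn, false, true);    */
	return pt_write;
}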
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d2fec9c12d22..72558f8ff3f5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
273 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | 273 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) |
274 | return; | 274 | return; |
275 | kvm_get_pfn(pfn); | 275 | kvm_get_pfn(pfn); |
276 | /* | ||
277 | * we call mmu_set_spte() with reset_host_protection = true because | ||
278 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | ||
279 | */ | ||
276 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 280 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
277 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, | 281 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, |
278 | gpte_to_gfn(gpte), pfn, true); | 282 | gpte_to_gfn(gpte), pfn, true, true); |
279 | } | 283 | } |
280 | 284 | ||
281 | /* | 285 | /* |
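The comment added above records why reset_host_protection = true is safe here: vcpu->arch.update_pte.pfn was pinned with get_user_pages(write = 1), so the host mapping is already writable. For reference, a minimal sketch of such a pinning call in the API of this era (the helper name and error handling are illustrative):

static int pin_one_writable_page(unsigned long hva, struct page **page)
{
	int got;

	down_read(&current->mm->mmap_sem);
	got = get_user_pages(current, current->mm, hva & PAGE_MASK,
			     1 /* nr_pages */, 1 /* write */,
			     0 /* force */, page, NULL);
	up_read(&current->mm->mmap_sem);

	return got == 1 ? 0 : -EFAULT;	/* caller put_page()s later */
}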
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
308 | user_fault, write_fault, | 312 | user_fault, write_fault, |
309 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | 313 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, |
310 | ptwrite, level, | 314 | ptwrite, level, |
311 | gw->gfn, pfn, false); | 315 | gw->gfn, pfn, false, true); |
312 | break; | 316 | break; |
313 | } | 317 | } |
314 | 318 | ||
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
558 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 562 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
559 | { | 563 | { |
560 | int i, offset, nr_present; | 564 | int i, offset, nr_present; |
565 | bool reset_host_protection; | ||
561 | 566 | ||
562 | offset = nr_present = 0; | 567 | offset = nr_present = 0; |
563 | 568 | ||
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
595 | 600 | ||
596 | nr_present++; | 601 | nr_present++; |
597 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 602 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
603 | if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { | ||
604 | pte_access &= ~ACC_WRITE_MASK; | ||
605 | reset_host_protection = 0; | ||
606 | } else { | ||
607 | reset_host_protection = 1; | ||
608 | } | ||
598 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 609 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
599 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, | 610 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, |
600 | spte_to_pfn(sp->spt[i]), true, false); | 611 | spte_to_pfn(sp->spt[i]), true, false, |
612 | reset_host_protection); | ||
601 | } | 613 | } |
602 | 614 | ||
603 | return !nr_present; | 615 | return !nr_present; |
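sync_page() is where SPTE_HOST_WRITEABLE pays off: a resynced spte whose host-writable bit is clear keeps ACC_WRITE_MASK stripped and passes reset_host_protection = 0, so set_spte() cannot silently re-grant write access that the host (e.g. an mmu-notifier-driven write protection) revoked. The decision distilled into a helper; the function name and simplified types are illustrative:

static unsigned sync_access(u64 old_spte, unsigned pte_access,
			    bool *reset_host_protection)
{
	if (!(old_spte & SPTE_HOST_WRITEABLE)) {
		pte_access &= ~ACC_WRITE_MASK;	/* stay read-only */
		*reset_host_protection = false;
	} else {
		*reset_host_protection = true;
	}
	return pte_access;
}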
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 944cc9c04b3c..c17404add91f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
767 | rdtscll(tsc_this); | 767 | rdtscll(tsc_this); |
768 | delta = vcpu->arch.host_tsc - tsc_this; | 768 | delta = vcpu->arch.host_tsc - tsc_this; |
769 | svm->vmcb->control.tsc_offset += delta; | 769 | svm->vmcb->control.tsc_offset += delta; |
770 | if (is_nested(svm)) | ||
771 | svm->nested.hsave->control.tsc_offset += delta; | ||
770 | vcpu->cpu = cpu; | 772 | vcpu->cpu = cpu; |
771 | kvm_migrate_timers(vcpu); | 773 | kvm_migrate_timers(vcpu); |
772 | svm->asid_generation = 0; | 774 | svm->asid_generation = 0; |
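On vcpu migration the TSC delta between the old and new cpu is folded into the running vmcb's tsc_offset; with nested SVM, the L1 state parked in nested.hsave needs the identical adjustment, or L1 would observe a TSC jump after a migration that occurred while L2 was active. Sketched with the field names from the hunk (the wrapper itself is illustrative):

static void adjust_tsc_on_migrate(struct vcpu_svm *svm, u64 tsc_this)
{
	u64 delta = svm->vcpu.arch.host_tsc - tsc_this;

	svm->vmcb->control.tsc_offset += delta;
	if (is_nested(svm))	/* L2 active: patch L1's copy too */
		svm->nested.hsave->control.tsc_offset += delta;
}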
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2057 | 2059 | ||
2058 | switch (ecx) { | 2060 | switch (ecx) { |
2059 | case MSR_IA32_TSC: { | 2061 | case MSR_IA32_TSC: { |
2060 | u64 tsc; | 2062 | u64 tsc_offset; |
2063 | |||
2064 | if (is_nested(svm)) | ||
2065 | tsc_offset = svm->nested.hsave->control.tsc_offset; | ||
2066 | else | ||
2067 | tsc_offset = svm->vmcb->control.tsc_offset; | ||
2061 | 2068 | ||
2062 | rdtscll(tsc); | 2069 | *data = tsc_offset + native_read_tsc(); |
2063 | *data = svm->vmcb->control.tsc_offset + tsc; | ||
2064 | break; | 2070 | break; |
2065 | } | 2071 | } |
2066 | case MSR_K6_STAR: | 2072 | case MSR_K6_STAR: |
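Reading MSR_IA32_TSC now models guest_tsc = host_tsc + tsc_offset directly via native_read_tsc(), selecting L1's saved offset from nested.hsave while L2 is active (the in-use vmcb then carries L2's offset, not L1's). An equivalent sketch:

static u64 read_guest_tsc(struct vcpu_svm *svm)	/* sketch */
{
	u64 off = is_nested(svm) ? svm->nested.hsave->control.tsc_offset
				 : svm->vmcb->control.tsc_offset;

	return off + native_read_tsc();
}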
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2146 | 2152 | ||
2147 | switch (ecx) { | 2153 | switch (ecx) { |
2148 | case MSR_IA32_TSC: { | 2154 | case MSR_IA32_TSC: { |
2149 | u64 tsc; | 2155 | u64 tsc_offset = data - native_read_tsc(); |
2156 | u64 g_tsc_offset = 0; | ||
2157 | |||
2158 | if (is_nested(svm)) { | ||
2159 | g_tsc_offset = svm->vmcb->control.tsc_offset - | ||
2160 | svm->nested.hsave->control.tsc_offset; | ||
2161 | svm->nested.hsave->control.tsc_offset = tsc_offset; | ||
2162 | } | ||
2163 | |||
2164 | svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset; | ||
2150 | 2165 | ||
2151 | rdtscll(tsc); | ||
2152 | svm->vmcb->control.tsc_offset = data - tsc; | ||
2153 | break; | 2166 | break; |
2154 | } | 2167 | } |
2155 | case MSR_K6_STAR: | 2168 | case MSR_K6_STAR: |
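The write side inverts the same relation, tsc_offset = data - host_tsc, while keeping the L1-to-L2 delta intact: g_tsc_offset is whatever extra offset L1 had granted L2, the requested offset lands in L1's hsave copy, and the active vmcb gets the sum so both views shift together. Restated as a sketch:

/*
 *   L1 view: guest_tsc = host_tsc + hsave_offset
 *   L2 view: guest_tsc = host_tsc + vmcb_offset
 *            vmcb_offset = hsave_offset + g_tsc_offset
 * A write of 'data' to L1's TSC must preserve g_tsc_offset.
 */
static void write_guest_tsc(struct vcpu_svm *svm, u64 data)	/* sketch */
{
	u64 tsc_offset = data - native_read_tsc();
	u64 g_tsc_offset = 0;

	if (is_nested(svm)) {
		g_tsc_offset = svm->vmcb->control.tsc_offset -
			       svm->nested.hsave->control.tsc_offset;
		svm->nested.hsave->control.tsc_offset = tsc_offset;
	}
	svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
}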
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3812014bd0b..ed53b42caba1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
709 | if (vcpu->cpu != cpu) { | 709 | if (vcpu->cpu != cpu) { |
710 | vcpu_clear(vmx); | 710 | vcpu_clear(vmx); |
711 | kvm_migrate_timers(vcpu); | 711 | kvm_migrate_timers(vcpu); |
712 | vpid_sync_vcpu_all(vmx); | 712 | set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); |
713 | local_irq_disable(); | 713 | local_irq_disable(); |
714 | list_add(&vmx->local_vcpus_link, | 714 | list_add(&vmx->local_vcpus_link, |
715 | &per_cpu(vcpus_on_cpu, cpu)); | 715 | &per_cpu(vcpus_on_cpu, cpu)); |
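Rather than flushing the VPID synchronously from vmx_vcpu_load(), the change queues KVM_REQ_TLB_FLUSH in vcpu->requests and defers the flush to the next guest entry, where the common entry code performs whichever flush the current mode requires. The producer/consumer idiom, sketched (consumer side is the generic entry-path pattern, not this hunk):

static void queue_tlb_flush(struct kvm_vcpu *vcpu)	/* producer */
{
	set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
}

/* Consumer, roughly, on the next entry:                             */
/*	if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) */
/*		kvm_x86_ops->tlb_flush(vcpu);                        */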
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be451ee44249..9b9695322f56 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
1591 | 1591 | ||
1592 | if (cpuid->nent < 1) | 1592 | if (cpuid->nent < 1) |
1593 | goto out; | 1593 | goto out; |
1594 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
1595 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
1594 | r = -ENOMEM; | 1596 | r = -ENOMEM; |
1595 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | 1597 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); |
1596 | if (!cpuid_entries) | 1598 | if (!cpuid_entries) |
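kvm_dev_ioctl_get_supported_cpuid() sizes a vmalloc() from a user-controlled count; clamping nent to KVM_MAX_CPUID_ENTRIES before the multiply stops userspace from demanding an enormous (or overflowing) allocation. The pattern, sketched as a standalone helper:

static void *alloc_cpuid_entries(struct kvm_cpuid2 *cpuid)	/* sketch */
{
	if (cpuid->nent < 1)
		return NULL;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		cpuid->nent = KVM_MAX_CPUID_ENTRIES;	/* bound the size */
	return vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
}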
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 3e549b8ec8c9..85f5db95c60f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -15,8 +15,10 @@ ifeq ($(CONFIG_X86_32),y) | |||
15 | obj-y += atomic64_32.o | 15 | obj-y += atomic64_32.o |
16 | lib-y += checksum_32.o | 16 | lib-y += checksum_32.o |
17 | lib-y += strstr_32.o | 17 | lib-y += strstr_32.o |
18 | lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o | 18 | lib-y += semaphore_32.o string_32.o |
19 | 19 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | |
20 | lib-y += cmpxchg8b_emu.o | ||
21 | endif | ||
20 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 22 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
21 | else | 23 | else |
22 | obj-y += io_64.o iomap_copy_64.o | 24 | obj-y += io_64.o iomap_copy_64.o |
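cmpxchg8b_emu.o is now linked only when CONFIG_X86_CMPXCHG64 is unset, i.e. when the chosen CPU may predate the cmpxchg8b instruction. The real emulation is assembly; purely for illustration, a C rendition of the idea, under the assumption that disabling interrupts is sufficient on the affected uniprocessor-era CPUs:

/*
 * Illustrative only: the in-tree cmpxchg8b_emu.S is assembly, and is
 * correct because interrupts are disabled across the critical section
 * on the (pre-SMP-era) CPUs that need it.
 */
static u64 cmpxchg64_emulated(volatile u64 *ptr, u64 old, u64 new)
{
	unsigned long flags;
	u64 prev;

	local_irq_save(flags);
	prev = *ptr;
	if (prev == old)
		*ptr = new;
	local_irq_restore(flags);
	return prev;
}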
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3439616d69f1..23a4d80fb39e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1075,6 +1075,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1075 | * Set up some pagetable state before starting to set any ptes. | 1075 | * Set up some pagetable state before starting to set any ptes. |
1076 | */ | 1076 | */ |
1077 | 1077 | ||
1078 | xen_init_mmu_ops(); | ||
1079 | |||
1078 | /* Prevent unwanted bits from being set in PTEs. */ | 1080 | /* Prevent unwanted bits from being set in PTEs. */ |
1079 | __supported_pte_mask &= ~_PAGE_GLOBAL; | 1081 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1080 | if (!xen_initial_domain()) | 1082 | if (!xen_initial_domain()) |
@@ -1099,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1099 | */ | 1101 | */ |
1100 | xen_setup_stackprotector(); | 1102 | xen_setup_stackprotector(); |
1101 | 1103 | ||
1102 | xen_init_mmu_ops(); | ||
1103 | xen_init_irq_ops(); | 1104 | xen_init_irq_ops(); |
1104 | xen_init_cpuid_mask(); | 1105 | xen_init_cpuid_mask(); |
1105 | 1106 | ||
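The two Xen hunks are a single move: xen_init_mmu_ops() is pulled up ahead of every pte manipulation in xen_start_kernel() (matching the "before starting to set any ptes" comment), so the pte-mask fixups and the stack-protector setup below it go through Xen's mmu ops rather than the native ones. The resulting order, condensed as a sketch:

asmlinkage void __init xen_start_kernel_sketch(void)	/* condensed */
{
	xen_init_mmu_ops();			/* before any pte write */

	__supported_pte_mask &= ~_PAGE_GLOBAL;	/* now via Xen ops */
	/* ... */
	xen_setup_stackprotector();
	xen_init_irq_ops();
	xen_init_cpuid_mask();
}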