diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-17 21:40:35 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-17 21:40:35 -0400 |
commit | ec0afc9311adcfb10b90e547c23250f63939f990 (patch) | |
tree | 2093d2668898a8a03f30acbfd5568e65b8c086b9 /arch | |
parent | 804f18536984939622ddca60ab6b25743e0ec68d (diff) | |
parent | 776e58ea3d3735f85678155398241d2513afa67a (diff) |
Merge branch 'kvm-updates/2.6.39' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.39' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (55 commits)
KVM: unbreak userspace that does not sets tss address
KVM: MMU: cleanup pte write path
KVM: MMU: introduce a common function to get no-dirty-logged slot
KVM: fix rcu usage in init_rmode_* functions
KVM: fix kvmclock regression due to missing clock update
KVM: emulator: Fix permission checking in io permission bitmap
KVM: emulator: Fix io permission checking for 64bit guest
KVM: SVM: Load %gs earlier if CONFIG_X86_32_LAZY_GS=n
KVM: x86: Remove useless regs_page pointer from kvm_lapic
KVM: improve comment on rcu use in irqfd_deassign
KVM: MMU: remove unused macros
KVM: MMU: cleanup page alloc and free
KVM: MMU: do not record gfn in kvm_mmu_pte_write
KVM: MMU: move mmu pages calculated out of mmu lock
KVM: MMU: set spte accessed bit properly
KVM: MMU: fix kvm_mmu_slot_remove_write_access dropping intermediate W bits
KVM: Start lock documentation
KVM: better readability of efer_reserved_bits
KVM: Clear async page fault hash after switching to real mode
KVM: VMX: Initialize vm86 TSS only once.
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/alpha/include/asm/errno.h | 2 | ||||
-rw-r--r-- | arch/ia64/kvm/kvm-ia64.c | 2 | ||||
-rw-r--r-- | arch/mips/include/asm/errno.h | 2 | ||||
-rw-r--r-- | arch/parisc/include/asm/errno.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 14 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 14 | ||||
-rw-r--r-- | arch/sparc/include/asm/errno.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_emulate.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 52 | ||||
-rw-r--r-- | arch/x86/kvm/i8259.c | 25 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 13 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 150 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 17 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 27 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 128 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 153 |
20 files changed, 333 insertions, 291 deletions
diff --git a/arch/alpha/include/asm/errno.h b/arch/alpha/include/asm/errno.h index 98099bda9370..e5f29ca28180 100644 --- a/arch/alpha/include/asm/errno.h +++ b/arch/alpha/include/asm/errno.h | |||
@@ -122,4 +122,6 @@ | |||
122 | 122 | ||
123 | #define ERFKILL 138 /* Operation not possible due to RF-kill */ | 123 | #define ERFKILL 138 /* Operation not possible due to RF-kill */ |
124 | 124 | ||
125 | #define EHWPOISON 139 /* Memory page has hardware error */ | ||
126 | |||
125 | #endif | 127 | #endif |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 70d224d4264c..8213efe1998c 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
@@ -662,6 +662,7 @@ again: | |||
662 | goto vcpu_run_fail; | 662 | goto vcpu_run_fail; |
663 | 663 | ||
664 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 664 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
665 | vcpu->mode = IN_GUEST_MODE; | ||
665 | kvm_guest_enter(); | 666 | kvm_guest_enter(); |
666 | 667 | ||
667 | /* | 668 | /* |
@@ -683,6 +684,7 @@ again: | |||
683 | */ | 684 | */ |
684 | barrier(); | 685 | barrier(); |
685 | kvm_guest_exit(); | 686 | kvm_guest_exit(); |
687 | vcpu->mode = OUTSIDE_GUEST_MODE; | ||
686 | preempt_enable(); | 688 | preempt_enable(); |
687 | 689 | ||
688 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 690 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
diff --git a/arch/mips/include/asm/errno.h b/arch/mips/include/asm/errno.h index a0efc73819e4..6dcd3583ed04 100644 --- a/arch/mips/include/asm/errno.h +++ b/arch/mips/include/asm/errno.h | |||
@@ -121,6 +121,8 @@ | |||
121 | 121 | ||
122 | #define ERFKILL 167 /* Operation not possible due to RF-kill */ | 122 | #define ERFKILL 167 /* Operation not possible due to RF-kill */ |
123 | 123 | ||
124 | #define EHWPOISON 168 /* Memory page has hardware error */ | ||
125 | |||
124 | #define EDQUOT 1133 /* Quota exceeded */ | 126 | #define EDQUOT 1133 /* Quota exceeded */ |
125 | 127 | ||
126 | #ifdef __KERNEL__ | 128 | #ifdef __KERNEL__ |
diff --git a/arch/parisc/include/asm/errno.h b/arch/parisc/include/asm/errno.h index 9992abdd782d..135ad6047e51 100644 --- a/arch/parisc/include/asm/errno.h +++ b/arch/parisc/include/asm/errno.h | |||
@@ -122,4 +122,6 @@ | |||
122 | 122 | ||
123 | #define ERFKILL 256 /* Operation not possible due to RF-kill */ | 123 | #define ERFKILL 256 /* Operation not possible due to RF-kill */ |
124 | 124 | ||
125 | #define EHWPOISON 257 /* Memory page has hardware error */ | ||
126 | |||
125 | #endif | 127 | #endif |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index badc983031b3..c961de40c676 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -1141,9 +1141,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
1141 | regs->sprg1 = vcpu->arch.shared->sprg1; | 1141 | regs->sprg1 = vcpu->arch.shared->sprg1; |
1142 | regs->sprg2 = vcpu->arch.shared->sprg2; | 1142 | regs->sprg2 = vcpu->arch.shared->sprg2; |
1143 | regs->sprg3 = vcpu->arch.shared->sprg3; | 1143 | regs->sprg3 = vcpu->arch.shared->sprg3; |
1144 | regs->sprg5 = vcpu->arch.sprg4; | 1144 | regs->sprg4 = vcpu->arch.sprg4; |
1145 | regs->sprg6 = vcpu->arch.sprg5; | 1145 | regs->sprg5 = vcpu->arch.sprg5; |
1146 | regs->sprg7 = vcpu->arch.sprg6; | 1146 | regs->sprg6 = vcpu->arch.sprg6; |
1147 | regs->sprg7 = vcpu->arch.sprg7; | ||
1147 | 1148 | ||
1148 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1149 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
1149 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 1150 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
@@ -1167,9 +1168,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
1167 | vcpu->arch.shared->sprg1 = regs->sprg1; | 1168 | vcpu->arch.shared->sprg1 = regs->sprg1; |
1168 | vcpu->arch.shared->sprg2 = regs->sprg2; | 1169 | vcpu->arch.shared->sprg2 = regs->sprg2; |
1169 | vcpu->arch.shared->sprg3 = regs->sprg3; | 1170 | vcpu->arch.shared->sprg3 = regs->sprg3; |
1170 | vcpu->arch.sprg5 = regs->sprg4; | 1171 | vcpu->arch.sprg4 = regs->sprg4; |
1171 | vcpu->arch.sprg6 = regs->sprg5; | 1172 | vcpu->arch.sprg5 = regs->sprg5; |
1172 | vcpu->arch.sprg7 = regs->sprg6; | 1173 | vcpu->arch.sprg6 = regs->sprg6; |
1174 | vcpu->arch.sprg7 = regs->sprg7; | ||
1173 | 1175 | ||
1174 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1176 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
1175 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 1177 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 77575d08c818..ef76acb455c3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -546,9 +546,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
546 | regs->sprg1 = vcpu->arch.shared->sprg1; | 546 | regs->sprg1 = vcpu->arch.shared->sprg1; |
547 | regs->sprg2 = vcpu->arch.shared->sprg2; | 547 | regs->sprg2 = vcpu->arch.shared->sprg2; |
548 | regs->sprg3 = vcpu->arch.shared->sprg3; | 548 | regs->sprg3 = vcpu->arch.shared->sprg3; |
549 | regs->sprg5 = vcpu->arch.sprg4; | 549 | regs->sprg4 = vcpu->arch.sprg4; |
550 | regs->sprg6 = vcpu->arch.sprg5; | 550 | regs->sprg5 = vcpu->arch.sprg5; |
551 | regs->sprg7 = vcpu->arch.sprg6; | 551 | regs->sprg6 = vcpu->arch.sprg6; |
552 | regs->sprg7 = vcpu->arch.sprg7; | ||
552 | 553 | ||
553 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 554 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
554 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 555 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
@@ -572,9 +573,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
572 | vcpu->arch.shared->sprg1 = regs->sprg1; | 573 | vcpu->arch.shared->sprg1 = regs->sprg1; |
573 | vcpu->arch.shared->sprg2 = regs->sprg2; | 574 | vcpu->arch.shared->sprg2 = regs->sprg2; |
574 | vcpu->arch.shared->sprg3 = regs->sprg3; | 575 | vcpu->arch.shared->sprg3 = regs->sprg3; |
575 | vcpu->arch.sprg5 = regs->sprg4; | 576 | vcpu->arch.sprg4 = regs->sprg4; |
576 | vcpu->arch.sprg6 = regs->sprg5; | 577 | vcpu->arch.sprg5 = regs->sprg5; |
577 | vcpu->arch.sprg7 = regs->sprg6; | 578 | vcpu->arch.sprg6 = regs->sprg6; |
579 | vcpu->arch.sprg7 = regs->sprg7; | ||
578 | 580 | ||
579 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 581 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
580 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 582 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
diff --git a/arch/sparc/include/asm/errno.h b/arch/sparc/include/asm/errno.h index 4e2bc490d714..c351aba997b7 100644 --- a/arch/sparc/include/asm/errno.h +++ b/arch/sparc/include/asm/errno.h | |||
@@ -112,4 +112,6 @@ | |||
112 | 112 | ||
113 | #define ERFKILL 134 /* Operation not possible due to RF-kill */ | 113 | #define ERFKILL 134 /* Operation not possible due to RF-kill */ |
114 | 114 | ||
115 | #define EHWPOISON 135 /* Memory page has hardware error */ | ||
116 | |||
115 | #endif | 117 | #endif |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 8e37deb1eb38..0f5213564326 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -142,9 +142,9 @@ struct x86_emulate_ops { | |||
142 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | 142 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, |
143 | unsigned int count, struct kvm_vcpu *vcpu); | 143 | unsigned int count, struct kvm_vcpu *vcpu); |
144 | 144 | ||
145 | bool (*get_cached_descriptor)(struct desc_struct *desc, | 145 | bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, |
146 | int seg, struct kvm_vcpu *vcpu); | 146 | int seg, struct kvm_vcpu *vcpu); |
147 | void (*set_cached_descriptor)(struct desc_struct *desc, | 147 | void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, |
148 | int seg, struct kvm_vcpu *vcpu); | 148 | int seg, struct kvm_vcpu *vcpu); |
149 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | 149 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); |
150 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | 150 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); |
@@ -239,6 +239,7 @@ struct x86_emulate_ctxt { | |||
239 | int interruptibility; | 239 | int interruptibility; |
240 | 240 | ||
241 | bool perm_ok; /* do not check permissions if true */ | 241 | bool perm_ok; /* do not check permissions if true */ |
242 | bool only_vendor_specific_insn; | ||
242 | 243 | ||
243 | bool have_exception; | 244 | bool have_exception; |
244 | struct x86_exception exception; | 245 | struct x86_exception exception; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ffd7f8d29187..c8af0991fdf0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -85,7 +85,7 @@ | |||
85 | 85 | ||
86 | #define ASYNC_PF_PER_VCPU 64 | 86 | #define ASYNC_PF_PER_VCPU 64 |
87 | 87 | ||
88 | extern spinlock_t kvm_lock; | 88 | extern raw_spinlock_t kvm_lock; |
89 | extern struct list_head vm_list; | 89 | extern struct list_head vm_list; |
90 | 90 | ||
91 | struct kvm_vcpu; | 91 | struct kvm_vcpu; |
@@ -255,6 +255,8 @@ struct kvm_mmu { | |||
255 | int (*sync_page)(struct kvm_vcpu *vcpu, | 255 | int (*sync_page)(struct kvm_vcpu *vcpu, |
256 | struct kvm_mmu_page *sp); | 256 | struct kvm_mmu_page *sp); |
257 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 257 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
258 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
259 | u64 *spte, const void *pte, unsigned long mmu_seq); | ||
258 | hpa_t root_hpa; | 260 | hpa_t root_hpa; |
259 | int root_level; | 261 | int root_level; |
260 | int shadow_root_level; | 262 | int shadow_root_level; |
@@ -335,12 +337,6 @@ struct kvm_vcpu_arch { | |||
335 | u64 *last_pte_updated; | 337 | u64 *last_pte_updated; |
336 | gfn_t last_pte_gfn; | 338 | gfn_t last_pte_gfn; |
337 | 339 | ||
338 | struct { | ||
339 | gfn_t gfn; /* presumed gfn during guest pte update */ | ||
340 | pfn_t pfn; /* pfn corresponding to that gfn */ | ||
341 | unsigned long mmu_seq; | ||
342 | } update_pte; | ||
343 | |||
344 | struct fpu guest_fpu; | 340 | struct fpu guest_fpu; |
345 | u64 xcr0; | 341 | u64 xcr0; |
346 | 342 | ||
@@ -448,7 +444,7 @@ struct kvm_arch { | |||
448 | 444 | ||
449 | unsigned long irq_sources_bitmap; | 445 | unsigned long irq_sources_bitmap; |
450 | s64 kvmclock_offset; | 446 | s64 kvmclock_offset; |
451 | spinlock_t tsc_write_lock; | 447 | raw_spinlock_t tsc_write_lock; |
452 | u64 last_tsc_nsec; | 448 | u64 last_tsc_nsec; |
453 | u64 last_tsc_offset; | 449 | u64 last_tsc_offset; |
454 | u64 last_tsc_write; | 450 | u64 last_tsc_write; |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 823d48223400..fd5a1f365c95 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -43,6 +43,7 @@ | |||
43 | 43 | ||
44 | #define MSR_MTRRcap 0x000000fe | 44 | #define MSR_MTRRcap 0x000000fe |
45 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | 45 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
46 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | ||
46 | 47 | ||
47 | #define MSR_IA32_SYSENTER_CS 0x00000174 | 48 | #define MSR_IA32_SYSENTER_CS 0x00000174 |
48 | #define MSR_IA32_SYSENTER_ESP 0x00000175 | 49 | #define MSR_IA32_SYSENTER_ESP 0x00000175 |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8dc44662394b..33c07b0b122e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -493,7 +493,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
493 | native_smp_prepare_boot_cpu(); | 493 | native_smp_prepare_boot_cpu(); |
494 | } | 494 | } |
495 | 495 | ||
496 | static void kvm_guest_cpu_online(void *dummy) | 496 | static void __cpuinit kvm_guest_cpu_online(void *dummy) |
497 | { | 497 | { |
498 | kvm_guest_cpu_init(); | 498 | kvm_guest_cpu_init(); |
499 | } | 499 | } |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index caf966781d25..0ad47b819a8b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -76,6 +76,7 @@ | |||
76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
78 | /* Misc flags */ | 78 | /* Misc flags */ |
79 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ | ||
79 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ | 80 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ |
80 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ | 81 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ |
81 | #define Undefined (1<<25) /* No Such Instruction */ | 82 | #define Undefined (1<<25) /* No Such Instruction */ |
@@ -877,7 +878,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | |||
877 | if (selector & 1 << 2) { | 878 | if (selector & 1 << 2) { |
878 | struct desc_struct desc; | 879 | struct desc_struct desc; |
879 | memset (dt, 0, sizeof *dt); | 880 | memset (dt, 0, sizeof *dt); |
880 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | 881 | if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, |
882 | ctxt->vcpu)) | ||
881 | return; | 883 | return; |
882 | 884 | ||
883 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | 885 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ |
@@ -929,6 +931,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
929 | return ret; | 931 | return ret; |
930 | } | 932 | } |
931 | 933 | ||
934 | /* Does not support long mode */ | ||
932 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 935 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
933 | struct x86_emulate_ops *ops, | 936 | struct x86_emulate_ops *ops, |
934 | u16 selector, int seg) | 937 | u16 selector, int seg) |
@@ -1040,7 +1043,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1040 | } | 1043 | } |
1041 | load: | 1044 | load: |
1042 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | 1045 | ops->set_segment_selector(selector, seg, ctxt->vcpu); |
1043 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | 1046 | ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); |
1044 | return X86EMUL_CONTINUE; | 1047 | return X86EMUL_CONTINUE; |
1045 | exception: | 1048 | exception: |
1046 | emulate_exception(ctxt, err_vec, err_code, true); | 1049 | emulate_exception(ctxt, err_vec, err_code, true); |
@@ -1560,7 +1563,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1560 | struct desc_struct *ss) | 1563 | struct desc_struct *ss) |
1561 | { | 1564 | { |
1562 | memset(cs, 0, sizeof(struct desc_struct)); | 1565 | memset(cs, 0, sizeof(struct desc_struct)); |
1563 | ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); | 1566 | ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); |
1564 | memset(ss, 0, sizeof(struct desc_struct)); | 1567 | memset(ss, 0, sizeof(struct desc_struct)); |
1565 | 1568 | ||
1566 | cs->l = 0; /* will be adjusted later */ | 1569 | cs->l = 0; /* will be adjusted later */ |
@@ -1607,9 +1610,9 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1607 | cs.d = 0; | 1610 | cs.d = 0; |
1608 | cs.l = 1; | 1611 | cs.l = 1; |
1609 | } | 1612 | } |
1610 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1613 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1611 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1614 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1612 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1615 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1613 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1616 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1614 | 1617 | ||
1615 | c->regs[VCPU_REGS_RCX] = c->eip; | 1618 | c->regs[VCPU_REGS_RCX] = c->eip; |
@@ -1679,9 +1682,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1679 | cs.l = 1; | 1682 | cs.l = 1; |
1680 | } | 1683 | } |
1681 | 1684 | ||
1682 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1685 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1683 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1686 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1684 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1687 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1685 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1688 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1686 | 1689 | ||
1687 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 1690 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); |
@@ -1736,9 +1739,9 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1736 | cs_sel |= SELECTOR_RPL_MASK; | 1739 | cs_sel |= SELECTOR_RPL_MASK; |
1737 | ss_sel |= SELECTOR_RPL_MASK; | 1740 | ss_sel |= SELECTOR_RPL_MASK; |
1738 | 1741 | ||
1739 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1742 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1740 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1743 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1741 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1744 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1742 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1745 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1743 | 1746 | ||
1744 | c->eip = c->regs[VCPU_REGS_RDX]; | 1747 | c->eip = c->regs[VCPU_REGS_RDX]; |
@@ -1764,24 +1767,28 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
1764 | u16 port, u16 len) | 1767 | u16 port, u16 len) |
1765 | { | 1768 | { |
1766 | struct desc_struct tr_seg; | 1769 | struct desc_struct tr_seg; |
1770 | u32 base3; | ||
1767 | int r; | 1771 | int r; |
1768 | u16 io_bitmap_ptr; | 1772 | u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; |
1769 | u8 perm, bit_idx = port & 0x7; | ||
1770 | unsigned mask = (1 << len) - 1; | 1773 | unsigned mask = (1 << len) - 1; |
1774 | unsigned long base; | ||
1771 | 1775 | ||
1772 | ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); | 1776 | ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); |
1773 | if (!tr_seg.p) | 1777 | if (!tr_seg.p) |
1774 | return false; | 1778 | return false; |
1775 | if (desc_limit_scaled(&tr_seg) < 103) | 1779 | if (desc_limit_scaled(&tr_seg) < 103) |
1776 | return false; | 1780 | return false; |
1777 | r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, | 1781 | base = get_desc_base(&tr_seg); |
1778 | ctxt->vcpu, NULL); | 1782 | #ifdef CONFIG_X86_64 |
1783 | base |= ((u64)base3) << 32; | ||
1784 | #endif | ||
1785 | r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); | ||
1779 | if (r != X86EMUL_CONTINUE) | 1786 | if (r != X86EMUL_CONTINUE) |
1780 | return false; | 1787 | return false; |
1781 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) | 1788 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
1782 | return false; | 1789 | return false; |
1783 | r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, | 1790 | r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, |
1784 | &perm, 1, ctxt->vcpu, NULL); | 1791 | NULL); |
1785 | if (r != X86EMUL_CONTINUE) | 1792 | if (r != X86EMUL_CONTINUE) |
1786 | return false; | 1793 | return false; |
1787 | if ((perm >> bit_idx) & mask) | 1794 | if ((perm >> bit_idx) & mask) |
@@ -2126,7 +2133,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2126 | } | 2133 | } |
2127 | 2134 | ||
2128 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | 2135 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); |
2129 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | 2136 | ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); |
2130 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | 2137 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); |
2131 | 2138 | ||
2132 | if (has_error_code) { | 2139 | if (has_error_code) { |
@@ -2365,7 +2372,8 @@ static struct group_dual group7 = { { | |||
2365 | D(SrcMem16 | ModRM | Mov | Priv), | 2372 | D(SrcMem16 | ModRM | Mov | Priv), |
2366 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), | 2373 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), |
2367 | }, { | 2374 | }, { |
2368 | D(SrcNone | ModRM | Priv), N, N, D(SrcNone | ModRM | Priv), | 2375 | D(SrcNone | ModRM | Priv | VendorSpecific), N, |
2376 | N, D(SrcNone | ModRM | Priv | VendorSpecific), | ||
2369 | D(SrcNone | ModRM | DstMem | Mov), N, | 2377 | D(SrcNone | ModRM | DstMem | Mov), N, |
2370 | D(SrcMem16 | ModRM | Mov | Priv), N, | 2378 | D(SrcMem16 | ModRM | Mov | Priv), N, |
2371 | } }; | 2379 | } }; |
@@ -2489,7 +2497,7 @@ static struct opcode opcode_table[256] = { | |||
2489 | static struct opcode twobyte_table[256] = { | 2497 | static struct opcode twobyte_table[256] = { |
2490 | /* 0x00 - 0x0F */ | 2498 | /* 0x00 - 0x0F */ |
2491 | N, GD(0, &group7), N, N, | 2499 | N, GD(0, &group7), N, N, |
2492 | N, D(ImplicitOps), D(ImplicitOps | Priv), N, | 2500 | N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, |
2493 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, | 2501 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, |
2494 | N, D(ImplicitOps | ModRM), N, N, | 2502 | N, D(ImplicitOps | ModRM), N, N, |
2495 | /* 0x10 - 0x1F */ | 2503 | /* 0x10 - 0x1F */ |
@@ -2502,7 +2510,8 @@ static struct opcode twobyte_table[256] = { | |||
2502 | /* 0x30 - 0x3F */ | 2510 | /* 0x30 - 0x3F */ |
2503 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), | 2511 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), |
2504 | D(ImplicitOps | Priv), N, | 2512 | D(ImplicitOps | Priv), N, |
2505 | D(ImplicitOps), D(ImplicitOps | Priv), N, N, | 2513 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), |
2514 | N, N, | ||
2506 | N, N, N, N, N, N, N, N, | 2515 | N, N, N, N, N, N, N, N, |
2507 | /* 0x40 - 0x4F */ | 2516 | /* 0x40 - 0x4F */ |
2508 | X16(D(DstReg | SrcMem | ModRM | Mov)), | 2517 | X16(D(DstReg | SrcMem | ModRM | Mov)), |
@@ -2741,6 +2750,9 @@ done_prefixes: | |||
2741 | if (c->d == 0 || (c->d & Undefined)) | 2750 | if (c->d == 0 || (c->d & Undefined)) |
2742 | return -1; | 2751 | return -1; |
2743 | 2752 | ||
2753 | if (!(c->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | ||
2754 | return -1; | ||
2755 | |||
2744 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) | 2756 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) |
2745 | c->op_bytes = 8; | 2757 | c->op_bytes = 8; |
2746 | 2758 | ||
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 3cece05e4ac4..19fe855e7953 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -62,9 +62,6 @@ static void pic_unlock(struct kvm_pic *s) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | if (!found) | 64 | if (!found) |
65 | found = s->kvm->bsp_vcpu; | ||
66 | |||
67 | if (!found) | ||
68 | return; | 65 | return; |
69 | 66 | ||
70 | kvm_make_request(KVM_REQ_EVENT, found); | 67 | kvm_make_request(KVM_REQ_EVENT, found); |
@@ -75,7 +72,6 @@ static void pic_unlock(struct kvm_pic *s) | |||
75 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 72 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
76 | { | 73 | { |
77 | s->isr &= ~(1 << irq); | 74 | s->isr &= ~(1 << irq); |
78 | s->isr_ack |= (1 << irq); | ||
79 | if (s != &s->pics_state->pics[0]) | 75 | if (s != &s->pics_state->pics[0]) |
80 | irq += 8; | 76 | irq += 8; |
81 | /* | 77 | /* |
@@ -89,16 +85,6 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
89 | pic_lock(s->pics_state); | 85 | pic_lock(s->pics_state); |
90 | } | 86 | } |
91 | 87 | ||
92 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | ||
93 | { | ||
94 | struct kvm_pic *s = pic_irqchip(kvm); | ||
95 | |||
96 | pic_lock(s); | ||
97 | s->pics[0].isr_ack = 0xff; | ||
98 | s->pics[1].isr_ack = 0xff; | ||
99 | pic_unlock(s); | ||
100 | } | ||
101 | |||
102 | /* | 88 | /* |
103 | * set irq level. If an edge is detected, then the IRR is set to 1 | 89 | * set irq level. If an edge is detected, then the IRR is set to 1 |
104 | */ | 90 | */ |
@@ -281,7 +267,6 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
281 | s->irr = 0; | 267 | s->irr = 0; |
282 | s->imr = 0; | 268 | s->imr = 0; |
283 | s->isr = 0; | 269 | s->isr = 0; |
284 | s->isr_ack = 0xff; | ||
285 | s->priority_add = 0; | 270 | s->priority_add = 0; |
286 | s->irq_base = 0; | 271 | s->irq_base = 0; |
287 | s->read_reg_select = 0; | 272 | s->read_reg_select = 0; |
@@ -545,15 +530,11 @@ static int picdev_read(struct kvm_io_device *this, | |||
545 | */ | 530 | */ |
546 | static void pic_irq_request(struct kvm *kvm, int level) | 531 | static void pic_irq_request(struct kvm *kvm, int level) |
547 | { | 532 | { |
548 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; | ||
549 | struct kvm_pic *s = pic_irqchip(kvm); | 533 | struct kvm_pic *s = pic_irqchip(kvm); |
550 | int irq = pic_get_irq(&s->pics[0]); | ||
551 | 534 | ||
552 | s->output = level; | 535 | if (!s->output) |
553 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | ||
554 | s->pics[0].isr_ack &= ~(1 << irq); | ||
555 | s->wakeup_needed = true; | 536 | s->wakeup_needed = true; |
556 | } | 537 | s->output = level; |
557 | } | 538 | } |
558 | 539 | ||
559 | static const struct kvm_io_device_ops picdev_ops = { | 540 | static const struct kvm_io_device_ops picdev_ops = { |
@@ -575,8 +556,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
575 | s->pics[1].elcr_mask = 0xde; | 556 | s->pics[1].elcr_mask = 0xde; |
576 | s->pics[0].pics_state = s; | 557 | s->pics[0].pics_state = s; |
577 | s->pics[1].pics_state = s; | 558 | s->pics[1].pics_state = s; |
578 | s->pics[0].isr_ack = 0xff; | ||
579 | s->pics[1].isr_ack = 0xff; | ||
580 | 559 | ||
581 | /* | 560 | /* |
582 | * Initialize PIO device | 561 | * Initialize PIO device |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 93cf9d0d3653..2b2255b1f04b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -417,10 +417,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
417 | case APIC_DM_INIT: | 417 | case APIC_DM_INIT: |
418 | if (level) { | 418 | if (level) { |
419 | result = 1; | 419 | result = 1; |
420 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | ||
421 | printk(KERN_DEBUG | ||
422 | "INIT on a runnable vcpu %d\n", | ||
423 | vcpu->vcpu_id); | ||
424 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 420 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
425 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 421 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
426 | kvm_vcpu_kick(vcpu); | 422 | kvm_vcpu_kick(vcpu); |
@@ -875,8 +871,8 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
875 | 871 | ||
876 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); | 872 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); |
877 | 873 | ||
878 | if (vcpu->arch.apic->regs_page) | 874 | if (vcpu->arch.apic->regs) |
879 | __free_page(vcpu->arch.apic->regs_page); | 875 | free_page((unsigned long)vcpu->arch.apic->regs); |
880 | 876 | ||
881 | kfree(vcpu->arch.apic); | 877 | kfree(vcpu->arch.apic); |
882 | } | 878 | } |
@@ -1065,13 +1061,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
1065 | 1061 | ||
1066 | vcpu->arch.apic = apic; | 1062 | vcpu->arch.apic = apic; |
1067 | 1063 | ||
1068 | apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO); | 1064 | apic->regs = (void *)get_zeroed_page(GFP_KERNEL); |
1069 | if (apic->regs_page == NULL) { | 1065 | if (!apic->regs) { |
1070 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", | 1066 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", |
1071 | vcpu->vcpu_id); | 1067 | vcpu->vcpu_id); |
1072 | goto nomem_free_apic; | 1068 | goto nomem_free_apic; |
1073 | } | 1069 | } |
1074 | apic->regs = page_address(apic->regs_page); | ||
1075 | apic->vcpu = vcpu; | 1070 | apic->vcpu = vcpu; |
1076 | 1071 | ||
1077 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | 1072 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f5fe32c5edad..52c9e6b9e725 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -13,7 +13,6 @@ struct kvm_lapic { | |||
13 | u32 divide_count; | 13 | u32 divide_count; |
14 | struct kvm_vcpu *vcpu; | 14 | struct kvm_vcpu *vcpu; |
15 | bool irr_pending; | 15 | bool irr_pending; |
16 | struct page *regs_page; | ||
17 | void *regs; | 16 | void *regs; |
18 | gpa_t vapic_addr; | 17 | gpa_t vapic_addr; |
19 | struct page *vapic_page; | 18 | struct page *vapic_page; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f02b8edc3d44..22fae7593ee7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -111,9 +111,6 @@ module_param(oos_shadow, bool, 0644); | |||
111 | #define PT64_LEVEL_SHIFT(level) \ | 111 | #define PT64_LEVEL_SHIFT(level) \ |
112 | (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS) | 112 | (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS) |
113 | 113 | ||
114 | #define PT64_LEVEL_MASK(level) \ | ||
115 | (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level)) | ||
116 | |||
117 | #define PT64_INDEX(address, level)\ | 114 | #define PT64_INDEX(address, level)\ |
118 | (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) | 115 | (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) |
119 | 116 | ||
@@ -123,8 +120,6 @@ module_param(oos_shadow, bool, 0644); | |||
123 | #define PT32_LEVEL_SHIFT(level) \ | 120 | #define PT32_LEVEL_SHIFT(level) \ |
124 | (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS) | 121 | (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS) |
125 | 122 | ||
126 | #define PT32_LEVEL_MASK(level) \ | ||
127 | (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level)) | ||
128 | #define PT32_LVL_OFFSET_MASK(level) \ | 123 | #define PT32_LVL_OFFSET_MASK(level) \ |
129 | (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 124 | (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
130 | * PT32_LEVEL_BITS))) - 1)) | 125 | * PT32_LEVEL_BITS))) - 1)) |
@@ -379,15 +374,15 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, | |||
379 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | 374 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
380 | int min) | 375 | int min) |
381 | { | 376 | { |
382 | struct page *page; | 377 | void *page; |
383 | 378 | ||
384 | if (cache->nobjs >= min) | 379 | if (cache->nobjs >= min) |
385 | return 0; | 380 | return 0; |
386 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 381 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
387 | page = alloc_page(GFP_KERNEL); | 382 | page = (void *)__get_free_page(GFP_KERNEL); |
388 | if (!page) | 383 | if (!page) |
389 | return -ENOMEM; | 384 | return -ENOMEM; |
390 | cache->objects[cache->nobjs++] = page_address(page); | 385 | cache->objects[cache->nobjs++] = page; |
391 | } | 386 | } |
392 | return 0; | 387 | return 0; |
393 | } | 388 | } |
@@ -554,13 +549,23 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
554 | return ret; | 549 | return ret; |
555 | } | 550 | } |
556 | 551 | ||
557 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 552 | static struct kvm_memory_slot * |
553 | gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
554 | bool no_dirty_log) | ||
558 | { | 555 | { |
559 | struct kvm_memory_slot *slot; | 556 | struct kvm_memory_slot *slot; |
560 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 557 | |
561 | if (slot && slot->dirty_bitmap) | 558 | slot = gfn_to_memslot(vcpu->kvm, gfn); |
562 | return true; | 559 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || |
563 | return false; | 560 | (no_dirty_log && slot->dirty_bitmap)) |
561 | slot = NULL; | ||
562 | |||
563 | return slot; | ||
564 | } | ||
565 | |||
566 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
567 | { | ||
568 | return gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); | ||
564 | } | 569 | } |
565 | 570 | ||
566 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 571 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
@@ -1032,9 +1037,9 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1032 | ASSERT(is_empty_shadow_page(sp->spt)); | 1037 | ASSERT(is_empty_shadow_page(sp->spt)); |
1033 | hlist_del(&sp->hash_link); | 1038 | hlist_del(&sp->hash_link); |
1034 | list_del(&sp->link); | 1039 | list_del(&sp->link); |
1035 | __free_page(virt_to_page(sp->spt)); | 1040 | free_page((unsigned long)sp->spt); |
1036 | if (!sp->role.direct) | 1041 | if (!sp->role.direct) |
1037 | __free_page(virt_to_page(sp->gfns)); | 1042 | free_page((unsigned long)sp->gfns); |
1038 | kmem_cache_free(mmu_page_header_cache, sp); | 1043 | kmem_cache_free(mmu_page_header_cache, sp); |
1039 | kvm_mod_used_mmu_pages(kvm, -1); | 1044 | kvm_mod_used_mmu_pages(kvm, -1); |
1040 | } | 1045 | } |
@@ -1199,6 +1204,13 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | |||
1199 | { | 1204 | { |
1200 | } | 1205 | } |
1201 | 1206 | ||
1207 | static void nonpaging_update_pte(struct kvm_vcpu *vcpu, | ||
1208 | struct kvm_mmu_page *sp, u64 *spte, | ||
1209 | const void *pte, unsigned long mmu_seq) | ||
1210 | { | ||
1211 | WARN_ON(1); | ||
1212 | } | ||
1213 | |||
1202 | #define KVM_PAGE_ARRAY_NR 16 | 1214 | #define KVM_PAGE_ARRAY_NR 16 |
1203 | 1215 | ||
1204 | struct kvm_mmu_pages { | 1216 | struct kvm_mmu_pages { |
@@ -2150,26 +2162,13 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
2150 | { | 2162 | { |
2151 | } | 2163 | } |
2152 | 2164 | ||
2153 | static struct kvm_memory_slot * | ||
2154 | pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) | ||
2155 | { | ||
2156 | struct kvm_memory_slot *slot; | ||
2157 | |||
2158 | slot = gfn_to_memslot(vcpu->kvm, gfn); | ||
2159 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || | ||
2160 | (no_dirty_log && slot->dirty_bitmap)) | ||
2161 | slot = NULL; | ||
2162 | |||
2163 | return slot; | ||
2164 | } | ||
2165 | |||
2166 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2165 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
2167 | bool no_dirty_log) | 2166 | bool no_dirty_log) |
2168 | { | 2167 | { |
2169 | struct kvm_memory_slot *slot; | 2168 | struct kvm_memory_slot *slot; |
2170 | unsigned long hva; | 2169 | unsigned long hva; |
2171 | 2170 | ||
2172 | slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log); | 2171 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); |
2173 | if (!slot) { | 2172 | if (!slot) { |
2174 | get_page(bad_page); | 2173 | get_page(bad_page); |
2175 | return page_to_pfn(bad_page); | 2174 | return page_to_pfn(bad_page); |
@@ -2190,7 +2189,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
2190 | gfn_t gfn; | 2189 | gfn_t gfn; |
2191 | 2190 | ||
2192 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); | 2191 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); |
2193 | if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK)) | 2192 | if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK)) |
2194 | return -1; | 2193 | return -1; |
2195 | 2194 | ||
2196 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); | 2195 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); |
@@ -2804,6 +2803,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu, | |||
2804 | context->prefetch_page = nonpaging_prefetch_page; | 2803 | context->prefetch_page = nonpaging_prefetch_page; |
2805 | context->sync_page = nonpaging_sync_page; | 2804 | context->sync_page = nonpaging_sync_page; |
2806 | context->invlpg = nonpaging_invlpg; | 2805 | context->invlpg = nonpaging_invlpg; |
2806 | context->update_pte = nonpaging_update_pte; | ||
2807 | context->root_level = 0; | 2807 | context->root_level = 0; |
2808 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2808 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2809 | context->root_hpa = INVALID_PAGE; | 2809 | context->root_hpa = INVALID_PAGE; |
@@ -2933,6 +2933,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
2933 | context->prefetch_page = paging64_prefetch_page; | 2933 | context->prefetch_page = paging64_prefetch_page; |
2934 | context->sync_page = paging64_sync_page; | 2934 | context->sync_page = paging64_sync_page; |
2935 | context->invlpg = paging64_invlpg; | 2935 | context->invlpg = paging64_invlpg; |
2936 | context->update_pte = paging64_update_pte; | ||
2936 | context->free = paging_free; | 2937 | context->free = paging_free; |
2937 | context->root_level = level; | 2938 | context->root_level = level; |
2938 | context->shadow_root_level = level; | 2939 | context->shadow_root_level = level; |
@@ -2961,6 +2962,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
2961 | context->prefetch_page = paging32_prefetch_page; | 2962 | context->prefetch_page = paging32_prefetch_page; |
2962 | context->sync_page = paging32_sync_page; | 2963 | context->sync_page = paging32_sync_page; |
2963 | context->invlpg = paging32_invlpg; | 2964 | context->invlpg = paging32_invlpg; |
2965 | context->update_pte = paging32_update_pte; | ||
2964 | context->root_level = PT32_ROOT_LEVEL; | 2966 | context->root_level = PT32_ROOT_LEVEL; |
2965 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2967 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2966 | context->root_hpa = INVALID_PAGE; | 2968 | context->root_hpa = INVALID_PAGE; |
@@ -2985,6 +2987,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2985 | context->prefetch_page = nonpaging_prefetch_page; | 2987 | context->prefetch_page = nonpaging_prefetch_page; |
2986 | context->sync_page = nonpaging_sync_page; | 2988 | context->sync_page = nonpaging_sync_page; |
2987 | context->invlpg = nonpaging_invlpg; | 2989 | context->invlpg = nonpaging_invlpg; |
2990 | context->update_pte = nonpaging_update_pte; | ||
2988 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 2991 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
2989 | context->root_hpa = INVALID_PAGE; | 2992 | context->root_hpa = INVALID_PAGE; |
2990 | context->direct_map = true; | 2993 | context->direct_map = true; |
@@ -3089,8 +3092,6 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3089 | 3092 | ||
3090 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | 3093 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) |
3091 | { | 3094 | { |
3092 | vcpu->arch.update_pte.pfn = bad_pfn; | ||
3093 | |||
3094 | if (mmu_is_nested(vcpu)) | 3095 | if (mmu_is_nested(vcpu)) |
3095 | return init_kvm_nested_mmu(vcpu); | 3096 | return init_kvm_nested_mmu(vcpu); |
3096 | else if (tdp_enabled) | 3097 | else if (tdp_enabled) |
@@ -3164,7 +3165,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
3164 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
3165 | struct kvm_mmu_page *sp, | 3166 | struct kvm_mmu_page *sp, |
3166 | u64 *spte, | 3167 | u64 *spte, |
3167 | const void *new) | 3168 | const void *new, unsigned long mmu_seq) |
3168 | { | 3169 | { |
3169 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { | 3170 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
3170 | ++vcpu->kvm->stat.mmu_pde_zapped; | 3171 | ++vcpu->kvm->stat.mmu_pde_zapped; |
@@ -3172,10 +3173,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
3172 | } | 3173 | } |
3173 | 3174 | ||
3174 | ++vcpu->kvm->stat.mmu_pte_updated; | 3175 | ++vcpu->kvm->stat.mmu_pte_updated; |
3175 | if (!sp->role.cr4_pae) | 3176 | vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); |
3176 | paging32_update_pte(vcpu, sp, spte, new); | ||
3177 | else | ||
3178 | paging64_update_pte(vcpu, sp, spte, new); | ||
3179 | } | 3177 | } |
3180 | 3178 | ||
3181 | static bool need_remote_flush(u64 old, u64 new) | 3179 | static bool need_remote_flush(u64 old, u64 new) |
@@ -3210,28 +3208,6 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
3210 | return !!(spte && (*spte & shadow_accessed_mask)); | 3208 | return !!(spte && (*spte & shadow_accessed_mask)); |
3211 | } | 3209 | } |
3212 | 3210 | ||
3213 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
3214 | u64 gpte) | ||
3215 | { | ||
3216 | gfn_t gfn; | ||
3217 | pfn_t pfn; | ||
3218 | |||
3219 | if (!is_present_gpte(gpte)) | ||
3220 | return; | ||
3221 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | ||
3222 | |||
3223 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
3224 | smp_rmb(); | ||
3225 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
3226 | |||
3227 | if (is_error_pfn(pfn)) { | ||
3228 | kvm_release_pfn_clean(pfn); | ||
3229 | return; | ||
3230 | } | ||
3231 | vcpu->arch.update_pte.gfn = gfn; | ||
3232 | vcpu->arch.update_pte.pfn = pfn; | ||
3233 | } | ||
3234 | |||
3235 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 3211 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) |
3236 | { | 3212 | { |
3237 | u64 *spte = vcpu->arch.last_pte_updated; | 3213 | u64 *spte = vcpu->arch.last_pte_updated; |
@@ -3253,21 +3229,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3253 | struct kvm_mmu_page *sp; | 3229 | struct kvm_mmu_page *sp; |
3254 | struct hlist_node *node; | 3230 | struct hlist_node *node; |
3255 | LIST_HEAD(invalid_list); | 3231 | LIST_HEAD(invalid_list); |
3256 | u64 entry, gentry; | 3232 | unsigned long mmu_seq; |
3257 | u64 *spte; | 3233 | u64 entry, gentry, *spte; |
3258 | unsigned offset = offset_in_page(gpa); | 3234 | unsigned pte_size, page_offset, misaligned, quadrant, offset; |
3259 | unsigned pte_size; | 3235 | int level, npte, invlpg_counter, r, flooded = 0; |
3260 | unsigned page_offset; | ||
3261 | unsigned misaligned; | ||
3262 | unsigned quadrant; | ||
3263 | int level; | ||
3264 | int flooded = 0; | ||
3265 | int npte; | ||
3266 | int r; | ||
3267 | int invlpg_counter; | ||
3268 | bool remote_flush, local_flush, zap_page; | 3236 | bool remote_flush, local_flush, zap_page; |
3269 | 3237 | ||
3270 | zap_page = remote_flush = local_flush = false; | 3238 | zap_page = remote_flush = local_flush = false; |
3239 | offset = offset_in_page(gpa); | ||
3271 | 3240 | ||
3272 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 3241 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
3273 | 3242 | ||
@@ -3275,9 +3244,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3275 | 3244 | ||
3276 | /* | 3245 | /* |
3277 | * Assume that the pte write is on a page table of the same type | 3246 | * Assume that the pte write is on a page table of the same type |
3278 | * as the current vcpu paging mode. This is nearly always true | 3247 | * as the current vcpu paging mode since we update the sptes only |
3279 | * (might be false while changing modes). Note it is verified later | 3248 | * when they have the same mode. |
3280 | * by update_pte(). | ||
3281 | */ | 3249 | */ |
3282 | if ((is_pae(vcpu) && bytes == 4) || !new) { | 3250 | if ((is_pae(vcpu) && bytes == 4) || !new) { |
3283 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 3251 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
@@ -3303,15 +3271,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3303 | break; | 3271 | break; |
3304 | } | 3272 | } |
3305 | 3273 | ||
3306 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | 3274 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
3275 | smp_rmb(); | ||
3276 | |||
3307 | spin_lock(&vcpu->kvm->mmu_lock); | 3277 | spin_lock(&vcpu->kvm->mmu_lock); |
3308 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | 3278 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) |
3309 | gentry = 0; | 3279 | gentry = 0; |
3310 | kvm_mmu_access_page(vcpu, gfn); | ||
3311 | kvm_mmu_free_some_pages(vcpu); | 3280 | kvm_mmu_free_some_pages(vcpu); |
3312 | ++vcpu->kvm->stat.mmu_pte_write; | 3281 | ++vcpu->kvm->stat.mmu_pte_write; |
3313 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); | 3282 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
3314 | if (guest_initiated) { | 3283 | if (guest_initiated) { |
3284 | kvm_mmu_access_page(vcpu, gfn); | ||
3315 | if (gfn == vcpu->arch.last_pt_write_gfn | 3285 | if (gfn == vcpu->arch.last_pt_write_gfn |
3316 | && !last_updated_pte_accessed(vcpu)) { | 3286 | && !last_updated_pte_accessed(vcpu)) { |
3317 | ++vcpu->arch.last_pt_write_count; | 3287 | ++vcpu->arch.last_pt_write_count; |
@@ -3375,7 +3345,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3375 | if (gentry && | 3345 | if (gentry && |
3376 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3346 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3377 | & mask.word)) | 3347 | & mask.word)) |
3378 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3348 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, |
3349 | mmu_seq); | ||
3379 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3350 | if (!remote_flush && need_remote_flush(entry, *spte)) |
3380 | remote_flush = true; | 3351 | remote_flush = true; |
3381 | ++spte; | 3352 | ++spte; |
@@ -3385,10 +3356,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3385 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3356 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
3386 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); | 3357 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
3387 | spin_unlock(&vcpu->kvm->mmu_lock); | 3358 | spin_unlock(&vcpu->kvm->mmu_lock); |
3388 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | ||
3389 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); | ||
3390 | vcpu->arch.update_pte.pfn = bad_pfn; | ||
3391 | } | ||
3392 | } | 3359 | } |
3393 | 3360 | ||
3394 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | 3361 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -3538,14 +3505,23 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3538 | if (!test_bit(slot, sp->slot_bitmap)) | 3505 | if (!test_bit(slot, sp->slot_bitmap)) |
3539 | continue; | 3506 | continue; |
3540 | 3507 | ||
3541 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) | ||
3542 | continue; | ||
3543 | |||
3544 | pt = sp->spt; | 3508 | pt = sp->spt; |
3545 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3509 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
3510 | if (!is_shadow_present_pte(pt[i]) || | ||
3511 | !is_last_spte(pt[i], sp->role.level)) | ||
3512 | continue; | ||
3513 | |||
3514 | if (is_large_pte(pt[i])) { | ||
3515 | drop_spte(kvm, &pt[i], | ||
3516 | shadow_trap_nonpresent_pte); | ||
3517 | --kvm->stat.lpages; | ||
3518 | continue; | ||
3519 | } | ||
3520 | |||
3546 | /* avoid RMW */ | 3521 | /* avoid RMW */ |
3547 | if (is_writable_pte(pt[i])) | 3522 | if (is_writable_pte(pt[i])) |
3548 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); | 3523 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); |
3524 | } | ||
3549 | } | 3525 | } |
3550 | kvm_flush_remote_tlbs(kvm); | 3526 | kvm_flush_remote_tlbs(kvm); |
3551 | } | 3527 | } |
@@ -3583,7 +3559,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3583 | if (nr_to_scan == 0) | 3559 | if (nr_to_scan == 0) |
3584 | goto out; | 3560 | goto out; |
3585 | 3561 | ||
3586 | spin_lock(&kvm_lock); | 3562 | raw_spin_lock(&kvm_lock); |
3587 | 3563 | ||
3588 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3564 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3589 | int idx, freed_pages; | 3565 | int idx, freed_pages; |
@@ -3606,7 +3582,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3606 | if (kvm_freed) | 3582 | if (kvm_freed) |
3607 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 3583 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
3608 | 3584 | ||
3609 | spin_unlock(&kvm_lock); | 3585 | raw_spin_unlock(&kvm_lock); |
3610 | 3586 | ||
3611 | out: | 3587 | out: |
3612 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); | 3588 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bccc24c4181..751405097d8c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -31,7 +31,6 @@ | |||
31 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) | 31 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) |
32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
34 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) | ||
35 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
36 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
37 | #define PT_MAX_FULL_LEVELS 4 | 36 | #define PT_MAX_FULL_LEVELS 4 |
@@ -48,7 +47,6 @@ | |||
48 | #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) | 47 | #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) |
49 | #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) | 48 | #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) |
50 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
51 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) | ||
52 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 50 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
53 | #define PT_MAX_FULL_LEVELS 2 | 51 | #define PT_MAX_FULL_LEVELS 2 |
54 | #define CMPXCHG cmpxchg | 52 | #define CMPXCHG cmpxchg |
@@ -327,7 +325,7 @@ no_present: | |||
327 | } | 325 | } |
328 | 326 | ||
329 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 327 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
330 | u64 *spte, const void *pte) | 328 | u64 *spte, const void *pte, unsigned long mmu_seq) |
331 | { | 329 | { |
332 | pt_element_t gpte; | 330 | pt_element_t gpte; |
333 | unsigned pte_access; | 331 | unsigned pte_access; |
@@ -339,14 +337,14 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
339 | 337 | ||
340 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 338 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
341 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 339 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
342 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 340 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); |
341 | if (is_error_pfn(pfn)) { | ||
342 | kvm_release_pfn_clean(pfn); | ||
343 | return; | 343 | return; |
344 | pfn = vcpu->arch.update_pte.pfn; | 344 | } |
345 | if (is_error_pfn(pfn)) | 345 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
346 | return; | ||
347 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | ||
348 | return; | 346 | return; |
349 | kvm_get_pfn(pfn); | 347 | |
350 | /* | 348 | /* |
351 | * we call mmu_set_spte() with host_writable = true because that | 349 | * we call mmu_set_spte() with host_writable = true because that |
352 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 350 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
@@ -829,7 +827,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
829 | #undef FNAME | 827 | #undef FNAME |
830 | #undef PT_BASE_ADDR_MASK | 828 | #undef PT_BASE_ADDR_MASK |
831 | #undef PT_INDEX | 829 | #undef PT_INDEX |
832 | #undef PT_LEVEL_MASK | ||
833 | #undef PT_LVL_ADDR_MASK | 830 | #undef PT_LVL_ADDR_MASK |
834 | #undef PT_LVL_OFFSET_MASK | 831 | #undef PT_LVL_OFFSET_MASK |
835 | #undef PT_LEVEL_BITS | 832 | #undef PT_LEVEL_BITS |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 63fec1531e89..6bb15d583e47 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -135,6 +135,8 @@ struct vcpu_svm { | |||
135 | 135 | ||
136 | u32 *msrpm; | 136 | u32 *msrpm; |
137 | 137 | ||
138 | ulong nmi_iret_rip; | ||
139 | |||
138 | struct nested_state nested; | 140 | struct nested_state nested; |
139 | 141 | ||
140 | bool nmi_singlestep; | 142 | bool nmi_singlestep; |
@@ -1153,8 +1155,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1153 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | 1155 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); |
1154 | load_gs_index(svm->host.gs); | 1156 | load_gs_index(svm->host.gs); |
1155 | #else | 1157 | #else |
1158 | #ifdef CONFIG_X86_32_LAZY_GS | ||
1156 | loadsegment(gs, svm->host.gs); | 1159 | loadsegment(gs, svm->host.gs); |
1157 | #endif | 1160 | #endif |
1161 | #endif | ||
1158 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1162 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1159 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1163 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1160 | } | 1164 | } |
@@ -2653,6 +2657,7 @@ static int iret_interception(struct vcpu_svm *svm) | |||
2653 | ++svm->vcpu.stat.nmi_window_exits; | 2657 | ++svm->vcpu.stat.nmi_window_exits; |
2654 | clr_intercept(svm, INTERCEPT_IRET); | 2658 | clr_intercept(svm, INTERCEPT_IRET); |
2655 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2659 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2660 | svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); | ||
2656 | return 1; | 2661 | return 1; |
2657 | } | 2662 | } |
2658 | 2663 | ||
@@ -3474,7 +3479,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
3474 | 3479 | ||
3475 | svm->int3_injected = 0; | 3480 | svm->int3_injected = 0; |
3476 | 3481 | ||
3477 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) { | 3482 | /* |
3483 | * If we've made progress since setting HF_IRET_MASK, we've | ||
3484 | * executed an IRET and can allow NMI injection. | ||
3485 | */ | ||
3486 | if ((svm->vcpu.arch.hflags & HF_IRET_MASK) | ||
3487 | && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { | ||
3478 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 3488 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
3479 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3489 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
3480 | } | 3490 | } |
@@ -3641,19 +3651,30 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3641 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); | 3651 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); |
3642 | #else | 3652 | #else |
3643 | loadsegment(fs, svm->host.fs); | 3653 | loadsegment(fs, svm->host.fs); |
3654 | #ifndef CONFIG_X86_32_LAZY_GS | ||
3655 | loadsegment(gs, svm->host.gs); | ||
3656 | #endif | ||
3644 | #endif | 3657 | #endif |
3645 | 3658 | ||
3646 | reload_tss(vcpu); | 3659 | reload_tss(vcpu); |
3647 | 3660 | ||
3648 | local_irq_disable(); | 3661 | local_irq_disable(); |
3649 | 3662 | ||
3650 | stgi(); | ||
3651 | |||
3652 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | 3663 | vcpu->arch.cr2 = svm->vmcb->save.cr2; |
3653 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | 3664 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; |
3654 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | 3665 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; |
3655 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3666 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
3656 | 3667 | ||
3668 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | ||
3669 | kvm_before_handle_nmi(&svm->vcpu); | ||
3670 | |||
3671 | stgi(); | ||
3672 | |||
3673 | /* Any pending NMI will happen here */ | ||
3674 | |||
3675 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | ||
3676 | kvm_after_handle_nmi(&svm->vcpu); | ||
3677 | |||
3657 | sync_cr8_to_lapic(vcpu); | 3678 | sync_cr8_to_lapic(vcpu); |
3658 | 3679 | ||
3659 | svm->next_rip = 0; | 3680 | svm->next_rip = 0; |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bf89ec2cfb82..5b4cdcbd154c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -93,14 +93,14 @@ module_param(yield_on_hlt, bool, S_IRUGO); | |||
93 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | 93 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: |
94 | * ple_gap: upper bound on the amount of time between two successive | 94 | * ple_gap: upper bound on the amount of time between two successive |
95 | * executions of PAUSE in a loop. Also indicate if ple enabled. | 95 | * executions of PAUSE in a loop. Also indicate if ple enabled. |
96 | * According to test, this time is usually small than 41 cycles. | 96 | * According to test, this time is usually smaller than 128 cycles. |
97 | * ple_window: upper bound on the amount of time a guest is allowed to execute | 97 | * ple_window: upper bound on the amount of time a guest is allowed to execute |
98 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | 98 | * in a PAUSE loop. Tests indicate that most spinlocks are held for |
99 | * less than 2^12 cycles | 99 | * less than 2^12 cycles |
100 | * Time is measured based on a counter that runs at the same rate as the TSC, | 100 | * Time is measured based on a counter that runs at the same rate as the TSC, |
101 | * refer to SDM volume 3b section 21.6.13 & 22.1.3. | 101 | * refer to SDM volume 3b section 21.6.13 & 22.1.3. |
102 | */ | 102 | */ |
103 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | 103 | #define KVM_VMX_DEFAULT_PLE_GAP 128 |
104 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | 104 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 |
105 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | 105 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; |
106 | module_param(ple_gap, int, S_IRUGO); | 106 | module_param(ple_gap, int, S_IRUGO); |
@@ -176,11 +176,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
176 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 176 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
177 | } | 177 | } |
178 | 178 | ||
179 | static int init_rmode(struct kvm *kvm); | ||
180 | static u64 construct_eptp(unsigned long root_hpa); | 179 | static u64 construct_eptp(unsigned long root_hpa); |
181 | static void kvm_cpu_vmxon(u64 addr); | 180 | static void kvm_cpu_vmxon(u64 addr); |
182 | static void kvm_cpu_vmxoff(void); | 181 | static void kvm_cpu_vmxoff(void); |
183 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 182 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
183 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | ||
184 | 184 | ||
185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -1333,19 +1333,25 @@ static __init int vmx_disabled_by_bios(void) | |||
1333 | 1333 | ||
1334 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1334 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
1335 | if (msr & FEATURE_CONTROL_LOCKED) { | 1335 | if (msr & FEATURE_CONTROL_LOCKED) { |
1336 | /* launched w/ TXT and VMX disabled */ | ||
1336 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) | 1337 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
1337 | && tboot_enabled()) | 1338 | && tboot_enabled()) |
1338 | return 1; | 1339 | return 1; |
1340 | /* launched w/o TXT and VMX only enabled w/ TXT */ | ||
1339 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | 1341 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) |
1342 | && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) | ||
1340 | && !tboot_enabled()) { | 1343 | && !tboot_enabled()) { |
1341 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " | 1344 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " |
1342 | " activate TXT before enabling KVM\n"); | 1345 | "activate TXT before enabling KVM\n"); |
1343 | return 1; | 1346 | return 1; |
1344 | } | 1347 | } |
1348 | /* launched w/o TXT and VMX disabled */ | ||
1349 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
1350 | && !tboot_enabled()) | ||
1351 | return 1; | ||
1345 | } | 1352 | } |
1346 | 1353 | ||
1347 | return 0; | 1354 | return 0; |
1348 | /* locked but not enabled */ | ||
1349 | } | 1355 | } |
1350 | 1356 | ||
1351 | static void kvm_cpu_vmxon(u64 addr) | 1357 | static void kvm_cpu_vmxon(u64 addr) |
@@ -1683,6 +1689,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1683 | vmx->emulation_required = 1; | 1689 | vmx->emulation_required = 1; |
1684 | vmx->rmode.vm86_active = 0; | 1690 | vmx->rmode.vm86_active = 0; |
1685 | 1691 | ||
1692 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); | ||
1686 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); | 1693 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); |
1687 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); | 1694 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); |
1688 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | 1695 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
@@ -1756,6 +1763,19 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1756 | vmx->emulation_required = 1; | 1763 | vmx->emulation_required = 1; |
1757 | vmx->rmode.vm86_active = 1; | 1764 | vmx->rmode.vm86_active = 1; |
1758 | 1765 | ||
1766 | /* | ||
1767 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | ||
1768 | * vcpu. Call it here with phys address pointing 16M below 4G. | ||
1769 | */ | ||
1770 | if (!vcpu->kvm->arch.tss_addr) { | ||
1771 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " | ||
1772 | "called before entering vcpu\n"); | ||
1773 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
1774 | vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); | ||
1775 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
1776 | } | ||
1777 | |||
1778 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); | ||
1759 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1779 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
1760 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1780 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
1761 | 1781 | ||
@@ -1794,7 +1814,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1794 | 1814 | ||
1795 | continue_rmode: | 1815 | continue_rmode: |
1796 | kvm_mmu_reset_context(vcpu); | 1816 | kvm_mmu_reset_context(vcpu); |
1797 | init_rmode(vcpu->kvm); | ||
1798 | } | 1817 | } |
1799 | 1818 | ||
1800 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1819 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
@@ -2030,23 +2049,40 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
2030 | vmcs_writel(GUEST_CR4, hw_cr4); | 2049 | vmcs_writel(GUEST_CR4, hw_cr4); |
2031 | } | 2050 | } |
2032 | 2051 | ||
2033 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | ||
2034 | { | ||
2035 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2036 | |||
2037 | return vmcs_readl(sf->base); | ||
2038 | } | ||
2039 | |||
2040 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | 2052 | static void vmx_get_segment(struct kvm_vcpu *vcpu, |
2041 | struct kvm_segment *var, int seg) | 2053 | struct kvm_segment *var, int seg) |
2042 | { | 2054 | { |
2055 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2043 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2056 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2057 | struct kvm_save_segment *save; | ||
2044 | u32 ar; | 2058 | u32 ar; |
2045 | 2059 | ||
2060 | if (vmx->rmode.vm86_active | ||
2061 | && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES | ||
2062 | || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS | ||
2063 | || seg == VCPU_SREG_GS) | ||
2064 | && !emulate_invalid_guest_state) { | ||
2065 | switch (seg) { | ||
2066 | case VCPU_SREG_TR: save = &vmx->rmode.tr; break; | ||
2067 | case VCPU_SREG_ES: save = &vmx->rmode.es; break; | ||
2068 | case VCPU_SREG_DS: save = &vmx->rmode.ds; break; | ||
2069 | case VCPU_SREG_FS: save = &vmx->rmode.fs; break; | ||
2070 | case VCPU_SREG_GS: save = &vmx->rmode.gs; break; | ||
2071 | default: BUG(); | ||
2072 | } | ||
2073 | var->selector = save->selector; | ||
2074 | var->base = save->base; | ||
2075 | var->limit = save->limit; | ||
2076 | ar = save->ar; | ||
2077 | if (seg == VCPU_SREG_TR | ||
2078 | || var->selector == vmcs_read16(sf->selector)) | ||
2079 | goto use_saved_rmode_seg; | ||
2080 | } | ||
2046 | var->base = vmcs_readl(sf->base); | 2081 | var->base = vmcs_readl(sf->base); |
2047 | var->limit = vmcs_read32(sf->limit); | 2082 | var->limit = vmcs_read32(sf->limit); |
2048 | var->selector = vmcs_read16(sf->selector); | 2083 | var->selector = vmcs_read16(sf->selector); |
2049 | ar = vmcs_read32(sf->ar_bytes); | 2084 | ar = vmcs_read32(sf->ar_bytes); |
2085 | use_saved_rmode_seg: | ||
2050 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | 2086 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) |
2051 | ar = 0; | 2087 | ar = 0; |
2052 | var->type = ar & 15; | 2088 | var->type = ar & 15; |
@@ -2060,6 +2096,18 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
2060 | var->unusable = (ar >> 16) & 1; | 2096 | var->unusable = (ar >> 16) & 1; |
2061 | } | 2097 | } |
2062 | 2098 | ||
2099 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | ||
2100 | { | ||
2101 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2102 | struct kvm_segment s; | ||
2103 | |||
2104 | if (to_vmx(vcpu)->rmode.vm86_active) { | ||
2105 | vmx_get_segment(vcpu, &s, seg); | ||
2106 | return s.base; | ||
2107 | } | ||
2108 | return vmcs_readl(sf->base); | ||
2109 | } | ||
2110 | |||
2063 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 2111 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
2064 | { | 2112 | { |
2065 | if (!is_protmode(vcpu)) | 2113 | if (!is_protmode(vcpu)) |
@@ -2101,6 +2149,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
2101 | u32 ar; | 2149 | u32 ar; |
2102 | 2150 | ||
2103 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 2151 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { |
2152 | vmcs_write16(sf->selector, var->selector); | ||
2104 | vmx->rmode.tr.selector = var->selector; | 2153 | vmx->rmode.tr.selector = var->selector; |
2105 | vmx->rmode.tr.base = var->base; | 2154 | vmx->rmode.tr.base = var->base; |
2106 | vmx->rmode.tr.limit = var->limit; | 2155 | vmx->rmode.tr.limit = var->limit; |
@@ -2361,11 +2410,12 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) | |||
2361 | 2410 | ||
2362 | static int init_rmode_tss(struct kvm *kvm) | 2411 | static int init_rmode_tss(struct kvm *kvm) |
2363 | { | 2412 | { |
2364 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 2413 | gfn_t fn; |
2365 | u16 data = 0; | 2414 | u16 data = 0; |
2366 | int ret = 0; | 2415 | int r, idx, ret = 0; |
2367 | int r; | ||
2368 | 2416 | ||
2417 | idx = srcu_read_lock(&kvm->srcu); | ||
2418 | fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | ||
2369 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 2419 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
2370 | if (r < 0) | 2420 | if (r < 0) |
2371 | goto out; | 2421 | goto out; |
@@ -2389,12 +2439,13 @@ static int init_rmode_tss(struct kvm *kvm) | |||
2389 | 2439 | ||
2390 | ret = 1; | 2440 | ret = 1; |
2391 | out: | 2441 | out: |
2442 | srcu_read_unlock(&kvm->srcu, idx); | ||
2392 | return ret; | 2443 | return ret; |
2393 | } | 2444 | } |
2394 | 2445 | ||
2395 | static int init_rmode_identity_map(struct kvm *kvm) | 2446 | static int init_rmode_identity_map(struct kvm *kvm) |
2396 | { | 2447 | { |
2397 | int i, r, ret; | 2448 | int i, idx, r, ret; |
2398 | pfn_t identity_map_pfn; | 2449 | pfn_t identity_map_pfn; |
2399 | u32 tmp; | 2450 | u32 tmp; |
2400 | 2451 | ||
@@ -2409,6 +2460,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
2409 | return 1; | 2460 | return 1; |
2410 | ret = 0; | 2461 | ret = 0; |
2411 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; | 2462 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; |
2463 | idx = srcu_read_lock(&kvm->srcu); | ||
2412 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); | 2464 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); |
2413 | if (r < 0) | 2465 | if (r < 0) |
2414 | goto out; | 2466 | goto out; |
@@ -2424,6 +2476,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
2424 | kvm->arch.ept_identity_pagetable_done = true; | 2476 | kvm->arch.ept_identity_pagetable_done = true; |
2425 | ret = 1; | 2477 | ret = 1; |
2426 | out: | 2478 | out: |
2479 | srcu_read_unlock(&kvm->srcu, idx); | ||
2427 | return ret; | 2480 | return ret; |
2428 | } | 2481 | } |
2429 | 2482 | ||
@@ -2699,22 +2752,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2699 | return 0; | 2752 | return 0; |
2700 | } | 2753 | } |
2701 | 2754 | ||
2702 | static int init_rmode(struct kvm *kvm) | ||
2703 | { | ||
2704 | int idx, ret = 0; | ||
2705 | |||
2706 | idx = srcu_read_lock(&kvm->srcu); | ||
2707 | if (!init_rmode_tss(kvm)) | ||
2708 | goto exit; | ||
2709 | if (!init_rmode_identity_map(kvm)) | ||
2710 | goto exit; | ||
2711 | |||
2712 | ret = 1; | ||
2713 | exit: | ||
2714 | srcu_read_unlock(&kvm->srcu, idx); | ||
2715 | return ret; | ||
2716 | } | ||
2717 | |||
2718 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 2755 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
2719 | { | 2756 | { |
2720 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2757 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -2722,10 +2759,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2722 | int ret; | 2759 | int ret; |
2723 | 2760 | ||
2724 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2761 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2725 | if (!init_rmode(vmx->vcpu.kvm)) { | ||
2726 | ret = -ENOMEM; | ||
2727 | goto out; | ||
2728 | } | ||
2729 | 2762 | ||
2730 | vmx->rmode.vm86_active = 0; | 2763 | vmx->rmode.vm86_active = 0; |
2731 | 2764 | ||
@@ -2805,7 +2838,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2805 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); | 2838 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); |
2806 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) | 2839 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) |
2807 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 2840 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, |
2808 | page_to_phys(vmx->vcpu.arch.apic->regs_page)); | 2841 | __pa(vmx->vcpu.arch.apic->regs)); |
2809 | vmcs_write32(TPR_THRESHOLD, 0); | 2842 | vmcs_write32(TPR_THRESHOLD, 0); |
2810 | } | 2843 | } |
2811 | 2844 | ||
@@ -2971,6 +3004,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
2971 | if (ret) | 3004 | if (ret) |
2972 | return ret; | 3005 | return ret; |
2973 | kvm->arch.tss_addr = addr; | 3006 | kvm->arch.tss_addr = addr; |
3007 | if (!init_rmode_tss(kvm)) | ||
3008 | return -ENOMEM; | ||
3009 | |||
2974 | return 0; | 3010 | return 0; |
2975 | } | 3011 | } |
2976 | 3012 | ||
@@ -3962,7 +3998,7 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) | |||
3962 | #define Q "l" | 3998 | #define Q "l" |
3963 | #endif | 3999 | #endif |
3964 | 4000 | ||
3965 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | 4001 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3966 | { | 4002 | { |
3967 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4003 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3968 | 4004 | ||
@@ -3991,6 +4027,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3991 | asm( | 4027 | asm( |
3992 | /* Store host registers */ | 4028 | /* Store host registers */ |
3993 | "push %%"R"dx; push %%"R"bp;" | 4029 | "push %%"R"dx; push %%"R"bp;" |
4030 | "push %%"R"cx \n\t" /* placeholder for guest rcx */ | ||
3994 | "push %%"R"cx \n\t" | 4031 | "push %%"R"cx \n\t" |
3995 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" | 4032 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" |
3996 | "je 1f \n\t" | 4033 | "je 1f \n\t" |
@@ -4032,10 +4069,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4032 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 4069 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
4033 | ".Lkvm_vmx_return: " | 4070 | ".Lkvm_vmx_return: " |
4034 | /* Save guest registers, load host registers, keep flags */ | 4071 | /* Save guest registers, load host registers, keep flags */ |
4035 | "xchg %0, (%%"R"sp) \n\t" | 4072 | "mov %0, %c[wordsize](%%"R"sp) \n\t" |
4073 | "pop %0 \n\t" | ||
4036 | "mov %%"R"ax, %c[rax](%0) \n\t" | 4074 | "mov %%"R"ax, %c[rax](%0) \n\t" |
4037 | "mov %%"R"bx, %c[rbx](%0) \n\t" | 4075 | "mov %%"R"bx, %c[rbx](%0) \n\t" |
4038 | "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" | 4076 | "pop"Q" %c[rcx](%0) \n\t" |
4039 | "mov %%"R"dx, %c[rdx](%0) \n\t" | 4077 | "mov %%"R"dx, %c[rdx](%0) \n\t" |
4040 | "mov %%"R"si, %c[rsi](%0) \n\t" | 4078 | "mov %%"R"si, %c[rsi](%0) \n\t" |
4041 | "mov %%"R"di, %c[rdi](%0) \n\t" | 4079 | "mov %%"R"di, %c[rdi](%0) \n\t" |
@@ -4053,7 +4091,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4053 | "mov %%cr2, %%"R"ax \n\t" | 4091 | "mov %%cr2, %%"R"ax \n\t" |
4054 | "mov %%"R"ax, %c[cr2](%0) \n\t" | 4092 | "mov %%"R"ax, %c[cr2](%0) \n\t" |
4055 | 4093 | ||
4056 | "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" | 4094 | "pop %%"R"bp; pop %%"R"dx \n\t" |
4057 | "setbe %c[fail](%0) \n\t" | 4095 | "setbe %c[fail](%0) \n\t" |
4058 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 4096 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
4059 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), | 4097 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), |
@@ -4076,7 +4114,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4076 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), | 4114 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), |
4077 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), | 4115 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), |
4078 | #endif | 4116 | #endif |
4079 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) | 4117 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), |
4118 | [wordsize]"i"(sizeof(ulong)) | ||
4080 | : "cc", "memory" | 4119 | : "cc", "memory" |
4081 | , R"ax", R"bx", R"di", R"si" | 4120 | , R"ax", R"bx", R"di", R"si" |
4082 | #ifdef CONFIG_X86_64 | 4121 | #ifdef CONFIG_X86_64 |
@@ -4183,8 +4222,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4183 | if (!kvm->arch.ept_identity_map_addr) | 4222 | if (!kvm->arch.ept_identity_map_addr) |
4184 | kvm->arch.ept_identity_map_addr = | 4223 | kvm->arch.ept_identity_map_addr = |
4185 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 4224 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; |
4225 | err = -ENOMEM; | ||
4186 | if (alloc_identity_pagetable(kvm) != 0) | 4226 | if (alloc_identity_pagetable(kvm) != 0) |
4187 | goto free_vmcs; | 4227 | goto free_vmcs; |
4228 | if (!init_rmode_identity_map(kvm)) | ||
4229 | goto free_vmcs; | ||
4188 | } | 4230 | } |
4189 | 4231 | ||
4190 | return &vmx->vcpu; | 4232 | return &vmx->vcpu; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bcc0efce85bf..f1e4025f1ae2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -81,9 +81,10 @@ | |||
81 | * - enable LME and LMA per default on 64 bit KVM | 81 | * - enable LME and LMA per default on 64 bit KVM |
82 | */ | 82 | */ |
83 | #ifdef CONFIG_X86_64 | 83 | #ifdef CONFIG_X86_64 |
84 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; | 84 | static |
85 | u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); | ||
85 | #else | 86 | #else |
86 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; | 87 | static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); |
87 | #endif | 88 | #endif |
88 | 89 | ||
89 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | 90 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM |
@@ -360,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
360 | 361 | ||
361 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 362 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
362 | { | 363 | { |
364 | kvm_make_request(KVM_REQ_NMI, vcpu); | ||
363 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 365 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
364 | vcpu->arch.nmi_pending = 1; | ||
365 | } | 366 | } |
366 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 367 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
367 | 368 | ||
@@ -525,8 +526,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
525 | 526 | ||
526 | kvm_x86_ops->set_cr0(vcpu, cr0); | 527 | kvm_x86_ops->set_cr0(vcpu, cr0); |
527 | 528 | ||
528 | if ((cr0 ^ old_cr0) & X86_CR0_PG) | 529 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { |
529 | kvm_clear_async_pf_completion_queue(vcpu); | 530 | kvm_clear_async_pf_completion_queue(vcpu); |
531 | kvm_async_pf_hash_reset(vcpu); | ||
532 | } | ||
530 | 533 | ||
531 | if ((cr0 ^ old_cr0) & update_bits) | 534 | if ((cr0 ^ old_cr0) & update_bits) |
532 | kvm_mmu_reset_context(vcpu); | 535 | kvm_mmu_reset_context(vcpu); |
@@ -1017,7 +1020,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1017 | unsigned long flags; | 1020 | unsigned long flags; |
1018 | s64 sdiff; | 1021 | s64 sdiff; |
1019 | 1022 | ||
1020 | spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1023 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1021 | offset = data - native_read_tsc(); | 1024 | offset = data - native_read_tsc(); |
1022 | ns = get_kernel_ns(); | 1025 | ns = get_kernel_ns(); |
1023 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1026 | elapsed = ns - kvm->arch.last_tsc_nsec; |
@@ -1050,7 +1053,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1050 | kvm->arch.last_tsc_write = data; | 1053 | kvm->arch.last_tsc_write = data; |
1051 | kvm->arch.last_tsc_offset = offset; | 1054 | kvm->arch.last_tsc_offset = offset; |
1052 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | 1055 | kvm_x86_ops->write_tsc_offset(vcpu, offset); |
1053 | spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | 1056 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); |
1054 | 1057 | ||
1055 | /* Reset of TSC must disable overshoot protection below */ | 1058 | /* Reset of TSC must disable overshoot protection below */ |
1056 | vcpu->arch.hv_clock.tsc_timestamp = 0; | 1059 | vcpu->arch.hv_clock.tsc_timestamp = 0; |
@@ -1453,6 +1456,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | |||
1453 | return 0; | 1456 | return 0; |
1454 | } | 1457 | } |
1455 | 1458 | ||
1459 | static void kvmclock_reset(struct kvm_vcpu *vcpu) | ||
1460 | { | ||
1461 | if (vcpu->arch.time_page) { | ||
1462 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1463 | vcpu->arch.time_page = NULL; | ||
1464 | } | ||
1465 | } | ||
1466 | |||
1456 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1467 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1457 | { | 1468 | { |
1458 | switch (msr) { | 1469 | switch (msr) { |
@@ -1510,10 +1521,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1510 | break; | 1521 | break; |
1511 | case MSR_KVM_SYSTEM_TIME_NEW: | 1522 | case MSR_KVM_SYSTEM_TIME_NEW: |
1512 | case MSR_KVM_SYSTEM_TIME: { | 1523 | case MSR_KVM_SYSTEM_TIME: { |
1513 | if (vcpu->arch.time_page) { | 1524 | kvmclock_reset(vcpu); |
1514 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1515 | vcpu->arch.time_page = NULL; | ||
1516 | } | ||
1517 | 1525 | ||
1518 | vcpu->arch.time = data; | 1526 | vcpu->arch.time = data; |
1519 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 1527 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
@@ -1592,6 +1600,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1592 | } else | 1600 | } else |
1593 | return set_msr_hyperv(vcpu, msr, data); | 1601 | return set_msr_hyperv(vcpu, msr, data); |
1594 | break; | 1602 | break; |
1603 | case MSR_IA32_BBL_CR_CTL3: | ||
1604 | /* Drop writes to this legacy MSR -- see rdmsr | ||
1605 | * counterpart for further detail. | ||
1606 | */ | ||
1607 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | ||
1608 | break; | ||
1595 | default: | 1609 | default: |
1596 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1610 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1597 | return xen_hvm_config(vcpu, data); | 1611 | return xen_hvm_config(vcpu, data); |
@@ -1846,6 +1860,19 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1846 | } else | 1860 | } else |
1847 | return get_msr_hyperv(vcpu, msr, pdata); | 1861 | return get_msr_hyperv(vcpu, msr, pdata); |
1848 | break; | 1862 | break; |
1863 | case MSR_IA32_BBL_CR_CTL3: | ||
1864 | /* This legacy MSR exists but isn't fully documented in current | ||
1865 | * silicon. It is however accessed by winxp in very narrow | ||
1866 | * scenarios where it sets bit #19, itself documented as | ||
1867 | * a "reserved" bit. Best effort attempt to source coherent | ||
1868 | * read data here should the balance of the register be | ||
1869 | * interpreted by the guest: | ||
1870 | * | ||
1871 | * L2 cache control register 3: 64GB range, 256KB size, | ||
1872 | * enabled, latency 0x1, configured | ||
1873 | */ | ||
1874 | data = 0xbe702111; | ||
1875 | break; | ||
1849 | default: | 1876 | default: |
1850 | if (!ignore_msrs) { | 1877 | if (!ignore_msrs) { |
1851 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1878 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -2100,8 +2127,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2100 | if (check_tsc_unstable()) { | 2127 | if (check_tsc_unstable()) { |
2101 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); | 2128 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); |
2102 | vcpu->arch.tsc_catchup = 1; | 2129 | vcpu->arch.tsc_catchup = 1; |
2103 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2104 | } | 2130 | } |
2131 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2105 | if (vcpu->cpu != cpu) | 2132 | if (vcpu->cpu != cpu) |
2106 | kvm_migrate_timers(vcpu); | 2133 | kvm_migrate_timers(vcpu); |
2107 | vcpu->cpu = cpu; | 2134 | vcpu->cpu = cpu; |
@@ -2575,9 +2602,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2575 | if (mce->status & MCI_STATUS_UC) { | 2602 | if (mce->status & MCI_STATUS_UC) { |
2576 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2603 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
2577 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { | 2604 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
2578 | printk(KERN_DEBUG "kvm: set_mce: " | ||
2579 | "injects mce exception while " | ||
2580 | "previous one is in progress!\n"); | ||
2581 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 2605 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2582 | return 0; | 2606 | return 0; |
2583 | } | 2607 | } |
@@ -2648,8 +2672,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2648 | vcpu->arch.interrupt.pending = events->interrupt.injected; | 2672 | vcpu->arch.interrupt.pending = events->interrupt.injected; |
2649 | vcpu->arch.interrupt.nr = events->interrupt.nr; | 2673 | vcpu->arch.interrupt.nr = events->interrupt.nr; |
2650 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2674 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2651 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
2652 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
2653 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | 2675 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) |
2654 | kvm_x86_ops->set_interrupt_shadow(vcpu, | 2676 | kvm_x86_ops->set_interrupt_shadow(vcpu, |
2655 | events->interrupt.shadow); | 2677 | events->interrupt.shadow); |
@@ -4140,8 +4162,8 @@ static unsigned long emulator_get_cached_segment_base(int seg, | |||
4140 | return get_segment_base(vcpu, seg); | 4162 | return get_segment_base(vcpu, seg); |
4141 | } | 4163 | } |
4142 | 4164 | ||
4143 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | 4165 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, |
4144 | struct kvm_vcpu *vcpu) | 4166 | int seg, struct kvm_vcpu *vcpu) |
4145 | { | 4167 | { |
4146 | struct kvm_segment var; | 4168 | struct kvm_segment var; |
4147 | 4169 | ||
@@ -4154,6 +4176,10 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
4154 | var.limit >>= 12; | 4176 | var.limit >>= 12; |
4155 | set_desc_limit(desc, var.limit); | 4177 | set_desc_limit(desc, var.limit); |
4156 | set_desc_base(desc, (unsigned long)var.base); | 4178 | set_desc_base(desc, (unsigned long)var.base); |
4179 | #ifdef CONFIG_X86_64 | ||
4180 | if (base3) | ||
4181 | *base3 = var.base >> 32; | ||
4182 | #endif | ||
4157 | desc->type = var.type; | 4183 | desc->type = var.type; |
4158 | desc->s = var.s; | 4184 | desc->s = var.s; |
4159 | desc->dpl = var.dpl; | 4185 | desc->dpl = var.dpl; |
@@ -4166,8 +4192,8 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
4166 | return true; | 4192 | return true; |
4167 | } | 4193 | } |
4168 | 4194 | ||
4169 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | 4195 | static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, |
4170 | struct kvm_vcpu *vcpu) | 4196 | int seg, struct kvm_vcpu *vcpu) |
4171 | { | 4197 | { |
4172 | struct kvm_segment var; | 4198 | struct kvm_segment var; |
4173 | 4199 | ||
@@ -4175,6 +4201,9 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | |||
4175 | kvm_get_segment(vcpu, &var, seg); | 4201 | kvm_get_segment(vcpu, &var, seg); |
4176 | 4202 | ||
4177 | var.base = get_desc_base(desc); | 4203 | var.base = get_desc_base(desc); |
4204 | #ifdef CONFIG_X86_64 | ||
4205 | var.base |= ((u64)base3) << 32; | ||
4206 | #endif | ||
4178 | var.limit = get_desc_limit(desc); | 4207 | var.limit = get_desc_limit(desc); |
4179 | if (desc->g) | 4208 | if (desc->g) |
4180 | var.limit = (var.limit << 12) | 0xfff; | 4209 | var.limit = (var.limit << 12) | 0xfff; |
@@ -4390,41 +4419,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4390 | vcpu->arch.emulate_ctxt.have_exception = false; | 4419 | vcpu->arch.emulate_ctxt.have_exception = false; |
4391 | vcpu->arch.emulate_ctxt.perm_ok = false; | 4420 | vcpu->arch.emulate_ctxt.perm_ok = false; |
4392 | 4421 | ||
4422 | vcpu->arch.emulate_ctxt.only_vendor_specific_insn | ||
4423 | = emulation_type & EMULTYPE_TRAP_UD; | ||
4424 | |||
4393 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); | 4425 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); |
4394 | if (r == X86EMUL_PROPAGATE_FAULT) | ||
4395 | goto done; | ||
4396 | 4426 | ||
4397 | trace_kvm_emulate_insn_start(vcpu); | 4427 | trace_kvm_emulate_insn_start(vcpu); |
4398 | |||
4399 | /* Only allow emulation of specific instructions on #UD | ||
4400 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | ||
4401 | if (emulation_type & EMULTYPE_TRAP_UD) { | ||
4402 | if (!c->twobyte) | ||
4403 | return EMULATE_FAIL; | ||
4404 | switch (c->b) { | ||
4405 | case 0x01: /* VMMCALL */ | ||
4406 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
4407 | return EMULATE_FAIL; | ||
4408 | break; | ||
4409 | case 0x34: /* sysenter */ | ||
4410 | case 0x35: /* sysexit */ | ||
4411 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4412 | return EMULATE_FAIL; | ||
4413 | break; | ||
4414 | case 0x05: /* syscall */ | ||
4415 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4416 | return EMULATE_FAIL; | ||
4417 | break; | ||
4418 | default: | ||
4419 | return EMULATE_FAIL; | ||
4420 | } | ||
4421 | |||
4422 | if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) | ||
4423 | return EMULATE_FAIL; | ||
4424 | } | ||
4425 | |||
4426 | ++vcpu->stat.insn_emulation; | 4428 | ++vcpu->stat.insn_emulation; |
4427 | if (r) { | 4429 | if (r) { |
4430 | if (emulation_type & EMULTYPE_TRAP_UD) | ||
4431 | return EMULATE_FAIL; | ||
4428 | if (reexecute_instruction(vcpu, cr2)) | 4432 | if (reexecute_instruction(vcpu, cr2)) |
4429 | return EMULATE_DONE; | 4433 | return EMULATE_DONE; |
4430 | if (emulation_type & EMULTYPE_SKIP) | 4434 | if (emulation_type & EMULTYPE_SKIP) |
@@ -4452,7 +4456,6 @@ restart: | |||
4452 | return handle_emulation_failure(vcpu); | 4456 | return handle_emulation_failure(vcpu); |
4453 | } | 4457 | } |
4454 | 4458 | ||
4455 | done: | ||
4456 | if (vcpu->arch.emulate_ctxt.have_exception) { | 4459 | if (vcpu->arch.emulate_ctxt.have_exception) { |
4457 | inject_emulated_exception(vcpu); | 4460 | inject_emulated_exception(vcpu); |
4458 | r = EMULATE_DONE; | 4461 | r = EMULATE_DONE; |
@@ -4562,7 +4565,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4562 | 4565 | ||
4563 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); | 4566 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); |
4564 | 4567 | ||
4565 | spin_lock(&kvm_lock); | 4568 | raw_spin_lock(&kvm_lock); |
4566 | list_for_each_entry(kvm, &vm_list, vm_list) { | 4569 | list_for_each_entry(kvm, &vm_list, vm_list) { |
4567 | kvm_for_each_vcpu(i, vcpu, kvm) { | 4570 | kvm_for_each_vcpu(i, vcpu, kvm) { |
4568 | if (vcpu->cpu != freq->cpu) | 4571 | if (vcpu->cpu != freq->cpu) |
@@ -4572,7 +4575,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4572 | send_ipi = 1; | 4575 | send_ipi = 1; |
4573 | } | 4576 | } |
4574 | } | 4577 | } |
4575 | spin_unlock(&kvm_lock); | 4578 | raw_spin_unlock(&kvm_lock); |
4576 | 4579 | ||
4577 | if (freq->old < freq->new && send_ipi) { | 4580 | if (freq->old < freq->new && send_ipi) { |
4578 | /* | 4581 | /* |
@@ -5185,6 +5188,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5185 | r = 1; | 5188 | r = 1; |
5186 | goto out; | 5189 | goto out; |
5187 | } | 5190 | } |
5191 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | ||
5192 | vcpu->arch.nmi_pending = true; | ||
5188 | } | 5193 | } |
5189 | 5194 | ||
5190 | r = kvm_mmu_reload(vcpu); | 5195 | r = kvm_mmu_reload(vcpu); |
@@ -5213,14 +5218,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5213 | kvm_load_guest_fpu(vcpu); | 5218 | kvm_load_guest_fpu(vcpu); |
5214 | kvm_load_guest_xcr0(vcpu); | 5219 | kvm_load_guest_xcr0(vcpu); |
5215 | 5220 | ||
5216 | atomic_set(&vcpu->guest_mode, 1); | 5221 | vcpu->mode = IN_GUEST_MODE; |
5217 | smp_wmb(); | 5222 | |
5223 | /* We should set ->mode before check ->requests, | ||
5224 | * see the comment in make_all_cpus_request. | ||
5225 | */ | ||
5226 | smp_mb(); | ||
5218 | 5227 | ||
5219 | local_irq_disable(); | 5228 | local_irq_disable(); |
5220 | 5229 | ||
5221 | if (!atomic_read(&vcpu->guest_mode) || vcpu->requests | 5230 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests |
5222 | || need_resched() || signal_pending(current)) { | 5231 | || need_resched() || signal_pending(current)) { |
5223 | atomic_set(&vcpu->guest_mode, 0); | 5232 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5224 | smp_wmb(); | 5233 | smp_wmb(); |
5225 | local_irq_enable(); | 5234 | local_irq_enable(); |
5226 | preempt_enable(); | 5235 | preempt_enable(); |
@@ -5256,7 +5265,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5256 | 5265 | ||
5257 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | 5266 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); |
5258 | 5267 | ||
5259 | atomic_set(&vcpu->guest_mode, 0); | 5268 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5260 | smp_wmb(); | 5269 | smp_wmb(); |
5261 | local_irq_enable(); | 5270 | local_irq_enable(); |
5262 | 5271 | ||
@@ -5574,7 +5583,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5574 | struct kvm_sregs *sregs) | 5583 | struct kvm_sregs *sregs) |
5575 | { | 5584 | { |
5576 | int mmu_reset_needed = 0; | 5585 | int mmu_reset_needed = 0; |
5577 | int pending_vec, max_bits; | 5586 | int pending_vec, max_bits, idx; |
5578 | struct desc_ptr dt; | 5587 | struct desc_ptr dt; |
5579 | 5588 | ||
5580 | dt.size = sregs->idt.limit; | 5589 | dt.size = sregs->idt.limit; |
@@ -5603,10 +5612,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5603 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5612 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
5604 | if (sregs->cr4 & X86_CR4_OSXSAVE) | 5613 | if (sregs->cr4 & X86_CR4_OSXSAVE) |
5605 | update_cpuid(vcpu); | 5614 | update_cpuid(vcpu); |
5615 | |||
5616 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5606 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5617 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
5607 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); | 5618 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
5608 | mmu_reset_needed = 1; | 5619 | mmu_reset_needed = 1; |
5609 | } | 5620 | } |
5621 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
5610 | 5622 | ||
5611 | if (mmu_reset_needed) | 5623 | if (mmu_reset_needed) |
5612 | kvm_mmu_reset_context(vcpu); | 5624 | kvm_mmu_reset_context(vcpu); |
@@ -5617,8 +5629,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5617 | if (pending_vec < max_bits) { | 5629 | if (pending_vec < max_bits) { |
5618 | kvm_queue_interrupt(vcpu, pending_vec, false); | 5630 | kvm_queue_interrupt(vcpu, pending_vec, false); |
5619 | pr_debug("Set back pending irq %d\n", pending_vec); | 5631 | pr_debug("Set back pending irq %d\n", pending_vec); |
5620 | if (irqchip_in_kernel(vcpu->kvm)) | ||
5621 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
5622 | } | 5632 | } |
5623 | 5633 | ||
5624 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 5634 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
@@ -5814,10 +5824,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
5814 | 5824 | ||
5815 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 5825 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
5816 | { | 5826 | { |
5817 | if (vcpu->arch.time_page) { | 5827 | kvmclock_reset(vcpu); |
5818 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
5819 | vcpu->arch.time_page = NULL; | ||
5820 | } | ||
5821 | 5828 | ||
5822 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | 5829 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); |
5823 | fx_free(vcpu); | 5830 | fx_free(vcpu); |
@@ -5878,6 +5885,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5878 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5885 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5879 | vcpu->arch.apf.msr_val = 0; | 5886 | vcpu->arch.apf.msr_val = 0; |
5880 | 5887 | ||
5888 | kvmclock_reset(vcpu); | ||
5889 | |||
5881 | kvm_clear_async_pf_completion_queue(vcpu); | 5890 | kvm_clear_async_pf_completion_queue(vcpu); |
5882 | kvm_async_pf_hash_reset(vcpu); | 5891 | kvm_async_pf_hash_reset(vcpu); |
5883 | vcpu->arch.apf.halted = false; | 5892 | vcpu->arch.apf.halted = false; |
@@ -6005,7 +6014,7 @@ int kvm_arch_init_vm(struct kvm *kvm) | |||
6005 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 6014 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
6006 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); | 6015 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); |
6007 | 6016 | ||
6008 | spin_lock_init(&kvm->arch.tsc_write_lock); | 6017 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); |
6009 | 6018 | ||
6010 | return 0; | 6019 | return 0; |
6011 | } | 6020 | } |
@@ -6103,7 +6112,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6103 | int user_alloc) | 6112 | int user_alloc) |
6104 | { | 6113 | { |
6105 | 6114 | ||
6106 | int npages = mem->memory_size >> PAGE_SHIFT; | 6115 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; |
6107 | 6116 | ||
6108 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | 6117 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { |
6109 | int ret; | 6118 | int ret; |
@@ -6118,12 +6127,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6118 | "failed to munmap memory\n"); | 6127 | "failed to munmap memory\n"); |
6119 | } | 6128 | } |
6120 | 6129 | ||
6130 | if (!kvm->arch.n_requested_mmu_pages) | ||
6131 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
6132 | |||
6121 | spin_lock(&kvm->mmu_lock); | 6133 | spin_lock(&kvm->mmu_lock); |
6122 | if (!kvm->arch.n_requested_mmu_pages) { | 6134 | if (nr_mmu_pages) |
6123 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
6124 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 6135 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
6125 | } | ||
6126 | |||
6127 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 6136 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
6128 | spin_unlock(&kvm->mmu_lock); | 6137 | spin_unlock(&kvm->mmu_lock); |
6129 | } | 6138 | } |
@@ -6157,7 +6166,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
6157 | 6166 | ||
6158 | me = get_cpu(); | 6167 | me = get_cpu(); |
6159 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | 6168 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
6160 | if (atomic_xchg(&vcpu->guest_mode, 0)) | 6169 | if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) |
6161 | smp_send_reschedule(cpu); | 6170 | smp_send_reschedule(cpu); |
6162 | put_cpu(); | 6171 | put_cpu(); |
6163 | } | 6172 | } |