Diffstat (limited to 'arch/x86'):

 arch/x86/include/asm/kvm_emulate.h    |  10
 arch/x86/include/asm/kvm_host.h       |  23
 arch/x86/include/asm/pvclock.h        |   2
 arch/x86/include/uapi/asm/kvm.h       |   6
 arch/x86/include/uapi/asm/msr-index.h |   1
 arch/x86/kernel/kvmclock.c            |   1
 arch/x86/kernel/pvclock.c             |  13
 arch/x86/kvm/Kconfig                  |   1
 arch/x86/kvm/Makefile                 |   2
 arch/x86/kvm/cpuid.c                  | 115
 arch/x86/kvm/cpuid.h                  |   5
 arch/x86/kvm/emulate.c                | 130
 arch/x86/kvm/mmu.c                    | 115
 arch/x86/kvm/mmu.h                    |   4
 arch/x86/kvm/svm.c                    |   8
 arch/x86/kvm/vmx.c                    | 158
 arch/x86/kvm/x86.c                    | 108
 arch/x86/kvm/x86.h                    |   1
 18 files changed, 489 insertions(+), 214 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 15f960c06ff7..24ec1216596e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -274,13 +274,17 @@ struct x86_emulate_ctxt {
 
 	bool guest_mode; /* guest running a nested guest */
 	bool perm_ok; /* do not check permissions if true */
-	bool only_vendor_specific_insn;
+	bool ud;	/* inject an #UD if host doesn't support insn */
 
 	bool have_exception;
 	struct x86_exception exception;
 
-	/* decode cache */
-	u8 twobyte;
+	/*
+	 * decode cache
+	 */
+
+	/* current opcode length in bytes */
+	u8 opcode_len;
 	u8 b;
 	u8 intercept;
 	u8 lock_prefix;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c76ff74a98f2..ae5d7830855c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
@@ -253,7 +260,6 @@ struct kvm_pio_request {
  * mode.
  */
 struct kvm_mmu {
-	void (*new_cr3)(struct kvm_vcpu *vcpu);
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
 	unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
 	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
 			  bool prefault);
 	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
 				  struct x86_exception *fault);
-	void (*free)(struct kvm_vcpu *vcpu);
 	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
 			    struct x86_exception *exception);
 	gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
 
 	struct fpu guest_fpu;
 	u64 xcr0;
+	u64 guest_supported_xcr0;
+	u32 guest_xstate_size;
 
 	struct kvm_pio_request pio;
 	void *pio_data;
@@ -557,7 +564,9 @@ struct kvm_arch {
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
-	int iommu_flags;
+	bool iommu_noncoherent;
+#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
+	atomic_t noncoherent_dma_count;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -780,11 +789,11 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
@@ -922,13 +931,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
 
-int complete_pio(struct kvm_vcpu *vcpu);
-bool kvm_check_iopl(struct kvm_vcpu *vcpu);
-
 static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
 {
 	return gpa;
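Note: the gfn_to_index() helper added above gives a gfn's offset, in large-page-sized steps, from the start of its memslot at a given paging level. A minimal user-space sketch of the same math (the KVM_HPAGE_GFN_SHIFT definition is reproduced here under the usual x86 9-bits-per-level assumption and is not part of this patch):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t gfn_t;

#define PT_PAGE_TABLE_LEVEL	1
#define KVM_HPAGE_GFN_SHIFT(x)	(((x) - PT_PAGE_TABLE_LEVEL) * 9)

static gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
	       (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}

int main(void)
{
	/*
	 * Level 2 (2MB pages) works in 512-gfn steps: gfn 0x9ff is in
	 * step 4, base gfn 0x400 in step 2, so the index is 2.
	 */
	printf("%llu\n",
	       (unsigned long long)gfn_to_index(0x9ff, 0x400, 2));
	return 0;
}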
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index be8269b00e2a..d6b078e9fa28 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -14,6 +14,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
 			    struct timespec *ts);
 void pvclock_resume(void);
 
+void pvclock_touch_watchdogs(void);
+
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
 	__u32 padding[3];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX	BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC	BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT	BIT(2)
 
 /* for KVM_SET_CPUID2 */
 struct kvm_cpuid2 {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb0465090ae5..b93e09a0fa21 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1570e0741344..e6041094ff26 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -139,6 +139,7 @@ bool kvm_check_and_clear_guest_paused(void)
 	src = &hv_clock[cpu].pvti;
 	if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
 		src->flags &= ~PVCLOCK_GUEST_STOPPED;
+		pvclock_touch_watchdogs();
 		ret = true;
 	}
 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index a16bae3f83b3..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -43,6 +43,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
 	return pv_tsc_khz;
 }
 
+void pvclock_touch_watchdogs(void)
+{
+	touch_softlockup_watchdog_sync();
+	clocksource_touch_watchdog();
+	rcu_cpu_stall_reset();
+	reset_hung_task_detector();
+}
+
 static atomic64_t last_value = ATOMIC64_INIT(0);
 
 void pvclock_resume(void)
@@ -74,6 +82,11 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 		version = __pvclock_read_cycles(src, &ret, &flags);
 	} while ((src->version & 1) || version != src->version);
 
+	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
+		src->flags &= ~PVCLOCK_GUEST_STOPPED;
+		pvclock_touch_watchdogs();
+	}
+
 	if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
 	    (flags & PVCLOCK_TSC_STABLE_BIT))
 		return ret;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..b89c5db2b832 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select KVM_VFIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index bf4fb04d0112..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
 				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b110fe6c03d4..c6976257eff5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,6 +23,26 @@
 #include "mmu.h"
 #include "trace.h"
 
+static u32 xstate_required_size(u64 xstate_bv)
+{
+	int feature_bit = 0;
+	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
+	xstate_bv &= ~XSTATE_FPSSE;
+	while (xstate_bv) {
+		if (xstate_bv & 0x1) {
+			u32 eax, ebx, ecx, edx;
+			cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
+			ret = max(ret, eax + ebx);
+		}
+
+		xstate_bv >>= 1;
+		feature_bit++;
+	}
+
+	return ret;
+}
+
 void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
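Note: xstate_required_size() above sizes the guest XSAVE area by walking the enabled feature bits past FP/SSE and asking CPUID leaf 0xD (sub-leaf = feature bit) for each component's size (EAX) and offset (EBX); the area must reach the largest offset + size. A self-contained illustration, with a table standing in for the cpuid_count() results (the AVX numbers are typical values, not taken from this patch):

#include <stdio.h>
#include <stdint.h>

#define XSAVE_HDR_OFFSET 512	/* legacy FXSAVE region comes first */
#define XSAVE_HDR_SIZE	  64	/* then the XSAVE header */

struct xcomp { uint32_t size, offset; };	/* stand-in for CPUID 0xD */

static uint32_t xstate_required_size(uint64_t xstate_bv,
				     const struct xcomp *comp)
{
	uint32_t ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
	int bit = 0;

	xstate_bv &= ~3ULL;	/* x87 and SSE live in the legacy area */
	while (xstate_bv) {
		if (xstate_bv & 1) {
			uint32_t end = comp[bit].offset + comp[bit].size;
			if (end > ret)
				ret = end;
		}
		xstate_bv >>= 1;
		bit++;
	}
	return ret;
}

int main(void)
{
	/* bit 2 = AVX: 256 bytes at offset 576 */
	struct xcomp comp[3] = { {0, 0}, {0, 0}, {256, 576} };

	printf("%u\n", xstate_required_size(0x7, comp));	/* 832 */
	return 0;
}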
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 			apic->lapic_timer.timer_mode_mask = 1 << 17;
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+	if (!best) {
+		vcpu->arch.guest_supported_xcr0 = 0;
+		vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+	} else {
+		vcpu->arch.guest_supported_xcr0 =
+			(best->eax | ((u64)best->edx << 32)) &
+			host_xcr0 & KVM_SUPPORTED_XCR0;
+		vcpu->arch.guest_xstate_size =
+			xstate_required_size(vcpu->arch.guest_supported_xcr0);
+	}
+
 	kvm_pmu_cpuid_update(vcpu);
 }
 
@@ -182,13 +214,35 @@ static bool supported_xcr0_bit(unsigned bit)
 {
 	u64 mask = ((u64)1 << bit);
 
-	return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+	return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
 }
 
 #define F(x) bit(X86_FEATURE_##x)
 
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			u32 index, int *nent, int maxnent)
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+				   u32 func, u32 index, int *nent, int maxnent)
+{
+	switch (func) {
+	case 0:
+		entry->eax = 1;		/* only one leaf currently */
+		++*nent;
+		break;
+	case 1:
+		entry->ecx = F(MOVBE);
+		++*nent;
+		break;
+	default:
+		break;
+	}
+
+	entry->function = func;
+	entry->index = index;
+
+	return 0;
+}
+
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+				 u32 index, int *nent, int maxnent)
 {
 	int r;
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -383,6 +437,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	case 0xd: {
 		int idx, i;
 
+		entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
+		entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		for (idx = 1, i = 1; idx < 64; ++idx) {
 			if (*nent >= maxnent)
@@ -481,6 +537,15 @@ out:
 	return r;
 }
 
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+			u32 idx, int *nent, int maxnent, unsigned int type)
+{
+	if (type == KVM_GET_EMULATED_CPUID)
+		return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+
+	return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+}
+
 #undef F
 
 struct kvm_cpuid_param {
@@ -495,8 +560,36 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
 
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries)
+static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+				 __u32 num_entries, unsigned int ioctl_type)
+{
+	int i;
+	__u32 pad[3];
+
+	if (ioctl_type != KVM_GET_EMULATED_CPUID)
+		return false;
+
+	/*
+	 * We want to make sure that ->padding is being passed clean from
+	 * userspace in case we want to use it for something in the future.
+	 *
+	 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+	 * have to give ourselves satisfied only with the emulated side. /me
+	 * sheds a tear.
+	 */
+	for (i = 0; i < num_entries; i++) {
+		if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
+			return true;
+
+		if (pad[0] || pad[1] || pad[2])
+			return true;
+	}
+	return false;
+}
+
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG, i;
@@ -513,8 +606,12 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		goto out;
 	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+
+	if (sanity_check_entries(entries, cpuid->nent, type))
+		return -EINVAL;
+
 	r = -ENOMEM;
-	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
+	cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
 	if (!cpuid_entries)
 		goto out;
 
@@ -526,7 +623,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 			continue;
 
 		r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
-				 &nent, cpuid->nent);
+				 &nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
@@ -537,7 +634,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		limit = cpuid_entries[nent - 1].eax;
 		for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
 			r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
-					 &nent, cpuid->nent);
+					 &nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
@@ -661,6 +758,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
 		*edx = best->edx;
 	} else
 		*eax = *ebx = *ecx = *edx = 0;
+	trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
 }
 EXPORT_SYMBOL_GPL(kvm_cpuid);
 
@@ -676,6 +774,5 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 	kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	trace_kvm_cpuid(function, eax, ebx, ecx, edx);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
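Note: the renamed kvm_dev_ioctl_get_cpuid() now backs both KVM_GET_SUPPORTED_CPUID and the new KVM_GET_EMULATED_CPUID system ioctl. A rough user-space sketch of querying the emulated leaves (error handling trimmed; assumes a kernel carrying this series, and relies on calloc() keeping the padding[] fields zero, which sanity_check_entries() enforces):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int nent = 8, i;
	struct kvm_cpuid2 *cpuid;

	cpuid = calloc(1, sizeof(*cpuid) +
			  nent * sizeof(struct kvm_cpuid_entry2));
	cpuid->nent = nent;

	/* padding[] left zeroed by calloc(), as the kernel now requires */
	if (ioctl(kvm, KVM_GET_EMULATED_CPUID, cpuid) < 0) {
		perror("KVM_GET_EMULATED_CPUID");
		return 1;
	}

	for (i = 0; i < cpuid->nent; i++)
		printf("func %#x: ecx=%#x\n",
		       cpuid->entries[i].function, cpuid->entries[i].ecx);
	return 0;
}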
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
 void kvm_update_cpuid(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries);
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type);
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
 			     struct kvm_cpuid_entry __user *entries);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ddc3f3d2afdb..07ffca0a89e9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -130,7 +130,7 @@
 #define Mov (1<<20)
 /* Misc flags */
 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
-#define VendorSpecific (1<<22) /* Vendor specific instruction */
+#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
 #define Undefined (1<<25) /* No Such Instruction */
@@ -785,9 +785,10 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
-			     int highbyte_regs)
+			     int byteop)
 {
 	void *p;
+	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
 
 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
 		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
@@ -1024,7 +1025,6 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 				    struct operand *op)
 {
 	unsigned reg = ctxt->modrm_reg;
-	int highbyte_regs = ctxt->rex_prefix == 0;
 
 	if (!(ctxt->d & ModRM))
 		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
@@ -1045,13 +1045,9 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 	}
 
 	op->type = OP_REG;
-	if (ctxt->d & ByteOp) {
-		op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
-		op->bytes = 1;
-	} else {
-		op->addr.reg = decode_register(ctxt, reg, 0);
-		op->bytes = ctxt->op_bytes;
-	}
+	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
+
 	fetch_register_operand(op);
 	op->orig_val = op->val;
 }
@@ -1082,12 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	ctxt->modrm_seg = VCPU_SREG_DS;
 
 	if (ctxt->modrm_mod == 3) {
-		int highbyte_regs = ctxt->rex_prefix == 0;
-
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
-					       highbyte_regs && (ctxt->d & ByteOp));
+					       ctxt->d & ByteOp);
 		if (ctxt->d & Sse) {
 			op->type = OP_XMM;
 			op->bytes = 16;
@@ -2961,6 +2955,46 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+#define FFL(x) bit(X86_FEATURE_##x)
+
+static int em_movbe(struct x86_emulate_ctxt *ctxt)
+{
+	u32 ebx, ecx, edx, eax = 1;
+	u16 tmp;
+
+	/*
+	 * Check MOVBE is set in the guest-visible CPUID leaf.
+	 */
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	if (!(ecx & FFL(MOVBE)))
+		return emulate_ud(ctxt);
+
+	switch (ctxt->op_bytes) {
+	case 2:
+		/*
+		 * From MOVBE definition: "...When the operand size is 16 bits,
+		 * the upper word of the destination register remains unchanged
+		 * ..."
+		 *
+		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
+		 * rules so we have to do the operation almost per hand.
+		 */
+		tmp = (u16)ctxt->src.val;
+		ctxt->dst.val &= ~0xffffUL;
+		ctxt->dst.val |= (unsigned long)swab16(tmp);
+		break;
+	case 4:
+		ctxt->dst.val = swab32((u32)ctxt->src.val);
+		break;
+	case 8:
+		ctxt->dst.val = swab64(ctxt->src.val);
+		break;
+	default:
+		return X86EMUL_PROPAGATE_FAULT;
+	}
+	return X86EMUL_CONTINUE;
+}
+
 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
 {
 	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
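Note: the subtle case in em_movbe() above is the 16-bit form, which must leave everything above the low word of the destination untouched. A quick user-space check of that semantics (__builtin_bswap16 standing in for the kernel's swab16()):

#include <assert.h>
#include <stdint.h>

static uint64_t movbe16(uint64_t dst, uint16_t src)
{
	dst &= ~0xffffULL;		/* only the low word is replaced */
	dst |= __builtin_bswap16(src);	/* byte-swapped source */
	return dst;
}

int main(void)
{
	/* the upper 48 bits of the destination must survive */
	assert(movbe16(0xdeadbeefcafe0000ULL, 0x1234) ==
	       0xdeadbeefcafe3412ULL);
	return 0;
}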
@@ -3256,6 +3290,18 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_sahf(struct x86_emulate_ctxt *ctxt)
+{
+	u32 flags;
+
+	flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
+
+	ctxt->eflags &= ~0xffUL;
+	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
+	return X86EMUL_CONTINUE;
+}
+
 static int em_lahf(struct x86_emulate_ctxt *ctxt)
 {
 	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
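Note: em_sahf() above implements SAHF, which copies AH into the low byte of EFLAGS but lets only the five arithmetic flags through and forces the fixed bit 1 on. The same masking, written out numerically as a standalone check:

#include <assert.h>
#include <stdint.h>

#define FLG_CF	  (1u << 0)
#define FLG_FIXED (1u << 1)	/* always reads as 1 in EFLAGS */
#define FLG_PF	  (1u << 2)
#define FLG_AF	  (1u << 4)
#define FLG_ZF	  (1u << 6)
#define FLG_SF	  (1u << 7)

static uint32_t sahf(uint32_t eflags, uint64_t rax)
{
	uint32_t flags = FLG_CF | FLG_PF | FLG_AF | FLG_ZF | FLG_SF;

	flags &= rax >> 8;		/* AH supplies the new flag bits */
	eflags &= ~0xffu;		/* the low byte is replaced... */
	return eflags | flags | FLG_FIXED;	/* ...with bit 1 forced on */
}

int main(void)
{
	/* AH = 0xff: only CF, PF, AF, ZF, SF (plus the fixed bit) land */
	assert(sahf(0, 0xff00) ==
	       (FLG_CF | FLG_FIXED | FLG_PF | FLG_AF | FLG_ZF | FLG_SF));
	return 0;
}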
@@ -3502,7 +3548,7 @@ static const struct opcode group7_rm1[] = {
 
 static const struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv,		vmrun,	check_svme_pa),
-	II(SrcNone  | Prot | VendorSpecific,	em_vmmcall,	vmmcall),
+	II(SrcNone  | Prot | EmulateOnUD,	em_vmmcall,	vmmcall),
 	DIP(SrcNone | Prot | Priv,		vmload,	check_svme_pa),
 	DIP(SrcNone | Prot | Priv,		vmsave,	check_svme_pa),
 	DIP(SrcNone | Prot | Priv,		stgi,	check_svme),
@@ -3587,7 +3633,7 @@ static const struct group_dual group7 = { {
 	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
 	II(SrcMem | ByteOp | Priv | NoAccess,	em_invlpg, invlpg),
 }, {
-	I(SrcNone | Priv | VendorSpecific,	em_vmcall),
+	I(SrcNone | Priv | EmulateOnUD,		em_vmcall),
 	EXT(0, group7_rm1),
 	N, EXT(0, group7_rm3),
 	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
@@ -3750,7 +3796,8 @@ static const struct opcode opcode_table[256] = {
 	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
 	I(SrcImmFAddr | No64, em_call_far), N,
 	II(ImplicitOps | Stack, em_pushf, pushf),
-	II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf),
+	II(ImplicitOps | Stack, em_popf, popf),
+	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
 	/* 0xA0 - 0xA7 */
 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
 	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
@@ -3810,7 +3857,7 @@ static const struct opcode opcode_table[256] = {
 static const struct opcode twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	G(0, group6), GD(0, &group7), N, N,
-	N, I(ImplicitOps | VendorSpecific, em_syscall),
+	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
 	N, D(ImplicitOps | ModRM), N, N,
@@ -3830,8 +3877,8 @@ static const struct opcode twobyte_table[256] = {
 	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
 	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
 	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
-	I(ImplicitOps | VendorSpecific, em_sysenter),
-	I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
+	I(ImplicitOps | EmulateOnUD, em_sysenter),
+	I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
 	N, N,
 	N, N, N, N, N, N, N, N,
 	/* 0x40 - 0x4F */
@@ -3892,6 +3939,30 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
 };
 
+static const struct gprefix three_byte_0f_38_f0 = {
+	I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+};
+
+static const struct gprefix three_byte_0f_38_f1 = {
+	I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+};
+
+/*
+ * Insns below are selected by the prefix which is indexed by the third
+ * opcode byte.
+ */
+static const struct opcode opcode_map_0f_38[256] = {
+	/* 0x00 - 0x7f */
+	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
+	/* 0x80 - 0xef */
+	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
+	/* 0xf0 - 0xf1 */
+	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
+	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+	/* 0xf2 - 0xff */
+	N, N, X4(N), X8(N)
+};
+
 #undef D
 #undef N
 #undef G
@@ -4040,7 +4111,8 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 	case OpMem8:
 		ctxt->memop.bytes = 1;
 		if (ctxt->memop.type == OP_REG) {
-			ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
+			ctxt->memop.addr.reg = decode_register(ctxt,
+					ctxt->modrm_rm, true);
 			fetch_register_operand(&ctxt->memop);
 		}
 		goto mem_common;
@@ -4126,6 +4198,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	ctxt->_eip = ctxt->eip;
 	ctxt->fetch.start = ctxt->_eip;
 	ctxt->fetch.end = ctxt->fetch.start + insn_len;
+	ctxt->opcode_len = 1;
 	if (insn_len > 0)
 		memcpy(ctxt->fetch.data, insn, insn_len);
 
@@ -4208,9 +4281,16 @@ done_prefixes:
 	opcode = opcode_table[ctxt->b];
 	/* Two-byte opcode? */
 	if (ctxt->b == 0x0f) {
-		ctxt->twobyte = 1;
+		ctxt->opcode_len = 2;
 		ctxt->b = insn_fetch(u8, ctxt);
 		opcode = twobyte_table[ctxt->b];
+
+		/* 0F_38 opcode map */
+		if (ctxt->b == 0x38) {
+			ctxt->opcode_len = 3;
+			ctxt->b = insn_fetch(u8, ctxt);
+			opcode = opcode_map_0f_38[ctxt->b];
+		}
 	}
 	ctxt->d = opcode.flags;
 
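Note: with the hunk above, decode cascades through up to three opcode tables (one-byte, the 0x0F two-byte table, and the new 0x0F 0x38 map), recording the depth in ctxt->opcode_len. A condensed, self-contained model of that walk (toy tables, not the emulator's real ones):

#include <stdint.h>
#include <stdio.h>

struct opcode { const char *name; };

static const struct opcode opcode_table[256]     = { [0x90] = { "nop" } };
static const struct opcode twobyte_table[256]    = { [0xa2] = { "cpuid" } };
static const struct opcode opcode_map_0f_38[256] = { [0xf0] = { "movbe" } };

/* walk the opcode bytes of one instruction in "insn" */
static const struct opcode *decode_opcode(const uint8_t *insn,
					  int *opcode_len)
{
	uint8_t b = *insn++;
	const struct opcode *op = &opcode_table[b];

	*opcode_len = 1;
	if (b == 0x0f) {			/* two-byte escape */
		b = *insn++;
		op = &twobyte_table[b];
		*opcode_len = 2;
		if (b == 0x38) {		/* 0F 38: three-byte map */
			b = *insn++;
			op = &opcode_map_0f_38[b];
			*opcode_len = 3;
		}
	}
	return op;
}

int main(void)
{
	uint8_t movbe[] = { 0x0f, 0x38, 0xf0, 0x00 };	/* MOVBE r32, m32 */
	int len;

	printf("%s, opcode_len=%d\n", decode_opcode(movbe, &len)->name, len);
	return 0;
}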
@@ -4267,7 +4347,7 @@ done_prefixes:
 	if (ctxt->d == 0 || (ctxt->d & NotImpl))
 		return EMULATION_FAILED;
 
-	if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
+	if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
 		return EMULATION_FAILED;
 
 	if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
@@ -4540,8 +4620,10 @@ special_insn:
 		goto writeback;
 	}
 
-	if (ctxt->twobyte)
+	if (ctxt->opcode_len == 2)
 		goto twobyte_insn;
+	else if (ctxt->opcode_len == 3)
+		goto threebyte_insn;
 
 	switch (ctxt->b) {
 	case 0x63:		/* movsxd */
@@ -4726,6 +4808,8 @@ twobyte_insn:
 		goto cannot_emulate;
 	}
 
+threebyte_insn:
+
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dce0df8150df..40772ef0f2b1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	kvm_release_pfn_clean(pfn);
 }
 
-static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
-{
-	mmu_free_roots(vcpu);
-}
-
 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 				     bool no_dirty_log)
 {
@@ -3424,18 +3419,11 @@ out_unlock:
 	return 0;
 }
 
-static void nonpaging_free(struct kvm_vcpu *vcpu)
-{
-	mmu_free_roots(vcpu);
-}
-
-static int nonpaging_init_context(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+				   struct kvm_mmu *context)
 {
-	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = nonpaging_page_fault;
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
-	context->free = nonpaging_free;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
 	context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = true;
 	context->nx = false;
-	return 0;
 }
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
 
-static void paging_new_cr3(struct kvm_vcpu *vcpu)
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
 {
-	pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
 	mmu_free_roots(vcpu);
 }
 
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
 	vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
-static void paging_free(struct kvm_vcpu *vcpu)
-{
-	nonpaging_free(vcpu);
-}
-
 static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
 			   unsigned access, int *nr_present)
 {
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 	mmu->last_pte_bitmap = map;
 }
 
-static int paging64_init_context_common(struct kvm_vcpu *vcpu,
-					struct kvm_mmu *context,
-					int level)
+static void paging64_init_context_common(struct kvm_vcpu *vcpu,
+					 struct kvm_mmu *context,
+					 int level)
 {
 	context->nx = is_nx(vcpu);
 	context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	update_last_pte_bitmap(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
-	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->sync_page = paging64_sync_page;
 	context->invlpg = paging64_invlpg;
 	context->update_pte = paging64_update_pte;
-	context->free = paging_free;
 	context->shadow_root_level = level;
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
-	return 0;
 }
 
-static int paging64_init_context(struct kvm_vcpu *vcpu,
-				 struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
 {
-	return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+	paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
 }
 
-static int paging32_init_context(struct kvm_vcpu *vcpu,
-				 struct kvm_mmu *context)
+static void paging32_init_context(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
 {
 	context->nx = false;
 	context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
-	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
-	context->free = paging_free;
 	context->sync_page = paging32_sync_page;
 	context->invlpg = paging32_invlpg;
 	context->update_pte = paging32_update_pte;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
-	return 0;
 }
 
-static int paging32E_init_context(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void paging32E_init_context(struct kvm_vcpu *vcpu,
+				   struct kvm_mmu *context)
 {
-	return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
+	paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
-static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = vcpu->arch.walk_mmu;
 
 	context->base_role.word = 0;
-	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = tdp_page_fault;
-	context->free = nonpaging_free;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
 	context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
-
-	return 0;
 }
 
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-	int r;
 	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
 	if (!is_paging(vcpu))
-		r = nonpaging_init_context(vcpu, context);
+		nonpaging_init_context(vcpu, context);
 	else if (is_long_mode(vcpu))
-		r = paging64_init_context(vcpu, context);
+		paging64_init_context(vcpu, context);
 	else if (is_pae(vcpu))
-		r = paging32E_init_context(vcpu, context);
+		paging32E_init_context(vcpu, context);
 	else
-		r = paging32_init_context(vcpu, context);
+		paging32_init_context(vcpu, context);
 
 	vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
 	vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
 	vcpu->arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
 	vcpu->arch.mmu.base_role.smep_andnot_wp
 		= smep && !is_write_protection(vcpu);
-
-	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 		bool execonly)
 {
 	ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 
 	context->nx = true;
-	context->new_cr3 = paging_new_cr3;
 	context->page_fault = ept_page_fault;
 	context->gva_to_gpa = ept_gva_to_gpa;
 	context->sync_page = ept_sync_page;
 	context->invlpg = ept_invlpg;
 	context->update_pte = ept_update_pte;
-	context->free = paging_free;
 	context->root_level = context->shadow_root_level;
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
 
 	update_permission_bitmask(vcpu, context, true);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
-static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
-	int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
-
+	kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
 	vcpu->arch.walk_mmu->set_cr3           = kvm_x86_ops->set_cr3;
 	vcpu->arch.walk_mmu->get_cr3           = get_cr3;
 	vcpu->arch.walk_mmu->get_pdptr         = kvm_pdptr_read;
 	vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
-	return r;
 }
 
-static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(vcpu, g_context, false);
 	update_last_pte_bitmap(vcpu, g_context);
-
-	return 0;
 }
 
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
 	if (mmu_is_nested(vcpu))
 		return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 		return init_kvm_softmmu(vcpu);
 }
 
-static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
-	if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		/* mmu.free() should set root_hpa = INVALID_PAGE */
-		vcpu->arch.mmu.free(vcpu);
-}
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
-	destroy_kvm_mmu(vcpu);
-	return init_kvm_mmu(vcpu);
+	kvm_mmu_unload(vcpu);
+	init_kvm_mmu(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
 	mmu_free_roots(vcpu);
+	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 	return alloc_mmu_pages(vcpu);
 }
 
-int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-	return init_kvm_mmu(vcpu);
+	init_kvm_mmu(vcpu);
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	int nr_to_scan = sc->nr_to_scan;
 	unsigned long freed = 0;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int idx;
@@ -4478,9 +4432,8 @@ unlock:
 		break;
 	}
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return freed;
-
 }
 
 static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 
-	destroy_kvm_mmu(vcpu);
+	kvm_mmu_unload(vcpu);
 	free_mmu_pages(vcpu);
 	mmu_free_memory_caches(vcpu);
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 77e044a0f5f7..292615274358 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -70,8 +70,8 @@ enum {
 };
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 		bool execonly);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c0bc80391e40..c7168a5cff1b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, | |||
1959 | nested_svm_vmexit(svm); | 1959 | nested_svm_vmexit(svm); |
1960 | } | 1960 | } |
1961 | 1961 | ||
1962 | static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) | 1962 | static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) |
1963 | { | 1963 | { |
1964 | int r; | 1964 | kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); |
1965 | |||
1966 | r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); | ||
1967 | 1965 | ||
1968 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; | 1966 | vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; |
1969 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; | 1967 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; |
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) | |||
1971 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; | 1969 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; |
1972 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); | 1970 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); |
1973 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | 1971 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; |
1974 | |||
1975 | return r; | ||
1976 | } | 1972 | } |
1977 | 1973 | ||
1978 | static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) | 1974 | static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2b2fce1b2009..b2fe1c252f35 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -1498,7 +1498,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1498 | break; | 1498 | break; |
1499 | 1499 | ||
1500 | if (i == NR_AUTOLOAD_MSRS) { | 1500 | if (i == NR_AUTOLOAD_MSRS) { |
1501 | printk_once(KERN_WARNING"Not enough mst switch entries. " | 1501 | printk_once(KERN_WARNING "Not enough msr switch entries. " |
1502 | "Can't add msr %x\n", msr); | 1502 | "Can't add msr %x\n", msr); |
1503 | return; | 1503 | return; |
1504 | } else if (i == m->nr) { | 1504 | } else if (i == m->nr) { |
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
1898 | /* | 1898 | /* |
1899 | * KVM wants to inject page-faults which it got to the guest. This function | 1899 | * KVM wants to inject page-faults which it got to the guest. This function |
1900 | * checks whether in a nested guest, we need to inject them to L1 or L2. | 1900 | * checks whether in a nested guest, we need to inject them to L1 or L2. |
1901 | * This function assumes it is called with the exit reason in vmcs02 being | ||
1902 | * a #PF exception (this is the only case in which KVM injects a #PF when L2 | ||
1903 | * is running). | ||
1904 | */ | 1901 | */ |
1905 | static int nested_pf_handled(struct kvm_vcpu *vcpu) | 1902 | static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) |
1906 | { | 1903 | { |
1907 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 1904 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
1908 | 1905 | ||
1909 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | 1906 | if (!(vmcs12->exception_bitmap & (1u << nr))) |
1910 | if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR))) | ||
1911 | return 0; | 1907 | return 0; |
1912 | 1908 | ||
1913 | nested_vmx_vmexit(vcpu); | 1909 | nested_vmx_vmexit(vcpu); |
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1921 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1917 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1922 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 1918 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
1923 | 1919 | ||
1924 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && | 1920 | if (!reinject && is_guest_mode(vcpu) && |
1925 | !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) | 1921 | nested_vmx_check_exception(vcpu, nr)) |
1926 | return; | 1922 | return; |
1927 | 1923 | ||
1928 | if (has_error_code) { | 1924 | if (has_error_code) { |
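
The hunk above generalizes what used to be a #PF-only test: L1 now sees any exception whose vector bit is set in vmcs12's exception bitmap. A minimal standalone sketch of that bit test (the vector constants are the architectural ones; the sample bitmap value is invented for illustration):

#include <stdint.h>
#include <stdio.h>

#define PF_VECTOR 14
#define NM_VECTOR  7

/* Does L1 want to intercept exception vector 'nr'?  Mirrors the
 * (1u << nr) test in nested_vmx_check_exception() above. */
static int l1_wants_exception(uint32_t exception_bitmap, unsigned nr)
{
	return (exception_bitmap & (1u << nr)) != 0;
}

int main(void)
{
	/* Hypothetical L1 bitmap: intercept #PF but not #NM. */
	uint32_t bitmap = 1u << PF_VECTOR;

	printf("#PF -> L1? %d\n", l1_wants_exception(bitmap, PF_VECTOR)); /* 1 */
	printf("#NM -> L1? %d\n", l1_wants_exception(bitmap, NM_VECTOR)); /* 0 */
	return 0;
}
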
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2204 | #ifdef CONFIG_X86_64 | 2200 | #ifdef CONFIG_X86_64 |
2205 | VM_EXIT_HOST_ADDR_SPACE_SIZE | | 2201 | VM_EXIT_HOST_ADDR_SPACE_SIZE | |
2206 | #endif | 2202 | #endif |
2207 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; | 2203 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | |
2204 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | ||
2205 | if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) || | ||
2206 | !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { | ||
2207 | nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | ||
2208 | nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER; | ||
2209 | } | ||
2208 | nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | 2210 | nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | |
2209 | VM_EXIT_LOAD_IA32_EFER); | 2211 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); |
2210 | 2212 | ||
2211 | /* entry controls */ | 2213 | /* entry controls */ |
2212 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2214 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
@@ -2226,7 +2228,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2226 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); | 2228 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); |
2227 | nested_vmx_procbased_ctls_low = 0; | 2229 | nested_vmx_procbased_ctls_low = 0; |
2228 | nested_vmx_procbased_ctls_high &= | 2230 | nested_vmx_procbased_ctls_high &= |
2229 | CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | | 2231 | CPU_BASED_VIRTUAL_INTR_PENDING | |
2232 | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | | ||
2230 | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | | 2233 | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | |
2231 | CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | | 2234 | CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | |
2232 | CPU_BASED_CR3_STORE_EXITING | | 2235 | CPU_BASED_CR3_STORE_EXITING | |
@@ -2252,13 +2255,15 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2252 | nested_vmx_secondary_ctls_low = 0; | 2255 | nested_vmx_secondary_ctls_low = 0; |
2253 | nested_vmx_secondary_ctls_high &= | 2256 | nested_vmx_secondary_ctls_high &= |
2254 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2257 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2258 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | ||
2255 | SECONDARY_EXEC_WBINVD_EXITING; | 2259 | SECONDARY_EXEC_WBINVD_EXITING; |
2256 | 2260 | ||
2257 | if (enable_ept) { | 2261 | if (enable_ept) { |
2258 | /* nested EPT: emulate EPT also to L1 */ | 2262 | /* nested EPT: emulate EPT also to L1 */ |
2259 | nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; | 2263 | nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; |
2260 | nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | | 2264 | nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | |
2261 | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; | 2265 | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | |
2266 | VMX_EPT_INVEPT_BIT; | ||
2262 | nested_vmx_ept_caps &= vmx_capability.ept; | 2267 | nested_vmx_ept_caps &= vmx_capability.ept; |
2263 | /* | 2268 | /* |
2264 | * Since invept is completely emulated we support both global | 2269 | * Since invept is completely emulated we support both global |
@@ -3380,8 +3385,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
3380 | if (enable_ept) { | 3385 | if (enable_ept) { |
3381 | eptp = construct_eptp(cr3); | 3386 | eptp = construct_eptp(cr3); |
3382 | vmcs_write64(EPT_POINTER, eptp); | 3387 | vmcs_write64(EPT_POINTER, eptp); |
3383 | guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : | 3388 | if (is_paging(vcpu) || is_guest_mode(vcpu)) |
3384 | vcpu->kvm->arch.ept_identity_map_addr; | 3389 | guest_cr3 = kvm_read_cr3(vcpu); |
3390 | else | ||
3391 | guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr; | ||
3385 | ept_load_pdptrs(vcpu); | 3392 | ept_load_pdptrs(vcpu); |
3386 | } | 3393 | } |
3387 | 3394 | ||
@@ -4879,6 +4886,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
4879 | hypercall[2] = 0xc1; | 4886 | hypercall[2] = 0xc1; |
4880 | } | 4887 | } |
4881 | 4888 | ||
4889 | static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val) | ||
4890 | { | ||
4891 | unsigned long always_on = VMXON_CR0_ALWAYSON; | ||
4892 | |||
4893 | if (nested_vmx_secondary_ctls_high & | ||
4894 | SECONDARY_EXEC_UNRESTRICTED_GUEST && | ||
4895 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) | ||
4896 | always_on &= ~(X86_CR0_PE | X86_CR0_PG); | ||
4897 | return (val & always_on) == always_on; | ||
4898 | } | ||
4899 | |||
4882 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ | 4900 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ |
4883 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | 4901 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) |
4884 | { | 4902 | { |
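
nested_cr0_valid() above relaxes the always-on CR0 bits when L1 grants L2 unrestricted-guest mode, in which PE and PG may legally be clear. A standalone sketch of the mask logic, assuming KVM's usual VMXON_CR0_ALWAYSON value of PE|PG|NE (the CR0 bit positions are architectural):

#include <stdint.h>
#include <stdio.h>

#define X86_CR0_PE (1ul << 0)
#define X86_CR0_NE (1ul << 5)
#define X86_CR0_PG (1ul << 31)

/* Assumed value of VMXON_CR0_ALWAYSON for this sketch: PE|PG|NE. */
#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)

static int cr0_valid(unsigned long val, int l2_unrestricted)
{
	unsigned long always_on = VMXON_CR0_ALWAYSON;

	/* With unrestricted guest, L2 may run unpaged or in real mode. */
	if (l2_unrestricted)
		always_on &= ~(X86_CR0_PE | X86_CR0_PG);
	return (val & always_on) == always_on;
}

int main(void)
{
	unsigned long real_mode_cr0 = X86_CR0_NE;	/* PE=PG=0 */

	printf("strict:       %d\n", cr0_valid(real_mode_cr0, 0)); /* 0 */
	printf("unrestricted: %d\n", cr0_valid(real_mode_cr0, 1)); /* 1 */
	return 0;
}
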
@@ -4897,9 +4915,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | |||
4897 | val = (val & ~vmcs12->cr0_guest_host_mask) | | 4915 | val = (val & ~vmcs12->cr0_guest_host_mask) | |
4898 | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); | 4916 | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); |
4899 | 4917 | ||
4900 | /* TODO: will have to take unrestricted guest mode into | 4918 | if (!nested_cr0_valid(vmcs12, val)) |
4901 | * account */ | ||
4902 | if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) | ||
4903 | return 1; | 4919 | return 1; |
4904 | 4920 | ||
4905 | if (kvm_set_cr0(vcpu, val)) | 4921 | if (kvm_set_cr0(vcpu, val)) |
@@ -6627,6 +6643,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6627 | return 0; | 6643 | return 0; |
6628 | else if (is_page_fault(intr_info)) | 6644 | else if (is_page_fault(intr_info)) |
6629 | return enable_ept; | 6645 | return enable_ept; |
6646 | else if (is_no_device(intr_info) && | ||
6647 | !(nested_read_cr0(vmcs12) & X86_CR0_TS)) | ||
6648 | return 0; | ||
6630 | return vmcs12->exception_bitmap & | 6649 | return vmcs12->exception_bitmap & |
6631 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); | 6650 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); |
6632 | case EXIT_REASON_EXTERNAL_INTERRUPT: | 6651 | case EXIT_REASON_EXTERNAL_INTERRUPT: |
@@ -6722,6 +6741,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | |||
6722 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | 6741 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); |
6723 | } | 6742 | } |
6724 | 6743 | ||
6744 | static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) | ||
6745 | { | ||
6746 | u64 delta_tsc_l1; | ||
6747 | u32 preempt_val_l1, preempt_val_l2, preempt_scale; | ||
6748 | |||
6749 | if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control & | ||
6750 | PIN_BASED_VMX_PREEMPTION_TIMER)) | ||
6751 | return; | ||
6752 | preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & | ||
6753 | MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; | ||
6754 | preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); | ||
6755 | delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc()) | ||
6756 | - vcpu->arch.last_guest_tsc; | ||
6757 | preempt_val_l1 = delta_tsc_l1 >> preempt_scale; | ||
6758 | if (preempt_val_l2 <= preempt_val_l1) | ||
6759 | preempt_val_l2 = 0; | ||
6760 | else | ||
6761 | preempt_val_l2 -= preempt_val_l1; | ||
6762 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); | ||
6763 | } | ||
6764 | |||
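
The arithmetic in nested_adjust_preemption_timer() above: TSC cycles spent in L1 since the last L2 exit are converted to timer ticks by shifting right by the scale advertised in IA32_VMX_MISC[4:0], then subtracted from L2's saved timer value, clamping at zero. A worked standalone version of that computation (the scale and cycle counts below are invented sample inputs):

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as nested_adjust_preemption_timer() above:
 * the preemption timer ticks once every 2^scale TSC cycles. */
static uint32_t adjust_preempt_timer(uint32_t saved_l2_val,
				     uint64_t delta_tsc_l1,
				     unsigned scale)
{
	uint32_t elapsed = (uint32_t)(delta_tsc_l1 >> scale);

	return (saved_l2_val <= elapsed) ? 0 : saved_l2_val - elapsed;
}

int main(void)
{
	/* Hypothetical numbers: scale=5 (one tick per 32 TSC cycles),
	 * 10000 cycles spent in L1, timer saved at 1000 ticks. */
	printf("%u\n", adjust_preempt_timer(1000, 10000, 5)); /* 1000-312 = 688 */
	return 0;
}
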
6725 | /* | 6765 | /* |
6726 | * The guest has exited. See if we can fix it or if we need userspace | 6766 | * The guest has exited. See if we can fix it or if we need userspace |
6727 | * assistance. | 6767 | * assistance. |
@@ -6736,20 +6776,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6736 | if (vmx->emulation_required) | 6776 | if (vmx->emulation_required) |
6737 | return handle_invalid_guest_state(vcpu); | 6777 | return handle_invalid_guest_state(vcpu); |
6738 | 6778 | ||
6739 | /* | ||
6740 | * the KVM_REQ_EVENT optimization bit is only on for one entry, and if | ||
6741 | * we did not inject a still-pending event to L1 now because of | ||
6742 | * nested_run_pending, we need to re-enable this bit. | ||
6743 | */ | ||
6744 | if (vmx->nested.nested_run_pending) | ||
6745 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
6746 | |||
6747 | if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH || | ||
6748 | exit_reason == EXIT_REASON_VMRESUME)) | ||
6749 | vmx->nested.nested_run_pending = 1; | ||
6750 | else | ||
6751 | vmx->nested.nested_run_pending = 0; | ||
6752 | |||
6753 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { | 6779 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { |
6754 | nested_vmx_vmexit(vcpu); | 6780 | nested_vmx_vmexit(vcpu); |
6755 | return 1; | 6781 | return 1; |
@@ -7061,9 +7087,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, | |||
7061 | case INTR_TYPE_HARD_EXCEPTION: | 7087 | case INTR_TYPE_HARD_EXCEPTION: |
7062 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { | 7088 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
7063 | u32 err = vmcs_read32(error_code_field); | 7089 | u32 err = vmcs_read32(error_code_field); |
7064 | kvm_queue_exception_e(vcpu, vector, err); | 7090 | kvm_requeue_exception_e(vcpu, vector, err); |
7065 | } else | 7091 | } else |
7066 | kvm_queue_exception(vcpu, vector); | 7092 | kvm_requeue_exception(vcpu, vector); |
7067 | break; | 7093 | break; |
7068 | case INTR_TYPE_SOFT_INTR: | 7094 | case INTR_TYPE_SOFT_INTR: |
7069 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); | 7095 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); |
@@ -7146,6 +7172,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
7146 | atomic_switch_perf_msrs(vmx); | 7172 | atomic_switch_perf_msrs(vmx); |
7147 | debugctlmsr = get_debugctlmsr(); | 7173 | debugctlmsr = get_debugctlmsr(); |
7148 | 7174 | ||
7175 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) | ||
7176 | nested_adjust_preemption_timer(vcpu); | ||
7149 | vmx->__launched = vmx->loaded_vmcs->launched; | 7177 | vmx->__launched = vmx->loaded_vmcs->launched; |
7150 | asm( | 7178 | asm( |
7151 | /* Store host registers */ | 7179 | /* Store host registers */ |
@@ -7284,6 +7312,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
7284 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 7312 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
7285 | trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); | 7313 | trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); |
7286 | 7314 | ||
7315 | /* | ||
7316 | * the KVM_REQ_EVENT optimization bit is only on for one entry, and if | ||
7317 | * we did not inject a still-pending event to L1 now because of | ||
7318 | * nested_run_pending, we need to re-enable this bit. | ||
7319 | */ | ||
7320 | if (vmx->nested.nested_run_pending) | ||
7321 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
7322 | |||
7323 | vmx->nested.nested_run_pending = 0; | ||
7324 | |||
7287 | vmx_complete_atomic_exit(vmx); | 7325 | vmx_complete_atomic_exit(vmx); |
7288 | vmx_recover_nmi_blocking(vmx); | 7326 | vmx_recover_nmi_blocking(vmx); |
7289 | vmx_complete_interrupts(vmx); | 7327 | vmx_complete_interrupts(vmx); |
@@ -7410,8 +7448,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
7410 | */ | 7448 | */ |
7411 | if (is_mmio) | 7449 | if (is_mmio) |
7412 | ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; | 7450 | ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; |
7413 | else if (vcpu->kvm->arch.iommu_domain && | 7451 | else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) |
7414 | !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) | ||
7415 | ret = kvm_get_guest_memory_type(vcpu, gfn) << | 7452 | ret = kvm_get_guest_memory_type(vcpu, gfn) << |
7416 | VMX_EPT_MT_EPTE_SHIFT; | 7453 | VMX_EPT_MT_EPTE_SHIFT; |
7417 | else | 7454 | else |
@@ -7501,9 +7538,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | |||
7501 | return get_vmcs12(vcpu)->ept_pointer; | 7538 | return get_vmcs12(vcpu)->ept_pointer; |
7502 | } | 7539 | } |
7503 | 7540 | ||
7504 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | 7541 | static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) |
7505 | { | 7542 | { |
7506 | int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, | 7543 | kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, |
7507 | nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); | 7544 | nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); |
7508 | 7545 | ||
7509 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | 7546 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; |
@@ -7511,8 +7548,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | |||
7511 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | 7548 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; |
7512 | 7549 | ||
7513 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | 7550 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; |
7514 | |||
7515 | return r; | ||
7516 | } | 7551 | } |
7517 | 7552 | ||
7518 | static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) | 7553 | static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) |
@@ -7520,6 +7555,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) | |||
7520 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | 7555 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; |
7521 | } | 7556 | } |
7522 | 7557 | ||
7558 | static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | ||
7559 | struct x86_exception *fault) | ||
7560 | { | ||
7561 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
7562 | |||
7563 | WARN_ON(!is_guest_mode(vcpu)); | ||
7564 | |||
7565 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | ||
7566 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) | ||
7567 | nested_vmx_vmexit(vcpu); | ||
7568 | else | ||
7569 | kvm_inject_page_fault(vcpu, fault); | ||
7570 | } | ||
7571 | |||
7523 | /* | 7572 | /* |
7524 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | 7573 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested |
7525 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | 7574 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it |
@@ -7533,6 +7582,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7533 | { | 7582 | { |
7534 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7583 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7535 | u32 exec_control; | 7584 | u32 exec_control; |
7585 | u32 exit_control; | ||
7536 | 7586 | ||
7537 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); | 7587 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); |
7538 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); | 7588 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); |
@@ -7706,7 +7756,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7706 | * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER | 7756 | * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER |
7707 | * bits are further modified by vmx_set_efer() below. | 7757 | * bits are further modified by vmx_set_efer() below. |
7708 | */ | 7758 | */ |
7709 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); | 7759 | exit_control = vmcs_config.vmexit_ctrl; |
7760 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | ||
7761 | exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | ||
7762 | vmcs_write32(VM_EXIT_CONTROLS, exit_control); | ||
7710 | 7763 | ||
7711 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are | 7764 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are |
7712 | * emulated by vmx_set_efer(), below. | 7765 | * emulated by vmx_set_efer(), below. |
@@ -7773,6 +7826,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7773 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); | 7826 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); |
7774 | kvm_mmu_reset_context(vcpu); | 7827 | kvm_mmu_reset_context(vcpu); |
7775 | 7828 | ||
7829 | if (!enable_ept) | ||
7830 | vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; | ||
7831 | |||
7776 | /* | 7832 | /* |
7777 | * L1 may access the L2's PDPTR, so save them to construct vmcs12 | 7833 | * L1 may access the L2's PDPTR, so save them to construct vmcs12 |
7778 | */ | 7834 | */ |
@@ -7876,7 +7932,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7876 | return 1; | 7932 | return 1; |
7877 | } | 7933 | } |
7878 | 7934 | ||
7879 | if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || | 7935 | if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) || |
7880 | ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { | 7936 | ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { |
7881 | nested_vmx_entry_failure(vcpu, vmcs12, | 7937 | nested_vmx_entry_failure(vcpu, vmcs12, |
7882 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | 7938 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); |
@@ -7938,6 +7994,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7938 | 7994 | ||
7939 | enter_guest_mode(vcpu); | 7995 | enter_guest_mode(vcpu); |
7940 | 7996 | ||
7997 | vmx->nested.nested_run_pending = 1; | ||
7998 | |||
7941 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); | 7999 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); |
7942 | 8000 | ||
7943 | cpu = get_cpu(); | 8001 | cpu = get_cpu(); |
@@ -8005,7 +8063,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
8005 | u32 idt_vectoring; | 8063 | u32 idt_vectoring; |
8006 | unsigned int nr; | 8064 | unsigned int nr; |
8007 | 8065 | ||
8008 | if (vcpu->arch.exception.pending) { | 8066 | if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { |
8009 | nr = vcpu->arch.exception.nr; | 8067 | nr = vcpu->arch.exception.nr; |
8010 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | 8068 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; |
8011 | 8069 | ||
@@ -8023,7 +8081,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
8023 | } | 8081 | } |
8024 | 8082 | ||
8025 | vmcs12->idt_vectoring_info_field = idt_vectoring; | 8083 | vmcs12->idt_vectoring_info_field = idt_vectoring; |
8026 | } else if (vcpu->arch.nmi_pending) { | 8084 | } else if (vcpu->arch.nmi_injected) { |
8027 | vmcs12->idt_vectoring_info_field = | 8085 | vmcs12->idt_vectoring_info_field = |
8028 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; | 8086 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; |
8029 | } else if (vcpu->arch.interrupt.pending) { | 8087 | } else if (vcpu->arch.interrupt.pending) { |
@@ -8105,6 +8163,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8105 | vmcs12->guest_pending_dbg_exceptions = | 8163 | vmcs12->guest_pending_dbg_exceptions = |
8106 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 8164 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
8107 | 8165 | ||
8166 | if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && | ||
8167 | (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) | ||
8168 | vmcs12->vmx_preemption_timer_value = | ||
8169 | vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); | ||
8170 | |||
8108 | /* | 8171 | /* |
8109 | * In some cases (usually, nested EPT), L2 is allowed to change its | 8172 | * In some cases (usually, nested EPT), L2 is allowed to change its |
8110 | * own CR3 without exiting. If it has changed it, we must keep it. | 8173 | * own CR3 without exiting. If it has changed it, we must keep it. |
@@ -8130,6 +8193,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8130 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | 8193 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
8131 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) | 8194 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) |
8132 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); | 8195 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); |
8196 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) | ||
8197 | vmcs12->guest_ia32_efer = vcpu->arch.efer; | ||
8133 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | 8198 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); |
8134 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | 8199 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); |
8135 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); | 8200 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); |
@@ -8201,7 +8266,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8201 | * fpu_active (which may have changed). | 8266 | * fpu_active (which may have changed). |
8202 | * Note that vmx_set_cr0 refers to efer set above. | 8267 | * Note that vmx_set_cr0 refers to efer set above. |
8203 | */ | 8268 | */ |
8204 | kvm_set_cr0(vcpu, vmcs12->host_cr0); | 8269 | vmx_set_cr0(vcpu, vmcs12->host_cr0); |
8205 | /* | 8270 | /* |
8206 | * If we did fpu_activate()/fpu_deactivate() during L2's run, we need | 8271 | * If we did fpu_activate()/fpu_deactivate() during L2's run, we need |
8207 | * to apply the same changes to L1's vmcs. We just set cr0 correctly, | 8272 | * to apply the same changes to L1's vmcs. We just set cr0 correctly, |
@@ -8224,6 +8289,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8224 | kvm_set_cr3(vcpu, vmcs12->host_cr3); | 8289 | kvm_set_cr3(vcpu, vmcs12->host_cr3); |
8225 | kvm_mmu_reset_context(vcpu); | 8290 | kvm_mmu_reset_context(vcpu); |
8226 | 8291 | ||
8292 | if (!enable_ept) | ||
8293 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; | ||
8294 | |||
8227 | if (enable_vpid) { | 8295 | if (enable_vpid) { |
8228 | /* | 8296 | /* |
8229 | * Trivially support vpid by letting L2s share their parent | 8297 | * Trivially support vpid by letting L2s share their parent |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5ca72a5cdb6..21ef1ba184ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -577,6 +577,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
577 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | 577 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) |
578 | { | 578 | { |
579 | u64 xcr0; | 579 | u64 xcr0; |
580 | u64 valid_bits; | ||
580 | 581 | ||
581 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ | 582 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ |
582 | if (index != XCR_XFEATURE_ENABLED_MASK) | 583 | if (index != XCR_XFEATURE_ENABLED_MASK) |
@@ -586,8 +587,16 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
586 | return 1; | 587 | return 1; |
587 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) | 588 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) |
588 | return 1; | 589 | return 1; |
589 | if (xcr0 & ~host_xcr0) | 590 | |
591 | /* | ||
592 | * Do not allow the guest to set bits that we do not support | ||
593 | * saving. However, xcr0 bit 0 is always set, even if the | ||
594 | * emulated CPU does not support XSAVE (see fx_init). | ||
595 | */ | ||
596 | valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP; | ||
597 | if (xcr0 & ~valid_bits) | ||
590 | return 1; | 598 | return 1; |
599 | |||
591 | kvm_put_guest_xcr0(vcpu); | 600 | kvm_put_guest_xcr0(vcpu); |
592 | vcpu->arch.xcr0 = xcr0; | 601 | vcpu->arch.xcr0 = xcr0; |
593 | return 0; | 602 | return 0; |
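
The rewritten XCR0 check above accepts only features the guest's CPUID advertises, plus x87 state (bit 0), which architecturally must stay set. A standalone sketch of the validation (the XSTATE bit positions are architectural; the guest_supported_xcr0 value is a made-up example):

#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP  (1ull << 0)	/* x87 state, always legal in XCR0 */
#define XSTATE_SSE (1ull << 1)
#define XSTATE_YMM (1ull << 2)

/* 0 = accepted, 1 = #GP, mirroring __kvm_set_xcr()'s convention. */
static int set_xcr0(uint64_t guest_supported_xcr0, uint64_t xcr0)
{
	uint64_t valid_bits = guest_supported_xcr0 | XSTATE_FP;

	if (!(xcr0 & XSTATE_FP))			/* bit 0 must stay set */
		return 1;
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) /* YMM needs SSE */
		return 1;
	if (xcr0 & ~valid_bits)				/* unsupported feature */
		return 1;
	return 0;
}

int main(void)
{
	/* Hypothetical guest whose CPUID exposes only FP+SSE. */
	uint64_t supported = XSTATE_FP | XSTATE_SSE;

	printf("%d\n", set_xcr0(supported, XSTATE_FP | XSTATE_SSE)); /* 0 */
	printf("%d\n", set_xcr0(supported, XSTATE_FP | XSTATE_SSE |
					   XSTATE_YMM));	      /* 1 */
	return 0;
}
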
@@ -684,7 +693,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
684 | 693 | ||
685 | vcpu->arch.cr3 = cr3; | 694 | vcpu->arch.cr3 = cr3; |
686 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | 695 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); |
687 | vcpu->arch.mmu.new_cr3(vcpu); | 696 | kvm_mmu_new_cr3(vcpu); |
688 | return 0; | 697 | return 0; |
689 | } | 698 | } |
690 | EXPORT_SYMBOL_GPL(kvm_set_cr3); | 699 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
@@ -2564,6 +2573,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2564 | case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: | 2573 | case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: |
2565 | case KVM_CAP_SET_TSS_ADDR: | 2574 | case KVM_CAP_SET_TSS_ADDR: |
2566 | case KVM_CAP_EXT_CPUID: | 2575 | case KVM_CAP_EXT_CPUID: |
2576 | case KVM_CAP_EXT_EMUL_CPUID: | ||
2567 | case KVM_CAP_CLOCKSOURCE: | 2577 | case KVM_CAP_CLOCKSOURCE: |
2568 | case KVM_CAP_PIT: | 2578 | case KVM_CAP_PIT: |
2569 | case KVM_CAP_NOP_IO_DELAY: | 2579 | case KVM_CAP_NOP_IO_DELAY: |
@@ -2673,15 +2683,17 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
2673 | r = 0; | 2683 | r = 0; |
2674 | break; | 2684 | break; |
2675 | } | 2685 | } |
2676 | case KVM_GET_SUPPORTED_CPUID: { | 2686 | case KVM_GET_SUPPORTED_CPUID: |
2687 | case KVM_GET_EMULATED_CPUID: { | ||
2677 | struct kvm_cpuid2 __user *cpuid_arg = argp; | 2688 | struct kvm_cpuid2 __user *cpuid_arg = argp; |
2678 | struct kvm_cpuid2 cpuid; | 2689 | struct kvm_cpuid2 cpuid; |
2679 | 2690 | ||
2680 | r = -EFAULT; | 2691 | r = -EFAULT; |
2681 | if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) | 2692 | if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) |
2682 | goto out; | 2693 | goto out; |
2683 | r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, | 2694 | |
2684 | cpuid_arg->entries); | 2695 | r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, |
2696 | ioctl); | ||
2685 | if (r) | 2697 | if (r) |
2686 | goto out; | 2698 | goto out; |
2687 | 2699 | ||
@@ -2715,8 +2727,7 @@ static void wbinvd_ipi(void *garbage) | |||
2715 | 2727 | ||
2716 | static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) | 2728 | static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) |
2717 | { | 2729 | { |
2718 | return vcpu->kvm->arch.iommu_domain && | 2730 | return kvm_arch_has_noncoherent_dma(vcpu->kvm); |
2719 | !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); | ||
2720 | } | 2731 | } |
2721 | 2732 | ||
2722 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 2733 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
@@ -2984,11 +2995,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
2984 | static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, | 2995 | static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, |
2985 | struct kvm_xsave *guest_xsave) | 2996 | struct kvm_xsave *guest_xsave) |
2986 | { | 2997 | { |
2987 | if (cpu_has_xsave) | 2998 | if (cpu_has_xsave) { |
2988 | memcpy(guest_xsave->region, | 2999 | memcpy(guest_xsave->region, |
2989 | &vcpu->arch.guest_fpu.state->xsave, | 3000 | &vcpu->arch.guest_fpu.state->xsave, |
2990 | xstate_size); | 3001 | vcpu->arch.guest_xstate_size); |
2991 | else { | 3002 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &= |
3003 | vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE; | ||
3004 | } else { | ||
2992 | memcpy(guest_xsave->region, | 3005 | memcpy(guest_xsave->region, |
2993 | &vcpu->arch.guest_fpu.state->fxsave, | 3006 | &vcpu->arch.guest_fpu.state->fxsave, |
2994 | sizeof(struct i387_fxsave_struct)); | 3007 | sizeof(struct i387_fxsave_struct)); |
@@ -3003,10 +3016,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, | |||
3003 | u64 xstate_bv = | 3016 | u64 xstate_bv = |
3004 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; | 3017 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; |
3005 | 3018 | ||
3006 | if (cpu_has_xsave) | 3019 | if (cpu_has_xsave) { |
3020 | /* | ||
3021 | * Here we allow setting states that are not present in | ||
3022 | * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility | ||
3023 | * with old userspace. | ||
3024 | */ | ||
3025 | if (xstate_bv & ~KVM_SUPPORTED_XCR0) | ||
3026 | return -EINVAL; | ||
3027 | if (xstate_bv & ~host_xcr0) | ||
3028 | return -EINVAL; | ||
3007 | memcpy(&vcpu->arch.guest_fpu.state->xsave, | 3029 | memcpy(&vcpu->arch.guest_fpu.state->xsave, |
3008 | guest_xsave->region, xstate_size); | 3030 | guest_xsave->region, vcpu->arch.guest_xstate_size); |
3009 | else { | 3031 | } else { |
3010 | if (xstate_bv & ~XSTATE_FPSSE) | 3032 | if (xstate_bv & ~XSTATE_FPSSE) |
3011 | return -EINVAL; | 3033 | return -EINVAL; |
3012 | memcpy(&vcpu->arch.guest_fpu.state->fxsave, | 3034 | memcpy(&vcpu->arch.guest_fpu.state->fxsave, |
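
Both xsave hunks above manipulate the XSTATE_BV word at the start of the XSAVE header, 512 bytes into the image: the get path masks it down to guest-visible states, the set path rejects bits beyond what KVM or the host supports. A standalone sketch of the header masking (memcpy is used here instead of the kernel's direct u64 cast to keep the example aliasing-safe):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define XSAVE_HDR_OFFSET 512		/* legacy area is 512 bytes */
#define XSTATE_FPSSE     0x3ull		/* FP | SSE */

/* Mask the XSTATE_BV header word of an XSAVE image down to the
 * states the guest is allowed to see, as the get_xsave path does. */
static void mask_xstate_bv(uint32_t *region, uint64_t allowed)
{
	uint64_t bv;

	memcpy(&bv, &region[XSAVE_HDR_OFFSET / sizeof(uint32_t)], sizeof(bv));
	bv &= allowed;
	memcpy(&region[XSAVE_HDR_OFFSET / sizeof(uint32_t)], &bv, sizeof(bv));
}

int main(void)
{
	static uint32_t region[1024 / sizeof(uint32_t)];
	uint64_t bv = 0x7;		/* FP | SSE | YMM set by host */

	memcpy(&region[XSAVE_HDR_OFFSET / sizeof(uint32_t)], &bv, sizeof(bv));
	mask_xstate_bv(region, XSTATE_FPSSE);	/* guest only knows FP+SSE */
	memcpy(&bv, &region[XSAVE_HDR_OFFSET / sizeof(uint32_t)], sizeof(bv));
	printf("xstate_bv = %#llx\n", (unsigned long long)bv);	/* 0x3 */
	return 0;
}
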
@@ -3042,9 +3064,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, | |||
3042 | 3064 | ||
3043 | for (i = 0; i < guest_xcrs->nr_xcrs; i++) | 3065 | for (i = 0; i < guest_xcrs->nr_xcrs; i++) |
3044 | /* Only support XCR0 currently */ | 3066 | /* Only support XCR0 currently */ |
3045 | if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { | 3067 | if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) { |
3046 | r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, | 3068 | r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, |
3047 | guest_xcrs->xcrs[0].value); | 3069 | guest_xcrs->xcrs[i].value); |
3048 | break; | 3070 | break; |
3049 | } | 3071 | } |
3050 | if (r) | 3072 | if (r) |
@@ -4775,8 +4797,8 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) | |||
4775 | 4797 | ||
4776 | static void init_decode_cache(struct x86_emulate_ctxt *ctxt) | 4798 | static void init_decode_cache(struct x86_emulate_ctxt *ctxt) |
4777 | { | 4799 | { |
4778 | memset(&ctxt->twobyte, 0, | 4800 | memset(&ctxt->opcode_len, 0, |
4779 | (void *)&ctxt->_regs - (void *)&ctxt->twobyte); | 4801 | (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); |
4780 | 4802 | ||
4781 | ctxt->fetch.start = 0; | 4803 | ctxt->fetch.start = 0; |
4782 | ctxt->fetch.end = 0; | 4804 | ctxt->fetch.end = 0; |
@@ -5094,8 +5116,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
5094 | ctxt->have_exception = false; | 5116 | ctxt->have_exception = false; |
5095 | ctxt->perm_ok = false; | 5117 | ctxt->perm_ok = false; |
5096 | 5118 | ||
5097 | ctxt->only_vendor_specific_insn | 5119 | ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; |
5098 | = emulation_type & EMULTYPE_TRAP_UD; | ||
5099 | 5120 | ||
5100 | r = x86_decode_insn(ctxt, insn, insn_len); | 5121 | r = x86_decode_insn(ctxt, insn, insn_len); |
5101 | 5122 | ||
@@ -5263,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
5263 | 5284 | ||
5264 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); | 5285 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); |
5265 | 5286 | ||
5266 | raw_spin_lock(&kvm_lock); | 5287 | spin_lock(&kvm_lock); |
5267 | list_for_each_entry(kvm, &vm_list, vm_list) { | 5288 | list_for_each_entry(kvm, &vm_list, vm_list) { |
5268 | kvm_for_each_vcpu(i, vcpu, kvm) { | 5289 | kvm_for_each_vcpu(i, vcpu, kvm) { |
5269 | if (vcpu->cpu != freq->cpu) | 5290 | if (vcpu->cpu != freq->cpu) |
@@ -5273,7 +5294,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
5273 | send_ipi = 1; | 5294 | send_ipi = 1; |
5274 | } | 5295 | } |
5275 | } | 5296 | } |
5276 | raw_spin_unlock(&kvm_lock); | 5297 | spin_unlock(&kvm_lock); |
5277 | 5298 | ||
5278 | if (freq->old < freq->new && send_ipi) { | 5299 | if (freq->old < freq->new && send_ipi) { |
5279 | /* | 5300 | /* |
@@ -5426,12 +5447,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) | |||
5426 | struct kvm_vcpu *vcpu; | 5447 | struct kvm_vcpu *vcpu; |
5427 | int i; | 5448 | int i; |
5428 | 5449 | ||
5429 | raw_spin_lock(&kvm_lock); | 5450 | spin_lock(&kvm_lock); |
5430 | list_for_each_entry(kvm, &vm_list, vm_list) | 5451 | list_for_each_entry(kvm, &vm_list, vm_list) |
5431 | kvm_for_each_vcpu(i, vcpu, kvm) | 5452 | kvm_for_each_vcpu(i, vcpu, kvm) |
5432 | set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); | 5453 | set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); |
5433 | atomic_set(&kvm_guest_has_master_clock, 0); | 5454 | atomic_set(&kvm_guest_has_master_clock, 0); |
5434 | raw_spin_unlock(&kvm_lock); | 5455 | spin_unlock(&kvm_lock); |
5435 | } | 5456 | } |
5436 | 5457 | ||
5437 | static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); | 5458 | static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); |
@@ -5945,10 +5966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5945 | 5966 | ||
5946 | vcpu->mode = IN_GUEST_MODE; | 5967 | vcpu->mode = IN_GUEST_MODE; |
5947 | 5968 | ||
5969 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5970 | |||
5948 | /* We should set ->mode before check ->requests, | 5971 | /* We should set ->mode before check ->requests, |
5949 | * see the comment in make_all_cpus_request. | 5972 | * see the comment in make_all_cpus_request. |
5950 | */ | 5973 | */ |
5951 | smp_mb(); | 5974 | smp_mb__after_srcu_read_unlock(); |
5952 | 5975 | ||
5953 | local_irq_disable(); | 5976 | local_irq_disable(); |
5954 | 5977 | ||
@@ -5958,12 +5981,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5958 | smp_wmb(); | 5981 | smp_wmb(); |
5959 | local_irq_enable(); | 5982 | local_irq_enable(); |
5960 | preempt_enable(); | 5983 | preempt_enable(); |
5984 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5961 | r = 1; | 5985 | r = 1; |
5962 | goto cancel_injection; | 5986 | goto cancel_injection; |
5963 | } | 5987 | } |
5964 | 5988 | ||
5965 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5966 | |||
5967 | if (req_immediate_exit) | 5989 | if (req_immediate_exit) |
5968 | smp_send_reschedule(vcpu->cpu); | 5990 | smp_send_reschedule(vcpu->cpu); |
5969 | 5991 | ||
@@ -6688,7 +6710,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6688 | if (r) | 6710 | if (r) |
6689 | return r; | 6711 | return r; |
6690 | kvm_vcpu_reset(vcpu); | 6712 | kvm_vcpu_reset(vcpu); |
6691 | r = kvm_mmu_setup(vcpu); | 6713 | kvm_mmu_setup(vcpu); |
6692 | vcpu_put(vcpu); | 6714 | vcpu_put(vcpu); |
6693 | 6715 | ||
6694 | return r; | 6716 | return r; |
@@ -6940,6 +6962,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6940 | 6962 | ||
6941 | vcpu->arch.ia32_tsc_adjust_msr = 0x0; | 6963 | vcpu->arch.ia32_tsc_adjust_msr = 0x0; |
6942 | vcpu->arch.pv_time_enabled = false; | 6964 | vcpu->arch.pv_time_enabled = false; |
6965 | |||
6966 | vcpu->arch.guest_supported_xcr0 = 0; | ||
6967 | vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; | ||
6968 | |||
6943 | kvm_async_pf_hash_reset(vcpu); | 6969 | kvm_async_pf_hash_reset(vcpu); |
6944 | kvm_pmu_init(vcpu); | 6970 | kvm_pmu_init(vcpu); |
6945 | 6971 | ||
@@ -6981,6 +7007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
6981 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 7007 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
6982 | INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); | 7008 | INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); |
6983 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 7009 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
7010 | atomic_set(&kvm->arch.noncoherent_dma_count, 0); | ||
6984 | 7011 | ||
6985 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 7012 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
6986 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); | 7013 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); |
@@ -7065,7 +7092,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
7065 | kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); | 7092 | kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); |
7066 | } | 7093 | } |
7067 | 7094 | ||
7068 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | 7095 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, |
7069 | struct kvm_memory_slot *dont) | 7096 | struct kvm_memory_slot *dont) |
7070 | { | 7097 | { |
7071 | int i; | 7098 | int i; |
@@ -7086,7 +7113,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, | |||
7086 | } | 7113 | } |
7087 | } | 7114 | } |
7088 | 7115 | ||
7089 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | 7116 | int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, |
7117 | unsigned long npages) | ||
7090 | { | 7118 | { |
7091 | int i; | 7119 | int i; |
7092 | 7120 | ||
@@ -7283,7 +7311,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) | |||
7283 | int r; | 7311 | int r; |
7284 | 7312 | ||
7285 | if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || | 7313 | if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || |
7286 | is_error_page(work->page)) | 7314 | work->wakeup_all) |
7287 | return; | 7315 | return; |
7288 | 7316 | ||
7289 | r = kvm_mmu_reload(vcpu); | 7317 | r = kvm_mmu_reload(vcpu); |
@@ -7393,7 +7421,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | |||
7393 | struct x86_exception fault; | 7421 | struct x86_exception fault; |
7394 | 7422 | ||
7395 | trace_kvm_async_pf_ready(work->arch.token, work->gva); | 7423 | trace_kvm_async_pf_ready(work->arch.token, work->gva); |
7396 | if (is_error_page(work->page)) | 7424 | if (work->wakeup_all) |
7397 | work->arch.token = ~0; /* broadcast wakeup */ | 7425 | work->arch.token = ~0; /* broadcast wakeup */ |
7398 | else | 7426 | else |
7399 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); | 7427 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); |
@@ -7420,6 +7448,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) | |||
7420 | kvm_x86_ops->interrupt_allowed(vcpu); | 7448 | kvm_x86_ops->interrupt_allowed(vcpu); |
7421 | } | 7449 | } |
7422 | 7450 | ||
7451 | void kvm_arch_register_noncoherent_dma(struct kvm *kvm) | ||
7452 | { | ||
7453 | atomic_inc(&kvm->arch.noncoherent_dma_count); | ||
7454 | } | ||
7455 | EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); | ||
7456 | |||
7457 | void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) | ||
7458 | { | ||
7459 | atomic_dec(&kvm->arch.noncoherent_dma_count); | ||
7460 | } | ||
7461 | EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); | ||
7462 | |||
7463 | bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) | ||
7464 | { | ||
7465 | return atomic_read(&kvm->arch.noncoherent_dma_count); | ||
7466 | } | ||
7467 | EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); | ||
7468 | |||
7423 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 7469 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
7424 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 7470 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
7425 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 7471 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
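
The three helpers added above replace open-coded peeks at iommu_domain/iommu_flags: assignment paths are expected to bump the counter when a non-coherent IOMMU domain is attached, and WBINVD emulation plus the EPT memory-type logic simply ask kvm_arch_has_noncoherent_dma(). A minimal userspace model of the pattern with C11 atomics (the call-site comments are assumptions about where the kernel hooks these, not quoted code):

#include <stdatomic.h>
#include <stdio.h>

/* Per-VM count of assigned devices doing non-coherent DMA. */
static atomic_int noncoherent_dma_count;

static void register_noncoherent_dma(void)
{
	atomic_fetch_add(&noncoherent_dma_count, 1);
}

static void unregister_noncoherent_dma(void)
{
	atomic_fetch_sub(&noncoherent_dma_count, 1);
}

static int has_noncoherent_dma(void)
{
	return atomic_load(&noncoherent_dma_count) != 0;
}

int main(void)
{
	register_noncoherent_dma();	/* e.g. non-coherent device assigned */
	printf("need wbinvd emulation? %d\n", has_noncoherent_dma()); /* 1 */
	unregister_noncoherent_dma();	/* device deassigned */
	printf("need wbinvd emulation? %d\n", has_noncoherent_dma()); /* 0 */
	return 0;
}
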
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e224f7a671b6..587fb9ede436 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
122 | gva_t addr, void *val, unsigned int bytes, | 122 | gva_t addr, void *val, unsigned int bytes, |
123 | struct x86_exception *exception); | 123 | struct x86_exception *exception); |
124 | 124 | ||
125 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) | ||
125 | extern u64 host_xcr0; | 126 | extern u64 host_xcr0; |
126 | 127 | ||
127 | extern struct static_key kvm_no_apic_vcpu; | 128 | extern struct static_key kvm_no_apic_vcpu; |