author     Linus Torvalds <torvalds@linux-foundation.org>   2013-11-14 23:51:36 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-11-14 23:51:36 -0500
commit     f080480488028bcc25357f85e8ae54ccc3bb7173 (patch)
tree       8fcc943f16d26c795b3b6324b478af2d5a30285d /arch/x86
parent     eda670c626a4f53eb8ac5f20d8c10d3f0b54c583 (diff)
parent     e504c9098ed6acd9e1079c5e10e4910724ad429f (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM changes from Paolo Bonzini:
 "Here are the 3.13 KVM changes.  There was a lot of work on the PPC
  side: the fact that the HV and emulation flavors can now coexist in a
  single kernel is probably the most interesting change from a user
  point of view.

  On the x86 side there are nested virtualization improvements and a
  few bugfixes.  ARM got transparent huge page support, improved
  overcommit, and support for big endian guests.

  Finally, there is a new interface to connect KVM with VFIO.  This
  helps with devices that use NoSnoop PCI transactions, letting the
  driver in the guest execute WBINVD instructions.  This includes some
  nVidia cards on Windows, which fail to start without these patches
  and the corresponding userspace changes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits)
  kvm, vmx: Fix lazy FPU on nested guest
  arm/arm64: KVM: PSCI: propagate caller endianness to the incoming vcpu
  arm/arm64: KVM: MMIO support for BE guest
  kvm, cpuid: Fix sparse warning
  kvm: Delete prototype for non-existent function kvm_check_iopl
  kvm: Delete prototype for non-existent function complete_pio
  hung_task: add method to reset detector
  pvclock: detect watchdog reset at pvclock read
  kvm: optimize out smp_mb after srcu_read_unlock
  srcu: API for barrier after srcu read unlock
  KVM: remove vm mmap method
  KVM: IOMMU: hva align mapping page size
  KVM: x86: trace cpuid emulation when called from emulator
  KVM: emulator: cleanup decode_register_operand() a bit
  KVM: emulator: check rex prefix inside decode_register()
  KVM: x86: fix emulation of "movzbl %bpl, %eax"
  kvm_host: typo fix
  KVM: x86: emulate SAHF instruction
  MAINTAINERS: add tree for kvm.git
  Documentation/kvm: add a 00-INDEX file
  ...
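Editor's note: the KVM-VFIO bridge mentioned above is driven entirely from userspace. The VMM creates the KVM-VFIO pseudo device on its VM file descriptor and then hands it each VFIO group fd, which is what lets KVM learn about noncoherent (NoSnoop-capable) DMA and honor the guest's WBINVD. The sketch below is illustrative only: it assumes the KVM_CREATE_DEVICE/KVM_SET_DEVICE_ATTR uapi and the KVM_DEV_VFIO_GROUP_ADD attribute introduced alongside this series; the helper name is hypothetical, and the VFIO container setup and error handling are omitted.

/* Hypothetical userspace sketch: register a VFIO group with the KVM-VFIO device. */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int kvm_attach_vfio_group(int vm_fd, int vfio_group_fd)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
	struct kvm_device_attr attr = {
		.group = KVM_DEV_VFIO_GROUP,
		.attr  = KVM_DEV_VFIO_GROUP_ADD,
		.addr  = (__u64)(unsigned long)&vfio_group_fd,
	};

	/* Create the per-VM KVM-VFIO pseudo device; its fd comes back in cd.fd. */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	/* Hand the VFIO group fd to KVM so it can track noncoherent DMA. */
	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}

On the host side, that group registration is presumably what feeds the new iommu_noncoherent flag and noncoherent_dma_count counter visible in the kvm_host.h hunk below.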
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h     |  10
-rw-r--r--  arch/x86/include/asm/kvm_host.h        |  23
-rw-r--r--  arch/x86/include/asm/pvclock.h         |   2
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h        |   6
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h  |   1
-rw-r--r--  arch/x86/kernel/kvmclock.c             |   1
-rw-r--r--  arch/x86/kernel/pvclock.c              |  13
-rw-r--r--  arch/x86/kvm/Kconfig                   |   1
-rw-r--r--  arch/x86/kvm/Makefile                  |   2
-rw-r--r--  arch/x86/kvm/cpuid.c                   | 115
-rw-r--r--  arch/x86/kvm/cpuid.h                   |   5
-rw-r--r--  arch/x86/kvm/emulate.c                 | 130
-rw-r--r--  arch/x86/kvm/mmu.c                     | 115
-rw-r--r--  arch/x86/kvm/mmu.h                     |   4
-rw-r--r--  arch/x86/kvm/svm.c                     |   8
-rw-r--r--  arch/x86/kvm/vmx.c                     | 158
-rw-r--r--  arch/x86/kvm/x86.c                     | 108
-rw-r--r--  arch/x86/kvm/x86.h                     |   1
18 files changed, 489 insertions(+), 214 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 15f960c06ff7..24ec1216596e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -274,13 +274,17 @@ struct x86_emulate_ctxt {
 
 	bool guest_mode; /* guest running a nested guest */
 	bool perm_ok; /* do not check permissions if true */
-	bool only_vendor_specific_insn;
+	bool ud;	/* inject an #UD if host doesn't support insn */
 
 	bool have_exception;
 	struct x86_exception exception;
 
-	/* decode cache */
-	u8 twobyte;
+	/*
+	 * decode cache
+	 */
+
+	/* current opcode length in bytes */
+	u8 opcode_len;
 	u8 b;
 	u8 intercept;
 	u8 lock_prefix;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c76ff74a98f2..ae5d7830855c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
@@ -253,7 +260,6 @@ struct kvm_pio_request {
  * mode.
  */
 struct kvm_mmu {
-	void (*new_cr3)(struct kvm_vcpu *vcpu);
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
 	unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
 	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
 			  bool prefault);
 	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
 				  struct x86_exception *fault);
-	void (*free)(struct kvm_vcpu *vcpu);
 	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
 			    struct x86_exception *exception);
 	gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
 
 	struct fpu guest_fpu;
 	u64 xcr0;
+	u64 guest_supported_xcr0;
+	u32 guest_xstate_size;
 
 	struct kvm_pio_request pio;
 	void *pio_data;
@@ -557,7 +564,9 @@ struct kvm_arch {
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
-	int iommu_flags;
+	bool iommu_noncoherent;
+#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
+	atomic_t noncoherent_dma_count;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -780,11 +789,11 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
@@ -922,13 +931,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
 
-int complete_pio(struct kvm_vcpu *vcpu);
-bool kvm_check_iopl(struct kvm_vcpu *vcpu);
-
 static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
 {
 	return gpa;
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index be8269b00e2a..d6b078e9fa28 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -14,6 +14,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
 			    struct timespec *ts);
 void pvclock_resume(void);
 
+void pvclock_touch_watchdogs(void);
+
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
 	__u32 padding[3];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
 
 /* for KVM_SET_CPUID2 */
 struct kvm_cpuid2 {
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb0465090ae5..b93e09a0fa21 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1570e0741344..e6041094ff26 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -139,6 +139,7 @@ bool kvm_check_and_clear_guest_paused(void)
 		src = &hv_clock[cpu].pvti;
 		if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
 			src->flags &= ~PVCLOCK_GUEST_STOPPED;
+			pvclock_touch_watchdogs();
 			ret = true;
 		}
 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index a16bae3f83b3..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -43,6 +43,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
 	return pv_tsc_khz;
 }
 
+void pvclock_touch_watchdogs(void)
+{
+	touch_softlockup_watchdog_sync();
+	clocksource_touch_watchdog();
+	rcu_cpu_stall_reset();
+	reset_hung_task_detector();
+}
+
 static atomic64_t last_value = ATOMIC64_INIT(0);
 
 void pvclock_resume(void)
@@ -74,6 +82,11 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 		version = __pvclock_read_cycles(src, &ret, &flags);
 	} while ((src->version & 1) || version != src->version);
 
+	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
+		src->flags &= ~PVCLOCK_GUEST_STOPPED;
+		pvclock_touch_watchdogs();
+	}
+
 	if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
 		(flags & PVCLOCK_TSC_STABLE_BIT))
 		return ret;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..b89c5db2b832 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select KVM_VFIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index bf4fb04d0112..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
 				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b110fe6c03d4..c6976257eff5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,6 +23,26 @@
 #include "mmu.h"
 #include "trace.h"
 
+static u32 xstate_required_size(u64 xstate_bv)
+{
+	int feature_bit = 0;
+	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
+	xstate_bv &= ~XSTATE_FPSSE;
+	while (xstate_bv) {
+		if (xstate_bv & 0x1) {
+			u32 eax, ebx, ecx, edx;
+			cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
+			ret = max(ret, eax + ebx);
+		}
+
+		xstate_bv >>= 1;
+		feature_bit++;
+	}
+
+	return ret;
+}
+
 void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		apic->lapic_timer.timer_mode_mask = 1 << 17;
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+	if (!best) {
+		vcpu->arch.guest_supported_xcr0 = 0;
+		vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+	} else {
+		vcpu->arch.guest_supported_xcr0 =
+			(best->eax | ((u64)best->edx << 32)) &
+			host_xcr0 & KVM_SUPPORTED_XCR0;
+		vcpu->arch.guest_xstate_size =
+			xstate_required_size(vcpu->arch.guest_supported_xcr0);
+	}
+
 	kvm_pmu_cpuid_update(vcpu);
 }
 
@@ -182,13 +214,35 @@ static bool supported_xcr0_bit(unsigned bit)
 {
 	u64 mask = ((u64)1 << bit);
 
-	return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+	return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
 }
 
 #define F(x) bit(X86_FEATURE_##x)
 
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			u32 index, int *nent, int maxnent)
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+				   u32 func, u32 index, int *nent, int maxnent)
+{
+	switch (func) {
+	case 0:
+		entry->eax = 1;		/* only one leaf currently */
+		++*nent;
+		break;
+	case 1:
+		entry->ecx = F(MOVBE);
+		++*nent;
+		break;
+	default:
+		break;
+	}
+
+	entry->function = func;
+	entry->index = index;
+
+	return 0;
+}
+
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+				 u32 index, int *nent, int maxnent)
 {
 	int r;
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -383,6 +437,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	case 0xd: {
 		int idx, i;
 
+		entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
+		entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		for (idx = 1, i = 1; idx < 64; ++idx) {
 			if (*nent >= maxnent)
@@ -481,6 +537,15 @@ out:
 	return r;
 }
 
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+			u32 idx, int *nent, int maxnent, unsigned int type)
+{
+	if (type == KVM_GET_EMULATED_CPUID)
+		return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+
+	return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+}
+
 #undef F
 
 struct kvm_cpuid_param {
@@ -495,8 +560,36 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
 
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries)
+static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+				 __u32 num_entries, unsigned int ioctl_type)
+{
+	int i;
+	__u32 pad[3];
+
+	if (ioctl_type != KVM_GET_EMULATED_CPUID)
+		return false;
+
+	/*
+	 * We want to make sure that ->padding is being passed clean from
+	 * userspace in case we want to use it for something in the future.
+	 *
+	 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+	 * have to give ourselves satisfied only with the emulated side. /me
+	 * sheds a tear.
+	 */
+	for (i = 0; i < num_entries; i++) {
+		if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
+			return true;
+
+		if (pad[0] || pad[1] || pad[2])
+			return true;
+	}
+	return false;
+}
+
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG, i;
@@ -513,8 +606,12 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		goto out;
 	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+
+	if (sanity_check_entries(entries, cpuid->nent, type))
+		return -EINVAL;
+
 	r = -ENOMEM;
-	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
+	cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
 	if (!cpuid_entries)
 		goto out;
 
@@ -526,7 +623,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 			continue;
 
 		r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
-				&nent, cpuid->nent);
+				&nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
@@ -537,7 +634,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		limit = cpuid_entries[nent - 1].eax;
 		for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
 			r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
-				     &nent, cpuid->nent);
+				     &nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
@@ -661,6 +758,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
 		*edx = best->edx;
 	} else
 		*eax = *ebx = *ecx = *edx = 0;
+	trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
 }
 EXPORT_SYMBOL_GPL(kvm_cpuid);
 
@@ -676,6 +774,5 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 	kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	trace_kvm_cpuid(function, eax, ebx, ecx, edx);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
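Editor's note: the KVM_GET_EMULATED_CPUID plumbing added above mirrors the existing KVM_GET_SUPPORTED_CPUID system ioctl, so userspace can ask which features KVM is willing to emulate even when the host CPU lacks them (currently just the MOVBE bit in leaf 1). A rough, hypothetical usage sketch with a fixed entry count and minimal error handling:

/* Hypothetical example: query the emulated-CPUID list from /dev/kvm. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_cpuid2 *cpuid;
	unsigned int i, nent = 8;	/* arbitrary upper bound */
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0)
		return 1;

	cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
	cpuid->nent = nent;

	if (ioctl(kvm, KVM_GET_EMULATED_CPUID, cpuid) < 0) {
		perror("KVM_GET_EMULATED_CPUID");
		return 1;
	}

	for (i = 0; i < cpuid->nent; i++)
		printf("function 0x%x index 0x%x: ecx=0x%08x\n",
		       cpuid->entries[i].function,
		       cpuid->entries[i].index,
		       cpuid->entries[i].ecx);
	return 0;
}

As with KVM_GET_SUPPORTED_CPUID, the kernel rewrites cpuid->nent to the number of entries actually filled in and returns E2BIG if the buffer was too small.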
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
 void kvm_update_cpuid(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries);
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type);
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
 			     struct kvm_cpuid_entry __user *entries);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ddc3f3d2afdb..07ffca0a89e9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -130,7 +130,7 @@
 #define Mov         (1<<20)
 /* Misc flags */
 #define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
-#define VendorSpecific (1<<22) /* Vendor specific instruction */
+#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
 #define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
 #define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
 #define Undefined   (1<<25) /* No Such Instruction */
@@ -785,9 +785,10 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
-			     int highbyte_regs)
+			     int byteop)
 {
 	void *p;
+	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
 
 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
 		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
@@ -1024,7 +1025,6 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 				    struct operand *op)
 {
 	unsigned reg = ctxt->modrm_reg;
-	int highbyte_regs = ctxt->rex_prefix == 0;
 
 	if (!(ctxt->d & ModRM))
 		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
@@ -1045,13 +1045,9 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 	}
 
 	op->type = OP_REG;
-	if (ctxt->d & ByteOp) {
-		op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
-		op->bytes = 1;
-	} else {
-		op->addr.reg = decode_register(ctxt, reg, 0);
-		op->bytes = ctxt->op_bytes;
-	}
+	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
+
 	fetch_register_operand(op);
 	op->orig_val = op->val;
 }
@@ -1082,12 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	ctxt->modrm_seg = VCPU_SREG_DS;
 
 	if (ctxt->modrm_mod == 3) {
-		int highbyte_regs = ctxt->rex_prefix == 0;
-
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
-					       highbyte_regs && (ctxt->d & ByteOp));
+					       ctxt->d & ByteOp);
 		if (ctxt->d & Sse) {
 			op->type = OP_XMM;
 			op->bytes = 16;
@@ -2961,6 +2955,46 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+#define FFL(x) bit(X86_FEATURE_##x)
+
+static int em_movbe(struct x86_emulate_ctxt *ctxt)
+{
+	u32 ebx, ecx, edx, eax = 1;
+	u16 tmp;
+
+	/*
+	 * Check MOVBE is set in the guest-visible CPUID leaf.
+	 */
+	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+	if (!(ecx & FFL(MOVBE)))
+		return emulate_ud(ctxt);
+
+	switch (ctxt->op_bytes) {
+	case 2:
+		/*
+		 * From MOVBE definition: "...When the operand size is 16 bits,
+		 * the upper word of the destination register remains unchanged
+		 * ..."
+		 *
+		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
+		 * rules so we have to do the operation almost per hand.
+		 */
+		tmp = (u16)ctxt->src.val;
+		ctxt->dst.val &= ~0xffffUL;
+		ctxt->dst.val |= (unsigned long)swab16(tmp);
+		break;
+	case 4:
+		ctxt->dst.val = swab32((u32)ctxt->src.val);
+		break;
+	case 8:
+		ctxt->dst.val = swab64(ctxt->src.val);
+		break;
+	default:
+		return X86EMUL_PROPAGATE_FAULT;
+	}
+	return X86EMUL_CONTINUE;
+}
+
 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
 {
 	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
@@ -3256,6 +3290,18 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_sahf(struct x86_emulate_ctxt *ctxt)
+{
+	u32 flags;
+
+	flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
+
+	ctxt->eflags &= ~0xffUL;
+	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
+	return X86EMUL_CONTINUE;
+}
+
 static int em_lahf(struct x86_emulate_ctxt *ctxt)
 {
 	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
@@ -3502,7 +3548,7 @@ static const struct opcode group7_rm1[] = {
 
 static const struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
-	II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall),
+	II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall),
 	DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
 	DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
 	DIP(SrcNone | Prot | Priv, stgi, check_svme),
@@ -3587,7 +3633,7 @@ static const struct group_dual group7 = { {
 	II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
 	II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
 }, {
-	I(SrcNone | Priv | VendorSpecific, em_vmcall),
+	I(SrcNone | Priv | EmulateOnUD, em_vmcall),
 	EXT(0, group7_rm1),
 	N, EXT(0, group7_rm3),
 	II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3750,7 +3796,8 @@ static const struct opcode opcode_table[256] = {
 	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
 	I(SrcImmFAddr | No64, em_call_far), N,
 	II(ImplicitOps | Stack, em_pushf, pushf),
-	II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf),
+	II(ImplicitOps | Stack, em_popf, popf),
+	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
 	/* 0xA0 - 0xA7 */
 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
 	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
@@ -3810,7 +3857,7 @@ static const struct opcode opcode_table[256] = {
 static const struct opcode twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	G(0, group6), GD(0, &group7), N, N,
-	N, I(ImplicitOps | VendorSpecific, em_syscall),
+	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
 	N, D(ImplicitOps | ModRM), N, N,
@@ -3830,8 +3877,8 @@ static const struct opcode twobyte_table[256] = {
 	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
 	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
 	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
-	I(ImplicitOps | VendorSpecific, em_sysenter),
-	I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
+	I(ImplicitOps | EmulateOnUD, em_sysenter),
+	I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
 	N, N,
 	N, N, N, N, N, N, N, N,
 	/* 0x40 - 0x4F */
@@ -3892,6 +3939,30 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
 };
 
+static const struct gprefix three_byte_0f_38_f0 = {
+	I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+};
+
+static const struct gprefix three_byte_0f_38_f1 = {
+	I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+};
+
+/*
+ * Insns below are selected by the prefix which indexed by the third opcode
+ * byte.
+ */
+static const struct opcode opcode_map_0f_38[256] = {
+	/* 0x00 - 0x7f */
+	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
+	/* 0x80 - 0xef */
+	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
+	/* 0xf0 - 0xf1 */
+	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
+	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+	/* 0xf2 - 0xff */
+	N, N, X4(N), X8(N)
+};
+
 #undef D
 #undef N
 #undef G
@@ -4040,7 +4111,8 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 	case OpMem8:
 		ctxt->memop.bytes = 1;
 		if (ctxt->memop.type == OP_REG) {
-			ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
+			ctxt->memop.addr.reg = decode_register(ctxt,
+							ctxt->modrm_rm, true);
 			fetch_register_operand(&ctxt->memop);
 		}
 		goto mem_common;
@@ -4126,6 +4198,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	ctxt->_eip = ctxt->eip;
 	ctxt->fetch.start = ctxt->_eip;
 	ctxt->fetch.end = ctxt->fetch.start + insn_len;
+	ctxt->opcode_len = 1;
 	if (insn_len > 0)
 		memcpy(ctxt->fetch.data, insn, insn_len);
 
@@ -4208,9 +4281,16 @@ done_prefixes:
 	opcode = opcode_table[ctxt->b];
 	/* Two-byte opcode? */
 	if (ctxt->b == 0x0f) {
-		ctxt->twobyte = 1;
+		ctxt->opcode_len = 2;
 		ctxt->b = insn_fetch(u8, ctxt);
 		opcode = twobyte_table[ctxt->b];
+
+		/* 0F_38 opcode map */
+		if (ctxt->b == 0x38) {
+			ctxt->opcode_len = 3;
+			ctxt->b = insn_fetch(u8, ctxt);
+			opcode = opcode_map_0f_38[ctxt->b];
+		}
 	}
 	ctxt->d = opcode.flags;
 
@@ -4267,7 +4347,7 @@ done_prefixes:
 	if (ctxt->d == 0 || (ctxt->d & NotImpl))
 		return EMULATION_FAILED;
 
-	if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
+	if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
 		return EMULATION_FAILED;
 
 	if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
@@ -4540,8 +4620,10 @@ special_insn:
 		goto writeback;
 	}
 
-	if (ctxt->twobyte)
+	if (ctxt->opcode_len == 2)
 		goto twobyte_insn;
+	else if (ctxt->opcode_len == 3)
+		goto threebyte_insn;
 
 	switch (ctxt->b) {
 	case 0x63:		/* movsxd */
@@ -4726,6 +4808,8 @@ twobyte_insn:
 		goto cannot_emulate;
 	}
 
+threebyte_insn:
+
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dce0df8150df..40772ef0f2b1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		kvm_release_pfn_clean(pfn);
 }
 
-static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
-{
-	mmu_free_roots(vcpu);
-}
-
 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 				     bool no_dirty_log)
 {
@@ -3424,18 +3419,11 @@ out_unlock:
 	return 0;
 }
 
-static void nonpaging_free(struct kvm_vcpu *vcpu)
-{
-	mmu_free_roots(vcpu);
-}
-
-static int nonpaging_init_context(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+				   struct kvm_mmu *context)
 {
-	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = nonpaging_page_fault;
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
-	context->free = nonpaging_free;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
 	context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = true;
 	context->nx = false;
-	return 0;
 }
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
 
-static void paging_new_cr3(struct kvm_vcpu *vcpu)
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
 {
-	pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
 	mmu_free_roots(vcpu);
 }
 
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
 	vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
-static void paging_free(struct kvm_vcpu *vcpu)
-{
-	nonpaging_free(vcpu);
-}
-
 static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
 			   unsigned access, int *nr_present)
 {
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 	mmu->last_pte_bitmap = map;
 }
 
-static int paging64_init_context_common(struct kvm_vcpu *vcpu,
-					struct kvm_mmu *context,
-					int level)
+static void paging64_init_context_common(struct kvm_vcpu *vcpu,
+					 struct kvm_mmu *context,
+					 int level)
 {
 	context->nx = is_nx(vcpu);
 	context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	update_last_pte_bitmap(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
-	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->sync_page = paging64_sync_page;
 	context->invlpg = paging64_invlpg;
 	context->update_pte = paging64_update_pte;
-	context->free = paging_free;
 	context->shadow_root_level = level;
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
-	return 0;
 }
 
-static int paging64_init_context(struct kvm_vcpu *vcpu,
-				struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
 {
-	return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+	paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
 }
 
-static int paging32_init_context(struct kvm_vcpu *vcpu,
-				struct kvm_mmu *context)
+static void paging32_init_context(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu *context)
 {
 	context->nx = false;
 	context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
-	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
-	context->free = paging_free;
 	context->sync_page = paging32_sync_page;
 	context->invlpg = paging32_invlpg;
 	context->update_pte = paging32_update_pte;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
-	return 0;
 }
 
-static int paging32E_init_context(struct kvm_vcpu *vcpu,
-				struct kvm_mmu *context)
+static void paging32E_init_context(struct kvm_vcpu *vcpu,
+				   struct kvm_mmu *context)
 {
-	return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
+	paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
-static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = vcpu->arch.walk_mmu;
 
 	context->base_role.word = 0;
-	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = tdp_page_fault;
-	context->free = nonpaging_free;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
 	context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
-
-	return 0;
 }
 
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-	int r;
 	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
 	if (!is_paging(vcpu))
-		r = nonpaging_init_context(vcpu, context);
+		nonpaging_init_context(vcpu, context);
 	else if (is_long_mode(vcpu))
-		r = paging64_init_context(vcpu, context);
+		paging64_init_context(vcpu, context);
 	else if (is_pae(vcpu))
-		r = paging32E_init_context(vcpu, context);
+		paging32E_init_context(vcpu, context);
 	else
-		r = paging32_init_context(vcpu, context);
+		paging32_init_context(vcpu, context);
 
 	vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
 	vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
 	vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
 	vcpu->arch.mmu.base_role.smep_andnot_wp
 		= smep && !is_write_protection(vcpu);
-
-	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 		bool execonly)
 {
 	ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 
 	context->nx = true;
-	context->new_cr3 = paging_new_cr3;
 	context->page_fault = ept_page_fault;
 	context->gva_to_gpa = ept_gva_to_gpa;
 	context->sync_page = ept_sync_page;
 	context->invlpg = ept_invlpg;
 	context->update_pte = ept_update_pte;
-	context->free = paging_free;
 	context->root_level = context->shadow_root_level;
 	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
 
 	update_permission_bitmask(vcpu, context, true);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
-static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
-	int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
-
+	kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
 	vcpu->arch.walk_mmu->set_cr3           = kvm_x86_ops->set_cr3;
 	vcpu->arch.walk_mmu->get_cr3           = get_cr3;
 	vcpu->arch.walk_mmu->get_pdptr         = kvm_pdptr_read;
 	vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
-	return r;
 }
 
-static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(vcpu, g_context, false);
 	update_last_pte_bitmap(vcpu, g_context);
-
-	return 0;
 }
 
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
 	if (mmu_is_nested(vcpu))
 		return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 		return init_kvm_softmmu(vcpu);
 }
 
-static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
-	if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		/* mmu.free() should set root_hpa = INVALID_PAGE */
-		vcpu->arch.mmu.free(vcpu);
-}
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
-	destroy_kvm_mmu(vcpu);
-	return init_kvm_mmu(vcpu);
+	kvm_mmu_unload(vcpu);
+	init_kvm_mmu(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
 	mmu_free_roots(vcpu);
+	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 	return alloc_mmu_pages(vcpu);
 }
 
-int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-	return init_kvm_mmu(vcpu);
+	init_kvm_mmu(vcpu);
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	int nr_to_scan = sc->nr_to_scan;
 	unsigned long freed = 0;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int idx;
@@ -4478,9 +4432,8 @@ unlock:
 		break;
 	}
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return freed;
-
 }
 
 static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 
-	destroy_kvm_mmu(vcpu);
+	kvm_mmu_unload(vcpu);
 	free_mmu_pages(vcpu);
 	mmu_free_memory_caches(vcpu);
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 77e044a0f5f7..292615274358 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -70,8 +70,8 @@ enum {
 };
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 		bool execonly);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c0bc80391e40..c7168a5cff1b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
 	nested_svm_vmexit(svm);
 }
 
-static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 {
-	int r;
-
-	r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+	kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
 
 	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
 	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
 	vcpu->arch.mmu.shadow_root_level = get_npt_level();
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
-
-	return r;
 }
 
 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2b2fce1b2009..b2fe1c252f35 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1498,7 +1498,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		break;
 
 	if (i == NR_AUTOLOAD_MSRS) {
-		printk_once(KERN_WARNING"Not enough mst switch entries. "
+		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
 		return;
 	} else if (i == m->nr) {
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 /*
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
- * This function assumes it is called with the exit reason in vmcs02 being
- * a #PF exception (this is the only case in which KVM injects a #PF when L2
- * is running).
  */
-static int nested_pf_handled(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-	if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+	if (!(vmcs12->exception_bitmap & (1u << nr)))
 		return 0;
 
 	nested_vmx_vmexit(vcpu);
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-	if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
-	    !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
+	if (!reinject && is_guest_mode(vcpu) &&
+	    nested_vmx_check_exception(vcpu, nr))
 		return;
 
 	if (has_error_code) {
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #ifdef CONFIG_X86_64
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+	if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+	    !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+		nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+		nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+	}
 	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
-				      VM_EXIT_LOAD_IA32_EFER);
+				      VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2226,7 +2228,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2226 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); 2228 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
2227 nested_vmx_procbased_ctls_low = 0; 2229 nested_vmx_procbased_ctls_low = 0;
2228 nested_vmx_procbased_ctls_high &= 2230 nested_vmx_procbased_ctls_high &=
2229 CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2231 CPU_BASED_VIRTUAL_INTR_PENDING |
2232 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
2230 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | 2233 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
2231 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | 2234 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
2232 CPU_BASED_CR3_STORE_EXITING | 2235 CPU_BASED_CR3_STORE_EXITING |
@@ -2252,13 +2255,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2252 nested_vmx_secondary_ctls_low = 0; 2255 nested_vmx_secondary_ctls_low = 0;
2253 nested_vmx_secondary_ctls_high &= 2256 nested_vmx_secondary_ctls_high &=
2254 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2257 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2258 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2255 SECONDARY_EXEC_WBINVD_EXITING; 2259 SECONDARY_EXEC_WBINVD_EXITING;
2256 2260
2257 if (enable_ept) { 2261 if (enable_ept) {
2258 /* nested EPT: emulate EPT also to L1 */ 2262 /* nested EPT: emulate EPT also to L1 */
2259 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; 2263 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
2260 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2264 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
2261 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; 2265 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
2266 VMX_EPT_INVEPT_BIT;
2262 nested_vmx_ept_caps &= vmx_capability.ept; 2267 nested_vmx_ept_caps &= vmx_capability.ept;
2263 /* 2268 /*
2264 * Since invept is completely emulated we support both global 2269 * Since invept is completely emulated we support both global
@@ -3380,8 +3385,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
3380 if (enable_ept) { 3385 if (enable_ept) {
3381 eptp = construct_eptp(cr3); 3386 eptp = construct_eptp(cr3);
3382 vmcs_write64(EPT_POINTER, eptp); 3387 vmcs_write64(EPT_POINTER, eptp);
3383 guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : 3388 if (is_paging(vcpu) || is_guest_mode(vcpu))
3384 vcpu->kvm->arch.ept_identity_map_addr; 3389 guest_cr3 = kvm_read_cr3(vcpu);
3390 else
3391 guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
3385 ept_load_pdptrs(vcpu); 3392 ept_load_pdptrs(vcpu);
3386 } 3393 }
3387 3394
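With EPT, the CR3 written into the guest area is the real guest CR3 only when the vCPU is paging; otherwise KVM substitutes its identity-map pagetable. The change above also treats is_guest_mode() as "keep the real CR3", so an L2's vmcs12-provided CR3 is never replaced. A small decision-function sketch, with placeholder addresses:

/* Sketch of the guest CR3 selection, not the real vmx_set_cr3(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t pick_guest_cr3(bool paging, bool guest_mode,
                               uint64_t real_cr3, uint64_t identity_map_addr)
{
        /* L2's CR3 (and a paging L1's CR3) must be kept; only a
         * non-paging, non-nested guest gets the identity map. */
        if (paging || guest_mode)
                return real_cr3;
        return identity_map_addr;
}

int main(void)
{
        const uint64_t cr3 = 0x1000, ident = 0xfeffd000;   /* placeholders */

        printf("%#llx\n", (unsigned long long)pick_guest_cr3(false, true, cr3, ident));
        printf("%#llx\n", (unsigned long long)pick_guest_cr3(false, false, cr3, ident));
        return 0;
}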
@@ -4879,6 +4886,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
4879 hypercall[2] = 0xc1; 4886 hypercall[2] = 0xc1;
4880} 4887}
4881 4888
4889static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
4890{
4891 unsigned long always_on = VMXON_CR0_ALWAYSON;
4892
4893 if (nested_vmx_secondary_ctls_high &
4894 SECONDARY_EXEC_UNRESTRICTED_GUEST &&
4895 nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
4896 always_on &= ~(X86_CR0_PE | X86_CR0_PG);
4897 return (val & always_on) == always_on;
4898}
4899
4882/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ 4900/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
4883static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) 4901static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4884{ 4902{
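nested_cr0_valid() relaxes the always-on CR0 bits when both KVM and vmcs12 enable SECONDARY_EXEC_UNRESTRICTED_GUEST, since an unrestricted L2 may legally run with CR0.PE and CR0.PG clear. A sketch of the mask logic follows; the PE|NE|PG composition of the always-on set is an assumption made for illustration, not quoted from the kernel headers:

/* Sketch of the relaxed CR0 check for nested unrestricted guests. */
#include <stdbool.h>
#include <stdio.h>

#define X86_CR0_PE 0x00000001ul
#define X86_CR0_NE 0x00000020ul
#define X86_CR0_PG 0x80000000ul
#define CR0_ALWAYSON (X86_CR0_PE | X86_CR0_NE | X86_CR0_PG)   /* assumed set */

static bool nested_cr0_ok(bool unrestricted_guest, unsigned long val)
{
        unsigned long always_on = CR0_ALWAYSON;

        if (unrestricted_guest)         /* real mode / paging-off is allowed */
                always_on &= ~(X86_CR0_PE | X86_CR0_PG);
        return (val & always_on) == always_on;
}

int main(void)
{
        unsigned long real_mode_cr0 = X86_CR0_NE;       /* PE and PG clear */

        printf("strict: %d\n", nested_cr0_ok(false, real_mode_cr0));    /* 0 */
        printf("urg:    %d\n", nested_cr0_ok(true,  real_mode_cr0));    /* 1 */
        return 0;
}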
@@ -4897,9 +4915,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4897 val = (val & ~vmcs12->cr0_guest_host_mask) | 4915 val = (val & ~vmcs12->cr0_guest_host_mask) |
4898 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); 4916 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
4899 4917
4900 /* TODO: will have to take unrestricted guest mode into 4918 if (!nested_cr0_valid(vmcs12, val))
4901 * account */
4902 if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
4903 return 1; 4919 return 1;
4904 4920
4905 if (kvm_set_cr0(vcpu, val)) 4921 if (kvm_set_cr0(vcpu, val))
@@ -6627,6 +6643,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6627 return 0; 6643 return 0;
6628 else if (is_page_fault(intr_info)) 6644 else if (is_page_fault(intr_info))
6629 return enable_ept; 6645 return enable_ept;
6646 else if (is_no_device(intr_info) &&
6647 !(nested_read_cr0(vmcs12) & X86_CR0_TS))
6648 return 0;
6630 return vmcs12->exception_bitmap & 6649 return vmcs12->exception_bitmap &
6631 (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 6650 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
6632 case EXIT_REASON_EXTERNAL_INTERRUPT: 6651 case EXIT_REASON_EXTERNAL_INTERRUPT:
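The new is_no_device() branch above is the "lazy FPU on nested guest" fix from the merge description: a #NM that fires only because L0 keeps the FPU switched lazily must stay in L0 whenever L1's view of CR0.TS (nested_read_cr0()) is clear, instead of being reflected into L1. A sketch of that filter, using the architectural CR0.TS bit and #NM vector:

/* Sketch: decide who handles a #NM taken while L2 runs.  'l1_cr0' stands
 * for the CR0 value L1 believes the guest has. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define X86_CR0_TS 0x00000008ul
#define NM_VECTOR  7

static bool reflect_nm_to_l1(unsigned long l1_cr0, uint32_t exception_bitmap)
{
        if (!(l1_cr0 & X86_CR0_TS))
                return false;   /* L0's lazy-FPU artifact: keep it in L0 */
        return exception_bitmap & (1u << NM_VECTOR);
}

int main(void)
{
        uint32_t bitmap = 1u << NM_VECTOR;      /* L1 intercepts #NM */

        printf("TS clear -> %d\n", reflect_nm_to_l1(0, bitmap));            /* 0 */
        printf("TS set   -> %d\n", reflect_nm_to_l1(X86_CR0_TS, bitmap));   /* 1 */
        return 0;
}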
@@ -6722,6 +6741,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6722 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 6741 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6723} 6742}
6724 6743
6744static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
6745{
6746 u64 delta_tsc_l1;
6747 u32 preempt_val_l1, preempt_val_l2, preempt_scale;
6748
6749 if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
6750 PIN_BASED_VMX_PREEMPTION_TIMER))
6751 return;
6752 preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
6753 MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
6754 preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
6755 delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
6756 - vcpu->arch.last_guest_tsc;
6757 preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
6758 if (preempt_val_l2 <= preempt_val_l1)
6759 preempt_val_l2 = 0;
6760 else
6761 preempt_val_l2 -= preempt_val_l1;
6762 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
6763}
6764
6725/* 6765/*
6726 * The guest has exited. See if we can fix it or if we need userspace 6766 * The guest has exited. See if we can fix it or if we need userspace
6727 * assistance. 6767 * assistance.
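nested_adjust_preemption_timer() above charges L2's preemption timer for the TSC time spent outside L2: the elapsed L1 TSC delta is shifted right by the scale reported in IA32_VMX_MISC (the timer ticks once every 2^scale TSC cycles) and the result is subtracted from the remaining timer value, clamping at zero. A worked arithmetic sketch with invented numbers:

/* Sketch of the preemption-timer adjustment arithmetic.  The scale,
 * TSC delta and timer values below are invented for illustration. */
#include <stdint.h>
#include <stdio.h>

static uint32_t adjust_timer(uint32_t remaining_l2, uint64_t delta_tsc_l1,
                             unsigned int preempt_scale)
{
        /* Timer units elapsed while L0/L1 work was done: TSC cycles / 2^scale. */
        uint32_t spent = (uint32_t)(delta_tsc_l1 >> preempt_scale);

        return remaining_l2 <= spent ? 0 : remaining_l2 - spent;
}

int main(void)
{
        /* e.g. scale 5 => one timer tick per 32 TSC cycles (hypothetical). */
        printf("%u\n", adjust_timer(10000, 64000, 5));  /* 10000 - 2000 = 8000 */
        printf("%u\n", adjust_timer(1000, 640000, 5));  /* would underflow -> 0 */
        return 0;
}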
@@ -6736,20 +6776,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6736 if (vmx->emulation_required) 6776 if (vmx->emulation_required)
6737 return handle_invalid_guest_state(vcpu); 6777 return handle_invalid_guest_state(vcpu);
6738 6778
6739 /*
6740 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
6741 * we did not inject a still-pending event to L1 now because of
6742 * nested_run_pending, we need to re-enable this bit.
6743 */
6744 if (vmx->nested.nested_run_pending)
6745 kvm_make_request(KVM_REQ_EVENT, vcpu);
6746
6747 if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
6748 exit_reason == EXIT_REASON_VMRESUME))
6749 vmx->nested.nested_run_pending = 1;
6750 else
6751 vmx->nested.nested_run_pending = 0;
6752
6753 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6779 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
6754 nested_vmx_vmexit(vcpu); 6780 nested_vmx_vmexit(vcpu);
6755 return 1; 6781 return 1;
@@ -7061,9 +7087,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
7061 case INTR_TYPE_HARD_EXCEPTION: 7087 case INTR_TYPE_HARD_EXCEPTION:
7062 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 7088 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
7063 u32 err = vmcs_read32(error_code_field); 7089 u32 err = vmcs_read32(error_code_field);
7064 kvm_queue_exception_e(vcpu, vector, err); 7090 kvm_requeue_exception_e(vcpu, vector, err);
7065 } else 7091 } else
7066 kvm_queue_exception(vcpu, vector); 7092 kvm_requeue_exception(vcpu, vector);
7067 break; 7093 break;
7068 case INTR_TYPE_SOFT_INTR: 7094 case INTR_TYPE_SOFT_INTR:
7069 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); 7095 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
@@ -7146,6 +7172,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7146 atomic_switch_perf_msrs(vmx); 7172 atomic_switch_perf_msrs(vmx);
7147 debugctlmsr = get_debugctlmsr(); 7173 debugctlmsr = get_debugctlmsr();
7148 7174
7175 if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
7176 nested_adjust_preemption_timer(vcpu);
7149 vmx->__launched = vmx->loaded_vmcs->launched; 7177 vmx->__launched = vmx->loaded_vmcs->launched;
7150 asm( 7178 asm(
7151 /* Store host registers */ 7179 /* Store host registers */
@@ -7284,6 +7312,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7284 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 7312 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
7285 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); 7313 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
7286 7314
7315 /*
7316 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
7317 * we did not inject a still-pending event to L1 now because of
7318 * nested_run_pending, we need to re-enable this bit.
7319 */
7320 if (vmx->nested.nested_run_pending)
7321 kvm_make_request(KVM_REQ_EVENT, vcpu);
7322
7323 vmx->nested.nested_run_pending = 0;
7324
7287 vmx_complete_atomic_exit(vmx); 7325 vmx_complete_atomic_exit(vmx);
7288 vmx_recover_nmi_blocking(vmx); 7326 vmx_recover_nmi_blocking(vmx);
7289 vmx_complete_interrupts(vmx); 7327 vmx_complete_interrupts(vmx);
@@ -7410,8 +7448,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
7410 */ 7448 */
7411 if (is_mmio) 7449 if (is_mmio)
7412 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; 7450 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
7413 else if (vcpu->kvm->arch.iommu_domain && 7451 else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
7414 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
7415 ret = kvm_get_guest_memory_type(vcpu, gfn) << 7452 ret = kvm_get_guest_memory_type(vcpu, gfn) <<
7416 VMX_EPT_MT_EPTE_SHIFT; 7453 VMX_EPT_MT_EPTE_SHIFT;
7417 else 7454 else
@@ -7501,9 +7538,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
7501 return get_vmcs12(vcpu)->ept_pointer; 7538 return get_vmcs12(vcpu)->ept_pointer;
7502} 7539}
7503 7540
7504static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 7541static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
7505{ 7542{
7506 int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, 7543 kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
7507 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); 7544 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
7508 7545
7509 vcpu->arch.mmu.set_cr3 = vmx_set_cr3; 7546 vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
@@ -7511,8 +7548,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
7511 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; 7548 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
7512 7549
7513 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 7550 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
7514
7515 return r;
7516} 7551}
7517 7552
7518static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 7553static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -7520,6 +7555,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
7520 vcpu->arch.walk_mmu = &vcpu->arch.mmu; 7555 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
7521} 7556}
7522 7557
7558static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7559 struct x86_exception *fault)
7560{
7561 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7562
7563 WARN_ON(!is_guest_mode(vcpu));
7564
7565 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
7566 if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
7567 nested_vmx_vmexit(vcpu);
7568 else
7569 kvm_inject_page_fault(vcpu, fault);
7570}
7571
7523/* 7572/*
7524 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 7573 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
7525 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 7574 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7533,6 +7582,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7533{ 7582{
7534 struct vcpu_vmx *vmx = to_vmx(vcpu); 7583 struct vcpu_vmx *vmx = to_vmx(vcpu);
7535 u32 exec_control; 7584 u32 exec_control;
7585 u32 exit_control;
7536 7586
7537 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 7587 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
7538 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 7588 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7706,7 +7756,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7706 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 7756 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
7707 * bits are further modified by vmx_set_efer() below. 7757 * bits are further modified by vmx_set_efer() below.
7708 */ 7758 */
7709 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 7759 exit_control = vmcs_config.vmexit_ctrl;
7760 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7761 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7762 vmcs_write32(VM_EXIT_CONTROLS, exit_control);
7710 7763
7711 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7764 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7712 * emulated by vmx_set_efer(), below. 7765 * emulated by vmx_set_efer(), below.
@@ -7773,6 +7826,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7773 kvm_set_cr3(vcpu, vmcs12->guest_cr3); 7826 kvm_set_cr3(vcpu, vmcs12->guest_cr3);
7774 kvm_mmu_reset_context(vcpu); 7827 kvm_mmu_reset_context(vcpu);
7775 7828
7829 if (!enable_ept)
7830 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
7831
7776 /* 7832 /*
7777 * L1 may access the L2's PDPTR, so save them to construct vmcs12 7833 * L1 may access the L2's PDPTR, so save them to construct vmcs12
7778 */ 7834 */
@@ -7876,7 +7932,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7876 return 1; 7932 return 1;
7877 } 7933 }
7878 7934
7879 if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || 7935 if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
7880 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { 7936 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
7881 nested_vmx_entry_failure(vcpu, vmcs12, 7937 nested_vmx_entry_failure(vcpu, vmcs12,
7882 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); 7938 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -7938,6 +7994,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7938 7994
7939 enter_guest_mode(vcpu); 7995 enter_guest_mode(vcpu);
7940 7996
7997 vmx->nested.nested_run_pending = 1;
7998
7941 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 7999 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
7942 8000
7943 cpu = get_cpu(); 8001 cpu = get_cpu();
@@ -8005,7 +8063,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8005 u32 idt_vectoring; 8063 u32 idt_vectoring;
8006 unsigned int nr; 8064 unsigned int nr;
8007 8065
8008 if (vcpu->arch.exception.pending) { 8066 if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
8009 nr = vcpu->arch.exception.nr; 8067 nr = vcpu->arch.exception.nr;
8010 idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 8068 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
8011 8069
@@ -8023,7 +8081,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8023 } 8081 }
8024 8082
8025 vmcs12->idt_vectoring_info_field = idt_vectoring; 8083 vmcs12->idt_vectoring_info_field = idt_vectoring;
8026 } else if (vcpu->arch.nmi_pending) { 8084 } else if (vcpu->arch.nmi_injected) {
8027 vmcs12->idt_vectoring_info_field = 8085 vmcs12->idt_vectoring_info_field =
8028 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; 8086 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
8029 } else if (vcpu->arch.interrupt.pending) { 8087 } else if (vcpu->arch.interrupt.pending) {
@@ -8105,6 +8163,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8105 vmcs12->guest_pending_dbg_exceptions = 8163 vmcs12->guest_pending_dbg_exceptions =
8106 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8164 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
8107 8165
8166 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
8167 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
8168 vmcs12->vmx_preemption_timer_value =
8169 vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
8170
8108 /* 8171 /*
8109 * In some cases (usually, nested EPT), L2 is allowed to change its 8172 * In some cases (usually, nested EPT), L2 is allowed to change its
8110 * own CR3 without exiting. If it has changed it, we must keep it. 8173 * own CR3 without exiting. If it has changed it, we must keep it.
@@ -8130,6 +8193,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8130 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 8193 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8131 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 8194 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
8132 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 8195 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
8196 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
8197 vmcs12->guest_ia32_efer = vcpu->arch.efer;
8133 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 8198 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
8134 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 8199 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
8135 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); 8200 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@ -8201,7 +8266,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8201 * fpu_active (which may have changed). 8266 * fpu_active (which may have changed).
8202 * Note that vmx_set_cr0 refers to efer set above. 8267 * Note that vmx_set_cr0 refers to efer set above.
8203 */ 8268 */
8204 kvm_set_cr0(vcpu, vmcs12->host_cr0); 8269 vmx_set_cr0(vcpu, vmcs12->host_cr0);
8205 /* 8270 /*
8206 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need 8271 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
8207 * to apply the same changes to L1's vmcs. We just set cr0 correctly, 8272 * to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -8224,6 +8289,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8224 kvm_set_cr3(vcpu, vmcs12->host_cr3); 8289 kvm_set_cr3(vcpu, vmcs12->host_cr3);
8225 kvm_mmu_reset_context(vcpu); 8290 kvm_mmu_reset_context(vcpu);
8226 8291
8292 if (!enable_ept)
8293 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
8294
8227 if (enable_vpid) { 8295 if (enable_vpid) {
8228 /* 8296 /*
8229 * Trivially support vpid by letting L2s share their parent 8297 * Trivially support vpid by letting L2s share their parent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6..21ef1ba184ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -577,6 +577,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
577int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 577int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
578{ 578{
579 u64 xcr0; 579 u64 xcr0;
580 u64 valid_bits;
580 581
581 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 582 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
582 if (index != XCR_XFEATURE_ENABLED_MASK) 583 if (index != XCR_XFEATURE_ENABLED_MASK)
@@ -586,8 +587,16 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
586 return 1; 587 return 1;
587 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 588 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
588 return 1; 589 return 1;
589 if (xcr0 & ~host_xcr0) 590
591 /*
592 * Do not allow the guest to set bits that we do not support
593 * saving. However, xcr0 bit 0 is always set, even if the
594 * emulated CPU does not support XSAVE (see fx_init).
595 */
596 valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
597 if (xcr0 & ~valid_bits)
590 return 1; 598 return 1;
599
591 kvm_put_guest_xcr0(vcpu); 600 kvm_put_guest_xcr0(vcpu);
592 vcpu->arch.xcr0 = xcr0; 601 vcpu->arch.xcr0 = xcr0;
593 return 0; 602 return 0;
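The rewritten XCR0 check above validates the requested value against what the guest's CPUID actually exposes (guest_supported_xcr0), while always permitting the x87 bit, on top of the pre-existing architectural rules (bit 0 must be set, YMM requires SSE). A userspace sketch of the same checks, using the architectural XCR0 bit assignments and a made-up guest mask:

/* Sketch of XCR0 validation; 'guest_supported' is an example mask. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP   (1ull << 0)
#define XSTATE_SSE  (1ull << 1)
#define XSTATE_YMM  (1ull << 2)

static bool xcr0_valid(uint64_t xcr0, uint64_t guest_supported)
{
        uint64_t valid_bits = guest_supported | XSTATE_FP;  /* FP always settable */

        if (!(xcr0 & XSTATE_FP))
                return false;                   /* bit 0 must stay set */
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                return false;                   /* AVX state requires SSE state */
        return !(xcr0 & ~valid_bits);           /* nothing beyond CPUID's promise */
}

int main(void)
{
        uint64_t guest = XSTATE_FP | XSTATE_SSE;        /* guest CPUID without AVX */

        printf("%d\n", xcr0_valid(XSTATE_FP | XSTATE_SSE, guest));              /* 1 */
        printf("%d\n", xcr0_valid(XSTATE_FP | XSTATE_SSE | XSTATE_YMM, guest)); /* 0 */
        return 0;
}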
@@ -684,7 +693,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
684 693
685 vcpu->arch.cr3 = cr3; 694 vcpu->arch.cr3 = cr3;
686 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); 695 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
687 vcpu->arch.mmu.new_cr3(vcpu); 696 kvm_mmu_new_cr3(vcpu);
688 return 0; 697 return 0;
689} 698}
690EXPORT_SYMBOL_GPL(kvm_set_cr3); 699EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2564,6 +2573,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2564 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 2573 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2565 case KVM_CAP_SET_TSS_ADDR: 2574 case KVM_CAP_SET_TSS_ADDR:
2566 case KVM_CAP_EXT_CPUID: 2575 case KVM_CAP_EXT_CPUID:
2576 case KVM_CAP_EXT_EMUL_CPUID:
2567 case KVM_CAP_CLOCKSOURCE: 2577 case KVM_CAP_CLOCKSOURCE:
2568 case KVM_CAP_PIT: 2578 case KVM_CAP_PIT:
2569 case KVM_CAP_NOP_IO_DELAY: 2579 case KVM_CAP_NOP_IO_DELAY:
@@ -2673,15 +2683,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
2673 r = 0; 2683 r = 0;
2674 break; 2684 break;
2675 } 2685 }
2676 case KVM_GET_SUPPORTED_CPUID: { 2686 case KVM_GET_SUPPORTED_CPUID:
2687 case KVM_GET_EMULATED_CPUID: {
2677 struct kvm_cpuid2 __user *cpuid_arg = argp; 2688 struct kvm_cpuid2 __user *cpuid_arg = argp;
2678 struct kvm_cpuid2 cpuid; 2689 struct kvm_cpuid2 cpuid;
2679 2690
2680 r = -EFAULT; 2691 r = -EFAULT;
2681 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2692 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2682 goto out; 2693 goto out;
2683 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 2694
2684 cpuid_arg->entries); 2695 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
2696 ioctl);
2685 if (r) 2697 if (r)
2686 goto out; 2698 goto out;
2687 2699
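The system-ioctl hunk above routes KVM_GET_EMULATED_CPUID through the same path as KVM_GET_SUPPORTED_CPUID, passing the ioctl number down so kvm_dev_ioctl_get_cpuid() can choose which list to report. A rough userspace sketch of calling the new ioctl; it assumes a <linux/kvm.h> new enough to define KVM_GET_EMULATED_CPUID, and error handling (including growing the buffer on E2BIG) is deliberately minimal:

/* Rough userspace sketch; not a complete or robust caller. */
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR);
        int nent = 128;                 /* guess; E2BIG would mean "grow it" */
        struct kvm_cpuid2 *cpuid;

        if (kvm < 0)
                return 1;
        cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(cpuid->entries[0]));
        if (!cpuid) {
                close(kvm);
                return 1;
        }
        cpuid->nent = nent;

        if (ioctl(kvm, KVM_GET_EMULATED_CPUID, cpuid) == 0) {
                for (unsigned int i = 0; i < cpuid->nent; i++)
                        printf("leaf %#x.%u: eax=%#x\n",
                               cpuid->entries[i].function,
                               cpuid->entries[i].index,
                               cpuid->entries[i].eax);
        }
        free(cpuid);
        close(kvm);
        return 0;
}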
@@ -2715,8 +2727,7 @@ static void wbinvd_ipi(void *garbage)
2715 2727
2716static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) 2728static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2717{ 2729{
2718 return vcpu->kvm->arch.iommu_domain && 2730 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
2719 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2720} 2731}
2721 2732
2722void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2733void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -2984,11 +2995,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2984static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, 2995static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2985 struct kvm_xsave *guest_xsave) 2996 struct kvm_xsave *guest_xsave)
2986{ 2997{
2987 if (cpu_has_xsave) 2998 if (cpu_has_xsave) {
2988 memcpy(guest_xsave->region, 2999 memcpy(guest_xsave->region,
2989 &vcpu->arch.guest_fpu.state->xsave, 3000 &vcpu->arch.guest_fpu.state->xsave,
2990 xstate_size); 3001 vcpu->arch.guest_xstate_size);
2991 else { 3002 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
3003 vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
3004 } else {
2992 memcpy(guest_xsave->region, 3005 memcpy(guest_xsave->region,
2993 &vcpu->arch.guest_fpu.state->fxsave, 3006 &vcpu->arch.guest_fpu.state->fxsave,
2994 sizeof(struct i387_fxsave_struct)); 3007 sizeof(struct i387_fxsave_struct));
@@ -3003,10 +3016,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3003 u64 xstate_bv = 3016 u64 xstate_bv =
3004 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; 3017 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3005 3018
3006 if (cpu_has_xsave) 3019 if (cpu_has_xsave) {
3020 /*
3021 * Here we allow setting states that are not present in
3022 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
3023 * with old userspace.
3024 */
3025 if (xstate_bv & ~KVM_SUPPORTED_XCR0)
3026 return -EINVAL;
3027 if (xstate_bv & ~host_xcr0)
3028 return -EINVAL;
3007 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3029 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3008 guest_xsave->region, xstate_size); 3030 guest_xsave->region, vcpu->arch.guest_xstate_size);
3009 else { 3031 } else {
3010 if (xstate_bv & ~XSTATE_FPSSE) 3032 if (xstate_bv & ~XSTATE_FPSSE)
3011 return -EINVAL; 3033 return -EINVAL;
3012 memcpy(&vcpu->arch.guest_fpu.state->fxsave, 3034 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
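Both xsave ioctl paths now handle the XSTATE_BV word in the XSAVE header explicitly: the get side masks it down to states the guest may use, and the set side rejects bits that KVM (KVM_SUPPORTED_XCR0) or the host (host_xcr0) cannot save. The header sits at a fixed 512-byte offset behind the legacy region, which is what the XSAVE_HDR_OFFSET / sizeof(u32) indexing encodes. A sketch of that layout and the set-side check, with the "supported" masks being placeholders:

/* Sketch: locate XSTATE_BV in an XSAVE image and apply the new checks. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XSAVE_HDR_OFFSET 512    /* header follows the 512-byte legacy region */

static uint64_t get_xstate_bv(const uint8_t *xsave)
{
        uint64_t bv;

        memcpy(&bv, xsave + XSAVE_HDR_OFFSET, sizeof(bv));
        return bv;
}

static void set_xstate_bv(uint8_t *xsave, uint64_t bv)
{
        memcpy(xsave + XSAVE_HDR_OFFSET, &bv, sizeof(bv));
}

/* Set-side rule: reject states nobody can save; get-side rule: hide
 * states the guest was never offered (FP/SSE, mask 0x3, always pass). */
static int check_and_mask(uint8_t *xsave, uint64_t kvm_supported,
                          uint64_t host_xcr0, uint64_t guest_supported)
{
        uint64_t bv = get_xstate_bv(xsave);

        if (bv & ~kvm_supported)
                return -1;
        if (bv & ~host_xcr0)
                return -1;
        set_xstate_bv(xsave, bv & (guest_supported | 0x3));
        return 0;
}

int main(void)
{
        uint8_t xsave[1024] = { 0 };

        set_xstate_bv(xsave, 0x7);                      /* FP|SSE|YMM */
        printf("%d\n", check_and_mask(xsave, 0x7, 0x7, 0x3));
        printf("bv now %#llx\n", (unsigned long long)get_xstate_bv(xsave));
        return 0;
}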
@@ -3042,9 +3064,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3042 3064
3043 for (i = 0; i < guest_xcrs->nr_xcrs; i++) 3065 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
3044 /* Only support XCR0 currently */ 3066 /* Only support XCR0 currently */
3045 if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { 3067 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3046 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, 3068 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3047 guest_xcrs->xcrs[0].value); 3069 guest_xcrs->xcrs[i].value);
3048 break; 3070 break;
3049 } 3071 }
3050 if (r) 3072 if (r)
@@ -4775,8 +4797,8 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4775 4797
4776static void init_decode_cache(struct x86_emulate_ctxt *ctxt) 4798static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4777{ 4799{
4778 memset(&ctxt->twobyte, 0, 4800 memset(&ctxt->opcode_len, 0,
4779 (void *)&ctxt->_regs - (void *)&ctxt->twobyte); 4801 (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
4780 4802
4781 ctxt->fetch.start = 0; 4803 ctxt->fetch.start = 0;
4782 ctxt->fetch.end = 0; 4804 ctxt->fetch.end = 0;
@@ -5094,8 +5116,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5094 ctxt->have_exception = false; 5116 ctxt->have_exception = false;
5095 ctxt->perm_ok = false; 5117 ctxt->perm_ok = false;
5096 5118
5097 ctxt->only_vendor_specific_insn 5119 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
5098 = emulation_type & EMULTYPE_TRAP_UD;
5099 5120
5100 r = x86_decode_insn(ctxt, insn, insn_len); 5121 r = x86_decode_insn(ctxt, insn, insn_len);
5101 5122
@@ -5263,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
5263 5284
5264 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); 5285 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5265 5286
5266 raw_spin_lock(&kvm_lock); 5287 spin_lock(&kvm_lock);
5267 list_for_each_entry(kvm, &vm_list, vm_list) { 5288 list_for_each_entry(kvm, &vm_list, vm_list) {
5268 kvm_for_each_vcpu(i, vcpu, kvm) { 5289 kvm_for_each_vcpu(i, vcpu, kvm) {
5269 if (vcpu->cpu != freq->cpu) 5290 if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5294,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
5273 send_ipi = 1; 5294 send_ipi = 1;
5274 } 5295 }
5275 } 5296 }
5276 raw_spin_unlock(&kvm_lock); 5297 spin_unlock(&kvm_lock);
5277 5298
5278 if (freq->old < freq->new && send_ipi) { 5299 if (freq->old < freq->new && send_ipi) {
5279 /* 5300 /*
@@ -5426,12 +5447,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
5426 struct kvm_vcpu *vcpu; 5447 struct kvm_vcpu *vcpu;
5427 int i; 5448 int i;
5428 5449
5429 raw_spin_lock(&kvm_lock); 5450 spin_lock(&kvm_lock);
5430 list_for_each_entry(kvm, &vm_list, vm_list) 5451 list_for_each_entry(kvm, &vm_list, vm_list)
5431 kvm_for_each_vcpu(i, vcpu, kvm) 5452 kvm_for_each_vcpu(i, vcpu, kvm)
5432 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); 5453 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5433 atomic_set(&kvm_guest_has_master_clock, 0); 5454 atomic_set(&kvm_guest_has_master_clock, 0);
5434 raw_spin_unlock(&kvm_lock); 5455 spin_unlock(&kvm_lock);
5435} 5456}
5436 5457
5437static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); 5458static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5945,10 +5966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5945 5966
5946 vcpu->mode = IN_GUEST_MODE; 5967 vcpu->mode = IN_GUEST_MODE;
5947 5968
5969 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5970
5948 /* We should set ->mode before check ->requests, 5971 /* We should set ->mode before check ->requests,
5949 * see the comment in make_all_cpus_request. 5972 * see the comment in make_all_cpus_request.
5950 */ 5973 */
5951 smp_mb(); 5974 smp_mb__after_srcu_read_unlock();
5952 5975
5953 local_irq_disable(); 5976 local_irq_disable();
5954 5977
@@ -5958,12 +5981,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5958 smp_wmb(); 5981 smp_wmb();
5959 local_irq_enable(); 5982 local_irq_enable();
5960 preempt_enable(); 5983 preempt_enable();
5984 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5961 r = 1; 5985 r = 1;
5962 goto cancel_injection; 5986 goto cancel_injection;
5963 } 5987 }
5964 5988
5965 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5966
5967 if (req_immediate_exit) 5989 if (req_immediate_exit)
5968 smp_send_reschedule(vcpu->cpu); 5990 smp_send_reschedule(vcpu->cpu);
5969 5991
@@ -6688,7 +6710,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6688 if (r) 6710 if (r)
6689 return r; 6711 return r;
6690 kvm_vcpu_reset(vcpu); 6712 kvm_vcpu_reset(vcpu);
6691 r = kvm_mmu_setup(vcpu); 6713 kvm_mmu_setup(vcpu);
6692 vcpu_put(vcpu); 6714 vcpu_put(vcpu);
6693 6715
6694 return r; 6716 return r;
@@ -6940,6 +6962,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6940 6962
6941 vcpu->arch.ia32_tsc_adjust_msr = 0x0; 6963 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
6942 vcpu->arch.pv_time_enabled = false; 6964 vcpu->arch.pv_time_enabled = false;
6965
6966 vcpu->arch.guest_supported_xcr0 = 0;
6967 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
6968
6943 kvm_async_pf_hash_reset(vcpu); 6969 kvm_async_pf_hash_reset(vcpu);
6944 kvm_pmu_init(vcpu); 6970 kvm_pmu_init(vcpu);
6945 6971
@@ -6981,6 +7007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6981 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 7007 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6982 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); 7008 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
6983 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 7009 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
7010 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
6984 7011
6985 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 7012 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
6986 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 7013 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
@@ -7065,7 +7092,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
7065 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7092 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7066} 7093}
7067 7094
7068void kvm_arch_free_memslot(struct kvm_memory_slot *free, 7095void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
7069 struct kvm_memory_slot *dont) 7096 struct kvm_memory_slot *dont)
7070{ 7097{
7071 int i; 7098 int i;
@@ -7086,7 +7113,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
7086 } 7113 }
7087} 7114}
7088 7115
7089int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 7116int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7117 unsigned long npages)
7090{ 7118{
7091 int i; 7119 int i;
7092 7120
@@ -7283,7 +7311,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
7283 int r; 7311 int r;
7284 7312
7285 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || 7313 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
7286 is_error_page(work->page)) 7314 work->wakeup_all)
7287 return; 7315 return;
7288 7316
7289 r = kvm_mmu_reload(vcpu); 7317 r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7421,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
7393 struct x86_exception fault; 7421 struct x86_exception fault;
7394 7422
7395 trace_kvm_async_pf_ready(work->arch.token, work->gva); 7423 trace_kvm_async_pf_ready(work->arch.token, work->gva);
7396 if (is_error_page(work->page)) 7424 if (work->wakeup_all)
7397 work->arch.token = ~0; /* broadcast wakeup */ 7425 work->arch.token = ~0; /* broadcast wakeup */
7398 else 7426 else
7399 kvm_del_async_pf_gfn(vcpu, work->arch.gfn); 7427 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
@@ -7420,6 +7448,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
7420 kvm_x86_ops->interrupt_allowed(vcpu); 7448 kvm_x86_ops->interrupt_allowed(vcpu);
7421} 7449}
7422 7450
7451void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
7452{
7453 atomic_inc(&kvm->arch.noncoherent_dma_count);
7454}
7455EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
7456
7457void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
7458{
7459 atomic_dec(&kvm->arch.noncoherent_dma_count);
7460}
7461EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
7462
7463bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
7464{
7465 return atomic_read(&kvm->arch.noncoherent_dma_count);
7466}
7467EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
7468
7423EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 7469EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
7424EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 7470EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
7425EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); 7471EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
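The kvm_arch_{register,unregister,has}_noncoherent_dma() helpers added above replace the direct iommu_domain/iommu_flags tests seen earlier in this diff (need_emulate_wbinvd(), vmx_get_mt_mask()) with a per-VM counter that device-assignment code can bump while a non-coherent (NoSnoop-capable) device is attached. A standalone sketch of the counting pattern, using C11 atomics in place of the kernel's atomic_t and a stand-in struct for struct kvm:

/* Sketch of the noncoherent-DMA refcount pattern. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct vm {
        atomic_int noncoherent_dma_count;
};

static void register_noncoherent_dma(struct vm *vm)
{
        atomic_fetch_add(&vm->noncoherent_dma_count, 1);
}

static void unregister_noncoherent_dma(struct vm *vm)
{
        atomic_fetch_sub(&vm->noncoherent_dma_count, 1);
}

static bool has_noncoherent_dma(struct vm *vm)
{
        /* Non-zero while at least one assigned device may issue NoSnoop DMA;
         * callers then emulate WBINVD and honour guest memory types. */
        return atomic_load(&vm->noncoherent_dma_count) != 0;
}

int main(void)
{
        struct vm vm = { .noncoherent_dma_count = 0 };

        register_noncoherent_dma(&vm);
        printf("%d\n", has_noncoherent_dma(&vm));   /* 1 */
        unregister_noncoherent_dma(&vm);
        printf("%d\n", has_noncoherent_dma(&vm));   /* 0 */
        return 0;
}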
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e224f7a671b6..587fb9ede436 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
122 gva_t addr, void *val, unsigned int bytes, 122 gva_t addr, void *val, unsigned int bytes,
123 struct x86_exception *exception); 123 struct x86_exception *exception);
124 124
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
125extern u64 host_xcr0; 126extern u64 host_xcr0;
126 127
127extern struct static_key kvm_no_apic_vcpu; 128extern struct static_key kvm_no_apic_vcpu;