Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h      10
-rw-r--r--  arch/x86/include/asm/kvm_host.h         23
-rw-r--r--  arch/x86/include/asm/pvclock.h           2
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h          6
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h    1
-rw-r--r--  arch/x86/kernel/kvmclock.c               1
-rw-r--r--  arch/x86/kernel/pvclock.c               13
-rw-r--r--  arch/x86/kvm/Kconfig                     1
-rw-r--r--  arch/x86/kvm/Makefile                    2
-rw-r--r--  arch/x86/kvm/cpuid.c                   115
-rw-r--r--  arch/x86/kvm/cpuid.h                     5
-rw-r--r--  arch/x86/kvm/emulate.c                 130
-rw-r--r--  arch/x86/kvm/mmu.c                     115
-rw-r--r--  arch/x86/kvm/mmu.h                       4
-rw-r--r--  arch/x86/kvm/svm.c                       8
-rw-r--r--  arch/x86/kvm/vmx.c                     158
-rw-r--r--  arch/x86/kvm/x86.c                     108
-rw-r--r--  arch/x86/kvm/x86.h                       1
18 files changed, 489 insertions(+), 214 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 15f960c06ff7..24ec1216596e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -274,13 +274,17 @@ struct x86_emulate_ctxt {
274 274
275 bool guest_mode; /* guest running a nested guest */ 275 bool guest_mode; /* guest running a nested guest */
276 bool perm_ok; /* do not check permissions if true */ 276 bool perm_ok; /* do not check permissions if true */
277 bool only_vendor_specific_insn; 277 bool ud; /* inject an #UD if host doesn't support insn */
278 278
279 bool have_exception; 279 bool have_exception;
280 struct x86_exception exception; 280 struct x86_exception exception;
281 281
282 /* decode cache */ 282 /*
283 u8 twobyte; 283 * decode cache
284 */
285
286 /* current opcode length in bytes */
287 u8 opcode_len;
284 u8 b; 288 u8 b;
285 u8 intercept; 289 u8 intercept;
286 u8 lock_prefix; 290 u8 lock_prefix;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c76ff74a98f2..ae5d7830855c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
79#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 79#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
80#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) 80#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
81 81
82static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
83{
84 /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
85 return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
86 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
87}
88
82#define SELECTOR_TI_MASK (1 << 2) 89#define SELECTOR_TI_MASK (1 << 2)
83#define SELECTOR_RPL_MASK 0x03 90#define SELECTOR_RPL_MASK 0x03
84 91
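
[Illustration] The new gfn_to_index() helper converts a guest frame number into a slot-relative index at a given page-table level. A minimal userspace sketch of the same arithmetic is below; the constants mirror KVM's x86 defaults (4 KiB base pages, 9 bits per level) and all demo_* names are assumptions made for this example, not KVM symbols.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

#define DEMO_PT_PAGE_TABLE_LEVEL 1
#define DEMO_HPAGE_GFN_SHIFT(lvl) (((lvl) - DEMO_PT_PAGE_TABLE_LEVEL) * 9)

static gfn_t demo_gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
{
	/* For level 1 the shift is 0, so the result is just gfn - base_gfn. */
	return (gfn >> DEMO_HPAGE_GFN_SHIFT(level)) -
	       (base_gfn >> DEMO_HPAGE_GFN_SHIFT(level));
}

int main(void)
{
	/* gfn 0x12345 inside a memory slot starting at gfn 0x10000 */
	printf("4K index: %llu\n",
	       (unsigned long long)demo_gfn_to_index(0x12345, 0x10000, 1));
	printf("2M index: %llu\n",
	       (unsigned long long)demo_gfn_to_index(0x12345, 0x10000, 2));
	printf("1G index: %llu\n",
	       (unsigned long long)demo_gfn_to_index(0x12345, 0x10000, 3));
	return 0;
}
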
@@ -253,7 +260,6 @@ struct kvm_pio_request {
253 * mode. 260 * mode.
254 */ 261 */
255struct kvm_mmu { 262struct kvm_mmu {
256 void (*new_cr3)(struct kvm_vcpu *vcpu);
257 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); 263 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
258 unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); 264 unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
259 u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); 265 u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
261 bool prefault); 267 bool prefault);
262 void (*inject_page_fault)(struct kvm_vcpu *vcpu, 268 void (*inject_page_fault)(struct kvm_vcpu *vcpu,
263 struct x86_exception *fault); 269 struct x86_exception *fault);
264 void (*free)(struct kvm_vcpu *vcpu);
265 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, 270 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
266 struct x86_exception *exception); 271 struct x86_exception *exception);
267 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); 272 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
389 394
390 struct fpu guest_fpu; 395 struct fpu guest_fpu;
391 u64 xcr0; 396 u64 xcr0;
397 u64 guest_supported_xcr0;
398 u32 guest_xstate_size;
392 399
393 struct kvm_pio_request pio; 400 struct kvm_pio_request pio;
394 void *pio_data; 401 void *pio_data;
@@ -557,7 +564,9 @@ struct kvm_arch {
557 564
558 struct list_head assigned_dev_head; 565 struct list_head assigned_dev_head;
559 struct iommu_domain *iommu_domain; 566 struct iommu_domain *iommu_domain;
560 int iommu_flags; 567 bool iommu_noncoherent;
568#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
569 atomic_t noncoherent_dma_count;
561 struct kvm_pic *vpic; 570 struct kvm_pic *vpic;
562 struct kvm_ioapic *vioapic; 571 struct kvm_ioapic *vioapic;
563 struct kvm_pit *vpit; 572 struct kvm_pit *vpit;
@@ -780,11 +789,11 @@ void kvm_mmu_module_exit(void);
780 789
781void kvm_mmu_destroy(struct kvm_vcpu *vcpu); 790void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
782int kvm_mmu_create(struct kvm_vcpu *vcpu); 791int kvm_mmu_create(struct kvm_vcpu *vcpu);
783int kvm_mmu_setup(struct kvm_vcpu *vcpu); 792void kvm_mmu_setup(struct kvm_vcpu *vcpu);
784void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 793void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
785 u64 dirty_mask, u64 nx_mask, u64 x_mask); 794 u64 dirty_mask, u64 nx_mask, u64 x_mask);
786 795
787int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 796void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
788void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 797void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
789void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, 798void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
790 struct kvm_memory_slot *slot, 799 struct kvm_memory_slot *slot,
@@ -922,13 +931,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
922int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, 931int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
923 void *insn, int insn_len); 932 void *insn, int insn_len);
924void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); 933void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
934void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
925 935
926void kvm_enable_tdp(void); 936void kvm_enable_tdp(void);
927void kvm_disable_tdp(void); 937void kvm_disable_tdp(void);
928 938
929int complete_pio(struct kvm_vcpu *vcpu);
930bool kvm_check_iopl(struct kvm_vcpu *vcpu);
931
932static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 939static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
933{ 940{
934 return gpa; 941 return gpa;
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index be8269b00e2a..d6b078e9fa28 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -14,6 +14,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
14 struct timespec *ts); 14 struct timespec *ts);
15void pvclock_resume(void); 15void pvclock_resume(void);
16 16
17void pvclock_touch_watchdogs(void);
18
17/* 19/*
18 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 20 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
19 * yielding a 64-bit result. 21 * yielding a 64-bit result.
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
211 __u32 padding[3]; 211 __u32 padding[3];
212}; 212};
213 213
214#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 214#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
215#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 215#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
216#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 216#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
217 217
218/* for KVM_SET_CPUID2 */ 218/* for KVM_SET_CPUID2 */
219struct kvm_cpuid2 { 219struct kvm_cpuid2 {
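
[Illustration] The flag macros above are now expressed with BIT() but keep their values. A minimal sketch of how a consumer tests them on returned entries follows; the struct is a hand-copied mirror of kvm_cpuid_entry2 for illustration only (real code should include <linux/kvm.h>), and demo_* names are assumptions.

#include <stdint.h>
#include <stdio.h>

#define DEMO_FLAG_SIGNIFCANT_INDEX (1u << 0)
#define DEMO_FLAG_STATEFUL_FUNC    (1u << 1)
#define DEMO_FLAG_STATE_READ_NEXT  (1u << 2)

struct demo_cpuid_entry {
	uint32_t function, index, flags;
	uint32_t eax, ebx, ecx, edx;
	uint32_t padding[3];
};

static void demo_print_entry(const struct demo_cpuid_entry *e)
{
	if (e->flags & DEMO_FLAG_SIGNIFCANT_INDEX)
		printf("leaf %#x subleaf %#x\n", e->function, e->index);
	else
		printf("leaf %#x (index ignored)\n", e->function);
}

int main(void)
{
	struct demo_cpuid_entry xsave = {
		.function = 0xd, .index = 1,
		.flags = DEMO_FLAG_SIGNIFCANT_INDEX,
	};
	demo_print_entry(&xsave);
	return 0;
}
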
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb0465090ae5..b93e09a0fa21 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@
536 536
537/* MSR_IA32_VMX_MISC bits */ 537/* MSR_IA32_VMX_MISC bits */
538#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) 538#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
539#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
539/* AMD-V MSRs */ 540/* AMD-V MSRs */
540 541
541#define MSR_VM_CR 0xc0010114 542#define MSR_VM_CR 0xc0010114
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1570e0741344..e6041094ff26 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -139,6 +139,7 @@ bool kvm_check_and_clear_guest_paused(void)
139 src = &hv_clock[cpu].pvti; 139 src = &hv_clock[cpu].pvti;
140 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { 140 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
141 src->flags &= ~PVCLOCK_GUEST_STOPPED; 141 src->flags &= ~PVCLOCK_GUEST_STOPPED;
142 pvclock_touch_watchdogs();
142 ret = true; 143 ret = true;
143 } 144 }
144 145
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index a16bae3f83b3..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -43,6 +43,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
43 return pv_tsc_khz; 43 return pv_tsc_khz;
44} 44}
45 45
46void pvclock_touch_watchdogs(void)
47{
48 touch_softlockup_watchdog_sync();
49 clocksource_touch_watchdog();
50 rcu_cpu_stall_reset();
51 reset_hung_task_detector();
52}
53
46static atomic64_t last_value = ATOMIC64_INIT(0); 54static atomic64_t last_value = ATOMIC64_INIT(0);
47 55
48void pvclock_resume(void) 56void pvclock_resume(void)
@@ -74,6 +82,11 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
74 version = __pvclock_read_cycles(src, &ret, &flags); 82 version = __pvclock_read_cycles(src, &ret, &flags);
75 } while ((src->version & 1) || version != src->version); 83 } while ((src->version & 1) || version != src->version);
76 84
85 if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
86 src->flags &= ~PVCLOCK_GUEST_STOPPED;
87 pvclock_touch_watchdogs();
88 }
89
77 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && 90 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
78 (flags & PVCLOCK_TSC_STABLE_BIT)) 91 (flags & PVCLOCK_TSC_STABLE_BIT))
79 return ret; 92 return ret;
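
[Illustration] The hunk above makes the common pvclock read path clear PVCLOCK_GUEST_STOPPED and poke the watchdogs, mirroring what kvmclock already did. A hedged userspace sketch of the seqcount-style read protocol with that flag handling is below; the struct layout, demo_* names, and constants are illustrative, not the kernel's definitions, and the fences use GCC/Clang builtins.

#include <stdint.h>

#define DEMO_PVCLOCK_TSC_STABLE_BIT (1 << 0)
#define DEMO_PVCLOCK_GUEST_STOPPED  (1 << 1)

struct demo_vcpu_time_info {
	uint32_t version;
	uint64_t system_time;
	uint8_t  flags;
};

static void demo_touch_watchdogs(void)
{
	/* stand-in for pvclock_touch_watchdogs() */
}

static uint64_t demo_pvclock_read(volatile struct demo_vcpu_time_info *src)
{
	uint32_t version;
	uint64_t ret;
	uint8_t flags;

	do {
		version = src->version;
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
		ret = src->system_time;   /* plus a scaled TSC delta in real code */
		flags = src->flags;
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
	} while ((version & 1) || version != src->version);

	if (flags & DEMO_PVCLOCK_GUEST_STOPPED) {
		src->flags &= ~DEMO_PVCLOCK_GUEST_STOPPED;
		demo_touch_watchdogs();
	}
	return ret;
}

int main(void)
{
	struct demo_vcpu_time_info ti = {
		.version = 2, .system_time = 1000,
		.flags = DEMO_PVCLOCK_GUEST_STOPPED,
	};
	return demo_pvclock_read(&ti) == 1000 ? 0 : 1;
}
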
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..b89c5db2b832 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM
38 select PERF_EVENTS 38 select PERF_EVENTS
39 select HAVE_KVM_MSI 39 select HAVE_KVM_MSI
40 select HAVE_KVM_CPU_RELAX_INTERCEPT 40 select HAVE_KVM_CPU_RELAX_INTERCEPT
41 select KVM_VFIO
41 ---help--- 42 ---help---
42 Support hosting fully virtualized guest machines using hardware 43 Support hosting fully virtualized guest machines using hardware
43 virtualization extensions. You will need a fairly recent 44 virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index bf4fb04d0112..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
9 9
10kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ 10kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ 11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
12 $(KVM)/eventfd.o $(KVM)/irqchip.o 12 $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
13kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o 13kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o
14kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 14kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
15 15
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b110fe6c03d4..c6976257eff5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,6 +23,26 @@
23#include "mmu.h" 23#include "mmu.h"
24#include "trace.h" 24#include "trace.h"
25 25
26static u32 xstate_required_size(u64 xstate_bv)
27{
28 int feature_bit = 0;
29 u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
30
31 xstate_bv &= ~XSTATE_FPSSE;
32 while (xstate_bv) {
33 if (xstate_bv & 0x1) {
34 u32 eax, ebx, ecx, edx;
35 cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
36 ret = max(ret, eax + ebx);
37 }
38
39 xstate_bv >>= 1;
40 feature_bit++;
41 }
42
43 return ret;
44}
45
26void kvm_update_cpuid(struct kvm_vcpu *vcpu) 46void kvm_update_cpuid(struct kvm_vcpu *vcpu)
27{ 47{
28 struct kvm_cpuid_entry2 *best; 48 struct kvm_cpuid_entry2 *best;
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
46 apic->lapic_timer.timer_mode_mask = 1 << 17; 66 apic->lapic_timer.timer_mode_mask = 1 << 17;
47 } 67 }
48 68
69 best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
70 if (!best) {
71 vcpu->arch.guest_supported_xcr0 = 0;
72 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
73 } else {
74 vcpu->arch.guest_supported_xcr0 =
75 (best->eax | ((u64)best->edx << 32)) &
76 host_xcr0 & KVM_SUPPORTED_XCR0;
77 vcpu->arch.guest_xstate_size =
78 xstate_required_size(vcpu->arch.guest_supported_xcr0);
79 }
80
49 kvm_pmu_cpuid_update(vcpu); 81 kvm_pmu_cpuid_update(vcpu);
50} 82}
51 83
@@ -182,13 +214,35 @@ static bool supported_xcr0_bit(unsigned bit)
182{ 214{
183 u64 mask = ((u64)1 << bit); 215 u64 mask = ((u64)1 << bit);
184 216
185 return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; 217 return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
186} 218}
187 219
188#define F(x) bit(X86_FEATURE_##x) 220#define F(x) bit(X86_FEATURE_##x)
189 221
190static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 222static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
191 u32 index, int *nent, int maxnent) 223 u32 func, u32 index, int *nent, int maxnent)
224{
225 switch (func) {
226 case 0:
227 entry->eax = 1; /* only one leaf currently */
228 ++*nent;
229 break;
230 case 1:
231 entry->ecx = F(MOVBE);
232 ++*nent;
233 break;
234 default:
235 break;
236 }
237
238 entry->function = func;
239 entry->index = index;
240
241 return 0;
242}
243
244static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
245 u32 index, int *nent, int maxnent)
192{ 246{
193 int r; 247 int r;
194 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 248 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -383,6 +437,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
383 case 0xd: { 437 case 0xd: {
384 int idx, i; 438 int idx, i;
385 439
440 entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
441 entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
386 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 442 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
387 for (idx = 1, i = 1; idx < 64; ++idx) { 443 for (idx = 1, i = 1; idx < 64; ++idx) {
388 if (*nent >= maxnent) 444 if (*nent >= maxnent)
@@ -481,6 +537,15 @@ out:
481 return r; 537 return r;
482} 538}
483 539
540static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
541 u32 idx, int *nent, int maxnent, unsigned int type)
542{
543 if (type == KVM_GET_EMULATED_CPUID)
544 return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
545
546 return __do_cpuid_ent(entry, func, idx, nent, maxnent);
547}
548
484#undef F 549#undef F
485 550
486struct kvm_cpuid_param { 551struct kvm_cpuid_param {
@@ -495,8 +560,36 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
495 return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; 560 return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
496} 561}
497 562
498int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 563static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
499 struct kvm_cpuid_entry2 __user *entries) 564 __u32 num_entries, unsigned int ioctl_type)
565{
566 int i;
567 __u32 pad[3];
568
569 if (ioctl_type != KVM_GET_EMULATED_CPUID)
570 return false;
571
572 /*
573 * We want to make sure that ->padding is being passed clean from
574 * userspace in case we want to use it for something in the future.
575 *
576 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
577 * have to give ourselves satisfied only with the emulated side. /me
578 * sheds a tear.
579 */
580 for (i = 0; i < num_entries; i++) {
581 if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
582 return true;
583
584 if (pad[0] || pad[1] || pad[2])
585 return true;
586 }
587 return false;
588}
589
590int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
591 struct kvm_cpuid_entry2 __user *entries,
592 unsigned int type)
500{ 593{
501 struct kvm_cpuid_entry2 *cpuid_entries; 594 struct kvm_cpuid_entry2 *cpuid_entries;
502 int limit, nent = 0, r = -E2BIG, i; 595 int limit, nent = 0, r = -E2BIG, i;
@@ -513,8 +606,12 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
513 goto out; 606 goto out;
514 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 607 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
515 cpuid->nent = KVM_MAX_CPUID_ENTRIES; 608 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
609
610 if (sanity_check_entries(entries, cpuid->nent, type))
611 return -EINVAL;
612
516 r = -ENOMEM; 613 r = -ENOMEM;
517 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 614 cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
518 if (!cpuid_entries) 615 if (!cpuid_entries)
519 goto out; 616 goto out;
520 617
@@ -526,7 +623,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
526 continue; 623 continue;
527 624
528 r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, 625 r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
529 &nent, cpuid->nent); 626 &nent, cpuid->nent, type);
530 627
531 if (r) 628 if (r)
532 goto out_free; 629 goto out_free;
@@ -537,7 +634,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
537 limit = cpuid_entries[nent - 1].eax; 634 limit = cpuid_entries[nent - 1].eax;
538 for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) 635 for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
539 r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, 636 r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
540 &nent, cpuid->nent); 637 &nent, cpuid->nent, type);
541 638
542 if (r) 639 if (r)
543 goto out_free; 640 goto out_free;
@@ -661,6 +758,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
661 *edx = best->edx; 758 *edx = best->edx;
662 } else 759 } else
663 *eax = *ebx = *ecx = *edx = 0; 760 *eax = *ebx = *ecx = *edx = 0;
761 trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
664} 762}
665EXPORT_SYMBOL_GPL(kvm_cpuid); 763EXPORT_SYMBOL_GPL(kvm_cpuid);
666 764
@@ -676,6 +774,5 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
676 kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); 774 kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
677 kvm_register_write(vcpu, VCPU_REGS_RDX, edx); 775 kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
678 kvm_x86_ops->skip_emulated_instruction(vcpu); 776 kvm_x86_ops->skip_emulated_instruction(vcpu);
679 trace_kvm_cpuid(function, eax, ebx, ecx, edx);
680} 777}
681EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 778EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
6void kvm_update_cpuid(struct kvm_vcpu *vcpu); 6void kvm_update_cpuid(struct kvm_vcpu *vcpu);
7struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 7struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
8 u32 function, u32 index); 8 u32 function, u32 index);
9int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 9int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
10 struct kvm_cpuid_entry2 __user *entries); 10 struct kvm_cpuid_entry2 __user *entries,
11 unsigned int type);
11int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 12int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
12 struct kvm_cpuid *cpuid, 13 struct kvm_cpuid *cpuid,
13 struct kvm_cpuid_entry __user *entries); 14 struct kvm_cpuid_entry __user *entries);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ddc3f3d2afdb..07ffca0a89e9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -130,7 +130,7 @@
130#define Mov (1<<20) 130#define Mov (1<<20)
131/* Misc flags */ 131/* Misc flags */
132#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ 132#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
133#define VendorSpecific (1<<22) /* Vendor specific instruction */ 133#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
134#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ 134#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
135#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ 135#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
136#define Undefined (1<<25) /* No Such Instruction */ 136#define Undefined (1<<25) /* No Such Instruction */
@@ -785,9 +785,10 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
785 * @highbyte_regs specifies whether to decode AH,CH,DH,BH. 785 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
786 */ 786 */
787static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, 787static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
788 int highbyte_regs) 788 int byteop)
789{ 789{
790 void *p; 790 void *p;
791 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
791 792
792 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) 793 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
793 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; 794 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
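
[Illustration] The decode_register() change moves the high-byte-register decision into the helper: for byte-sized operands, register encodings 4-7 mean AH/CH/DH/BH only when the instruction carries no REX prefix; with any REX prefix they select SPL/BPL/SIL/DIL. A sketch of that rule is below; the tables and demo_* names are assumptions for illustration.

#include <stdbool.h>
#include <stdio.h>

static const char *demo_reg8_norex[8] = {
	"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"
};
static const char *demo_reg8_rex[8] = {
	"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil"
};

static const char *demo_decode_byte_reg(unsigned reg, bool has_rex)
{
	return has_rex ? demo_reg8_rex[reg & 7] : demo_reg8_norex[reg & 7];
}

int main(void)
{
	printf("reg 4, no REX: %s\n", demo_decode_byte_reg(4, false)); /* ah  */
	printf("reg 4, REX:    %s\n", demo_decode_byte_reg(4, true));  /* spl */
	return 0;
}
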
@@ -1024,7 +1025,6 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1024 struct operand *op) 1025 struct operand *op)
1025{ 1026{
1026 unsigned reg = ctxt->modrm_reg; 1027 unsigned reg = ctxt->modrm_reg;
1027 int highbyte_regs = ctxt->rex_prefix == 0;
1028 1028
1029 if (!(ctxt->d & ModRM)) 1029 if (!(ctxt->d & ModRM))
1030 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); 1030 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
@@ -1045,13 +1045,9 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1045 } 1045 }
1046 1046
1047 op->type = OP_REG; 1047 op->type = OP_REG;
1048 if (ctxt->d & ByteOp) { 1048 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1049 op->addr.reg = decode_register(ctxt, reg, highbyte_regs); 1049 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1050 op->bytes = 1; 1050
1051 } else {
1052 op->addr.reg = decode_register(ctxt, reg, 0);
1053 op->bytes = ctxt->op_bytes;
1054 }
1055 fetch_register_operand(op); 1051 fetch_register_operand(op);
1056 op->orig_val = op->val; 1052 op->orig_val = op->val;
1057} 1053}
@@ -1082,12 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1082 ctxt->modrm_seg = VCPU_SREG_DS; 1078 ctxt->modrm_seg = VCPU_SREG_DS;
1083 1079
1084 if (ctxt->modrm_mod == 3) { 1080 if (ctxt->modrm_mod == 3) {
1085 int highbyte_regs = ctxt->rex_prefix == 0;
1086
1087 op->type = OP_REG; 1081 op->type = OP_REG;
1088 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 1082 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1089 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1083 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1090 highbyte_regs && (ctxt->d & ByteOp)); 1084 ctxt->d & ByteOp);
1091 if (ctxt->d & Sse) { 1085 if (ctxt->d & Sse) {
1092 op->type = OP_XMM; 1086 op->type = OP_XMM;
1093 op->bytes = 16; 1087 op->bytes = 16;
@@ -2961,6 +2955,46 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
2961 return X86EMUL_CONTINUE; 2955 return X86EMUL_CONTINUE;
2962} 2956}
2963 2957
2958#define FFL(x) bit(X86_FEATURE_##x)
2959
2960static int em_movbe(struct x86_emulate_ctxt *ctxt)
2961{
2962 u32 ebx, ecx, edx, eax = 1;
2963 u16 tmp;
2964
2965 /*
2966 * Check MOVBE is set in the guest-visible CPUID leaf.
2967 */
2968 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2969 if (!(ecx & FFL(MOVBE)))
2970 return emulate_ud(ctxt);
2971
2972 switch (ctxt->op_bytes) {
2973 case 2:
2974 /*
2975 * From MOVBE definition: "...When the operand size is 16 bits,
2976 * the upper word of the destination register remains unchanged
2977 * ..."
2978 *
2979 * Both casting ->valptr and ->val to u16 breaks strict aliasing
2980 * rules so we have to do the operation almost per hand.
2981 */
2982 tmp = (u16)ctxt->src.val;
2983 ctxt->dst.val &= ~0xffffUL;
2984 ctxt->dst.val |= (unsigned long)swab16(tmp);
2985 break;
2986 case 4:
2987 ctxt->dst.val = swab32((u32)ctxt->src.val);
2988 break;
2989 case 8:
2990 ctxt->dst.val = swab64(ctxt->src.val);
2991 break;
2992 default:
2993 return X86EMUL_PROPAGATE_FAULT;
2994 }
2995 return X86EMUL_CONTINUE;
2996}
2997
2964static int em_cr_write(struct x86_emulate_ctxt *ctxt) 2998static int em_cr_write(struct x86_emulate_ctxt *ctxt)
2965{ 2999{
2966 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) 3000 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
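
[Illustration] em_movbe() byte-swaps between register and memory, and for the 16-bit form leaves the upper bits of the destination untouched, as the comment in the hunk notes. A standalone sketch of that operand-size handling is below; it is not the emulator code itself, just the value transformation.

#include <stdint.h>
#include <stdio.h>

static uint64_t demo_movbe(uint64_t dst, uint64_t src, int op_bytes)
{
	switch (op_bytes) {
	case 2:
		/* only the low word is written back byte-swapped */
		return (dst & ~0xffffull) | __builtin_bswap16((uint16_t)src);
	case 4:
		return __builtin_bswap32((uint32_t)src);
	case 8:
		return __builtin_bswap64(src);
	default:
		return dst; /* the emulator propagates a fault here instead */
	}
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)demo_movbe(0xaaaabbbbccccddddull,
							 0x1122334455667788ull, 2));
	/* prints 0xaaaabbbbcccc8877: upper 48 bits preserved */
	return 0;
}
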
@@ -3256,6 +3290,18 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3256 return X86EMUL_CONTINUE; 3290 return X86EMUL_CONTINUE;
3257} 3291}
3258 3292
3293static int em_sahf(struct x86_emulate_ctxt *ctxt)
3294{
3295 u32 flags;
3296
3297 flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
3298 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3299
3300 ctxt->eflags &= ~0xffUL;
3301 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3302 return X86EMUL_CONTINUE;
3303}
3304
3259static int em_lahf(struct x86_emulate_ctxt *ctxt) 3305static int em_lahf(struct x86_emulate_ctxt *ctxt)
3260{ 3306{
3261 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; 3307 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
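
[Illustration] em_sahf() loads SF/ZF/AF/PF/CF from AH, forces the reserved always-one bit, and clears the other low-byte flag positions. A sketch of the mask arithmetic follows; the FL_* constants are the architectural EFLAGS bit values.

#include <stdint.h>
#include <stdio.h>

#define FL_CF    0x001u
#define FL_FIXED 0x002u /* always-one bit */
#define FL_PF    0x004u
#define FL_AF    0x010u
#define FL_ZF    0x040u
#define FL_SF    0x080u

static uint32_t demo_sahf(uint32_t eflags, uint32_t rax)
{
	uint32_t flags = (FL_CF | FL_PF | FL_AF | FL_ZF | FL_SF) & (rax >> 8);

	return (eflags & ~0xffu) | flags | FL_FIXED;
}

int main(void)
{
	/* AH = 0xd5 sets SF, ZF, AF, PF and CF */
	printf("%#x\n", demo_sahf(0x200, 0xd500)); /* prints 0x2d7 */
	return 0;
}
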
@@ -3502,7 +3548,7 @@ static const struct opcode group7_rm1[] = {
3502 3548
3503static const struct opcode group7_rm3[] = { 3549static const struct opcode group7_rm3[] = {
3504 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), 3550 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
3505 II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), 3551 II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall),
3506 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), 3552 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
3507 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), 3553 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
3508 DIP(SrcNone | Prot | Priv, stgi, check_svme), 3554 DIP(SrcNone | Prot | Priv, stgi, check_svme),
@@ -3587,7 +3633,7 @@ static const struct group_dual group7 = { {
3587 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), 3633 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3588 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3634 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
3589}, { 3635}, {
3590 I(SrcNone | Priv | VendorSpecific, em_vmcall), 3636 I(SrcNone | Priv | EmulateOnUD, em_vmcall),
3591 EXT(0, group7_rm1), 3637 EXT(0, group7_rm1),
3592 N, EXT(0, group7_rm3), 3638 N, EXT(0, group7_rm3),
3593 II(SrcNone | DstMem | Mov, em_smsw, smsw), N, 3639 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3750,7 +3796,8 @@ static const struct opcode opcode_table[256] = {
3750 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), 3796 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
3751 I(SrcImmFAddr | No64, em_call_far), N, 3797 I(SrcImmFAddr | No64, em_call_far), N,
3752 II(ImplicitOps | Stack, em_pushf, pushf), 3798 II(ImplicitOps | Stack, em_pushf, pushf),
3753 II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf), 3799 II(ImplicitOps | Stack, em_popf, popf),
3800 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
3754 /* 0xA0 - 0xA7 */ 3801 /* 0xA0 - 0xA7 */
3755 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3802 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
3756 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), 3803 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
@@ -3810,7 +3857,7 @@ static const struct opcode opcode_table[256] = {
3810static const struct opcode twobyte_table[256] = { 3857static const struct opcode twobyte_table[256] = {
3811 /* 0x00 - 0x0F */ 3858 /* 0x00 - 0x0F */
3812 G(0, group6), GD(0, &group7), N, N, 3859 G(0, group6), GD(0, &group7), N, N,
3813 N, I(ImplicitOps | VendorSpecific, em_syscall), 3860 N, I(ImplicitOps | EmulateOnUD, em_syscall),
3814 II(ImplicitOps | Priv, em_clts, clts), N, 3861 II(ImplicitOps | Priv, em_clts, clts), N,
3815 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 3862 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
3816 N, D(ImplicitOps | ModRM), N, N, 3863 N, D(ImplicitOps | ModRM), N, N,
@@ -3830,8 +3877,8 @@ static const struct opcode twobyte_table[256] = {
3830 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), 3877 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
3831 II(ImplicitOps | Priv, em_rdmsr, rdmsr), 3878 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
3832 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), 3879 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
3833 I(ImplicitOps | VendorSpecific, em_sysenter), 3880 I(ImplicitOps | EmulateOnUD, em_sysenter),
3834 I(ImplicitOps | Priv | VendorSpecific, em_sysexit), 3881 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
3835 N, N, 3882 N, N,
3836 N, N, N, N, N, N, N, N, 3883 N, N, N, N, N, N, N, N,
3837 /* 0x40 - 0x4F */ 3884 /* 0x40 - 0x4F */
@@ -3892,6 +3939,30 @@ static const struct opcode twobyte_table[256] = {
3892 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N 3939 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
3893}; 3940};
3894 3941
3942static const struct gprefix three_byte_0f_38_f0 = {
3943 I(DstReg | SrcMem | Mov, em_movbe), N, N, N
3944};
3945
3946static const struct gprefix three_byte_0f_38_f1 = {
3947 I(DstMem | SrcReg | Mov, em_movbe), N, N, N
3948};
3949
3950/*
3951 * Insns below are selected by the prefix which indexed by the third opcode
3952 * byte.
3953 */
3954static const struct opcode opcode_map_0f_38[256] = {
3955 /* 0x00 - 0x7f */
3956 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
3957 /* 0x80 - 0xef */
3958 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
3959 /* 0xf0 - 0xf1 */
3960 GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
3961 GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
3962 /* 0xf2 - 0xff */
3963 N, N, X4(N), X8(N)
3964};
3965
3895#undef D 3966#undef D
3896#undef N 3967#undef N
3897#undef G 3968#undef G
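
[Illustration] With opcode_map_0f_38 in place, decode follows a 0x0f escape, then a 0x38 byte, and indexes a third table by the next opcode byte (0xf0/0xf1 are the MOVBE forms). A toy sketch of that fetch-and-dispatch order is below; the table contents and demo_* names are placeholders, not the emulator's structures.

#include <stdint.h>
#include <stdio.h>

static const char *demo_map_0f_38(uint8_t b)
{
	switch (b) {
	case 0xf0: return "movbe reg, mem";
	case 0xf1: return "movbe mem, reg";
	default:   return "undefined";
	}
}

static void demo_decode(const uint8_t *insn, int *opcode_len, const char **name)
{
	*opcode_len = 1;
	*name = "one-byte opcode";
	if (insn[0] == 0x0f) {
		*opcode_len = 2;
		*name = "two-byte opcode";
		if (insn[1] == 0x38) {          /* 0F 38 escape */
			*opcode_len = 3;
			*name = demo_map_0f_38(insn[2]);
		}
	}
}

int main(void)
{
	const uint8_t movbe[] = { 0x0f, 0x38, 0xf0, 0x06 }; /* 0f 38 f0 /r: movbe r32, m32 */
	int len;
	const char *name;

	demo_decode(movbe, &len, &name);
	printf("opcode_len=%d -> %s\n", len, name);
	return 0;
}
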
@@ -4040,7 +4111,8 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4040 case OpMem8: 4111 case OpMem8:
4041 ctxt->memop.bytes = 1; 4112 ctxt->memop.bytes = 1;
4042 if (ctxt->memop.type == OP_REG) { 4113 if (ctxt->memop.type == OP_REG) {
4043 ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); 4114 ctxt->memop.addr.reg = decode_register(ctxt,
4115 ctxt->modrm_rm, true);
4044 fetch_register_operand(&ctxt->memop); 4116 fetch_register_operand(&ctxt->memop);
4045 } 4117 }
4046 goto mem_common; 4118 goto mem_common;
@@ -4126,6 +4198,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4126 ctxt->_eip = ctxt->eip; 4198 ctxt->_eip = ctxt->eip;
4127 ctxt->fetch.start = ctxt->_eip; 4199 ctxt->fetch.start = ctxt->_eip;
4128 ctxt->fetch.end = ctxt->fetch.start + insn_len; 4200 ctxt->fetch.end = ctxt->fetch.start + insn_len;
4201 ctxt->opcode_len = 1;
4129 if (insn_len > 0) 4202 if (insn_len > 0)
4130 memcpy(ctxt->fetch.data, insn, insn_len); 4203 memcpy(ctxt->fetch.data, insn, insn_len);
4131 4204
@@ -4208,9 +4281,16 @@ done_prefixes:
4208 opcode = opcode_table[ctxt->b]; 4281 opcode = opcode_table[ctxt->b];
4209 /* Two-byte opcode? */ 4282 /* Two-byte opcode? */
4210 if (ctxt->b == 0x0f) { 4283 if (ctxt->b == 0x0f) {
4211 ctxt->twobyte = 1; 4284 ctxt->opcode_len = 2;
4212 ctxt->b = insn_fetch(u8, ctxt); 4285 ctxt->b = insn_fetch(u8, ctxt);
4213 opcode = twobyte_table[ctxt->b]; 4286 opcode = twobyte_table[ctxt->b];
4287
4288 /* 0F_38 opcode map */
4289 if (ctxt->b == 0x38) {
4290 ctxt->opcode_len = 3;
4291 ctxt->b = insn_fetch(u8, ctxt);
4292 opcode = opcode_map_0f_38[ctxt->b];
4293 }
4214 } 4294 }
4215 ctxt->d = opcode.flags; 4295 ctxt->d = opcode.flags;
4216 4296
@@ -4267,7 +4347,7 @@ done_prefixes:
4267 if (ctxt->d == 0 || (ctxt->d & NotImpl)) 4347 if (ctxt->d == 0 || (ctxt->d & NotImpl))
4268 return EMULATION_FAILED; 4348 return EMULATION_FAILED;
4269 4349
4270 if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) 4350 if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
4271 return EMULATION_FAILED; 4351 return EMULATION_FAILED;
4272 4352
4273 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) 4353 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
@@ -4540,8 +4620,10 @@ special_insn:
4540 goto writeback; 4620 goto writeback;
4541 } 4621 }
4542 4622
4543 if (ctxt->twobyte) 4623 if (ctxt->opcode_len == 2)
4544 goto twobyte_insn; 4624 goto twobyte_insn;
4625 else if (ctxt->opcode_len == 3)
4626 goto threebyte_insn;
4545 4627
4546 switch (ctxt->b) { 4628 switch (ctxt->b) {
4547 case 0x63: /* movsxd */ 4629 case 0x63: /* movsxd */
@@ -4726,6 +4808,8 @@ twobyte_insn:
4726 goto cannot_emulate; 4808 goto cannot_emulate;
4727 } 4809 }
4728 4810
4811threebyte_insn:
4812
4729 if (rc != X86EMUL_CONTINUE) 4813 if (rc != X86EMUL_CONTINUE)
4730 goto done; 4814 goto done;
4731 4815
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dce0df8150df..40772ef0f2b1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2570 kvm_release_pfn_clean(pfn); 2570 kvm_release_pfn_clean(pfn);
2571} 2571}
2572 2572
2573static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
2574{
2575 mmu_free_roots(vcpu);
2576}
2577
2578static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, 2573static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
2579 bool no_dirty_log) 2574 bool no_dirty_log)
2580{ 2575{
@@ -3424,18 +3419,11 @@ out_unlock:
3424 return 0; 3419 return 0;
3425} 3420}
3426 3421
3427static void nonpaging_free(struct kvm_vcpu *vcpu) 3422static void nonpaging_init_context(struct kvm_vcpu *vcpu,
3428{ 3423 struct kvm_mmu *context)
3429 mmu_free_roots(vcpu);
3430}
3431
3432static int nonpaging_init_context(struct kvm_vcpu *vcpu,
3433 struct kvm_mmu *context)
3434{ 3424{
3435 context->new_cr3 = nonpaging_new_cr3;
3436 context->page_fault = nonpaging_page_fault; 3425 context->page_fault = nonpaging_page_fault;
3437 context->gva_to_gpa = nonpaging_gva_to_gpa; 3426 context->gva_to_gpa = nonpaging_gva_to_gpa;
3438 context->free = nonpaging_free;
3439 context->sync_page = nonpaging_sync_page; 3427 context->sync_page = nonpaging_sync_page;
3440 context->invlpg = nonpaging_invlpg; 3428 context->invlpg = nonpaging_invlpg;
3441 context->update_pte = nonpaging_update_pte; 3429 context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
3444 context->root_hpa = INVALID_PAGE; 3432 context->root_hpa = INVALID_PAGE;
3445 context->direct_map = true; 3433 context->direct_map = true;
3446 context->nx = false; 3434 context->nx = false;
3447 return 0;
3448} 3435}
3449 3436
3450void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 3437void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
3454} 3441}
3455EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); 3442EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
3456 3443
3457static void paging_new_cr3(struct kvm_vcpu *vcpu) 3444void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
3458{ 3445{
3459 pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
3460 mmu_free_roots(vcpu); 3446 mmu_free_roots(vcpu);
3461} 3447}
3462 3448
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
3471 vcpu->arch.mmu.inject_page_fault(vcpu, fault); 3457 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
3472} 3458}
3473 3459
3474static void paging_free(struct kvm_vcpu *vcpu)
3475{
3476 nonpaging_free(vcpu);
3477}
3478
3479static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, 3460static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
3480 unsigned access, int *nr_present) 3461 unsigned access, int *nr_present)
3481{ 3462{
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
3665 mmu->last_pte_bitmap = map; 3646 mmu->last_pte_bitmap = map;
3666} 3647}
3667 3648
3668static int paging64_init_context_common(struct kvm_vcpu *vcpu, 3649static void paging64_init_context_common(struct kvm_vcpu *vcpu,
3669 struct kvm_mmu *context, 3650 struct kvm_mmu *context,
3670 int level) 3651 int level)
3671{ 3652{
3672 context->nx = is_nx(vcpu); 3653 context->nx = is_nx(vcpu);
3673 context->root_level = level; 3654 context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
3677 update_last_pte_bitmap(vcpu, context); 3658 update_last_pte_bitmap(vcpu, context);
3678 3659
3679 ASSERT(is_pae(vcpu)); 3660 ASSERT(is_pae(vcpu));
3680 context->new_cr3 = paging_new_cr3;
3681 context->page_fault = paging64_page_fault; 3661 context->page_fault = paging64_page_fault;
3682 context->gva_to_gpa = paging64_gva_to_gpa; 3662 context->gva_to_gpa = paging64_gva_to_gpa;
3683 context->sync_page = paging64_sync_page; 3663 context->sync_page = paging64_sync_page;
3684 context->invlpg = paging64_invlpg; 3664 context->invlpg = paging64_invlpg;
3685 context->update_pte = paging64_update_pte; 3665 context->update_pte = paging64_update_pte;
3686 context->free = paging_free;
3687 context->shadow_root_level = level; 3666 context->shadow_root_level = level;
3688 context->root_hpa = INVALID_PAGE; 3667 context->root_hpa = INVALID_PAGE;
3689 context->direct_map = false; 3668 context->direct_map = false;
3690 return 0;
3691} 3669}
3692 3670
3693static int paging64_init_context(struct kvm_vcpu *vcpu, 3671static void paging64_init_context(struct kvm_vcpu *vcpu,
3694 struct kvm_mmu *context) 3672 struct kvm_mmu *context)
3695{ 3673{
3696 return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); 3674 paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
3697} 3675}
3698 3676
3699static int paging32_init_context(struct kvm_vcpu *vcpu, 3677static void paging32_init_context(struct kvm_vcpu *vcpu,
3700 struct kvm_mmu *context) 3678 struct kvm_mmu *context)
3701{ 3679{
3702 context->nx = false; 3680 context->nx = false;
3703 context->root_level = PT32_ROOT_LEVEL; 3681 context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
3706 update_permission_bitmask(vcpu, context, false); 3684 update_permission_bitmask(vcpu, context, false);
3707 update_last_pte_bitmap(vcpu, context); 3685 update_last_pte_bitmap(vcpu, context);
3708 3686
3709 context->new_cr3 = paging_new_cr3;
3710 context->page_fault = paging32_page_fault; 3687 context->page_fault = paging32_page_fault;
3711 context->gva_to_gpa = paging32_gva_to_gpa; 3688 context->gva_to_gpa = paging32_gva_to_gpa;
3712 context->free = paging_free;
3713 context->sync_page = paging32_sync_page; 3689 context->sync_page = paging32_sync_page;
3714 context->invlpg = paging32_invlpg; 3690 context->invlpg = paging32_invlpg;
3715 context->update_pte = paging32_update_pte; 3691 context->update_pte = paging32_update_pte;
3716 context->shadow_root_level = PT32E_ROOT_LEVEL; 3692 context->shadow_root_level = PT32E_ROOT_LEVEL;
3717 context->root_hpa = INVALID_PAGE; 3693 context->root_hpa = INVALID_PAGE;
3718 context->direct_map = false; 3694 context->direct_map = false;
3719 return 0;
3720} 3695}
3721 3696
3722static int paging32E_init_context(struct kvm_vcpu *vcpu, 3697static void paging32E_init_context(struct kvm_vcpu *vcpu,
3723 struct kvm_mmu *context) 3698 struct kvm_mmu *context)
3724{ 3699{
3725 return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); 3700 paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
3726} 3701}
3727 3702
3728static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) 3703static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3729{ 3704{
3730 struct kvm_mmu *context = vcpu->arch.walk_mmu; 3705 struct kvm_mmu *context = vcpu->arch.walk_mmu;
3731 3706
3732 context->base_role.word = 0; 3707 context->base_role.word = 0;
3733 context->new_cr3 = nonpaging_new_cr3;
3734 context->page_fault = tdp_page_fault; 3708 context->page_fault = tdp_page_fault;
3735 context->free = nonpaging_free;
3736 context->sync_page = nonpaging_sync_page; 3709 context->sync_page = nonpaging_sync_page;
3737 context->invlpg = nonpaging_invlpg; 3710 context->invlpg = nonpaging_invlpg;
3738 context->update_pte = nonpaging_update_pte; 3711 context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
3767 3740
3768 update_permission_bitmask(vcpu, context, false); 3741 update_permission_bitmask(vcpu, context, false);
3769 update_last_pte_bitmap(vcpu, context); 3742 update_last_pte_bitmap(vcpu, context);
3770
3771 return 0;
3772} 3743}
3773 3744
3774int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) 3745void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
3775{ 3746{
3776 int r;
3777 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); 3747 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
3778 ASSERT(vcpu); 3748 ASSERT(vcpu);
3779 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3749 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
3780 3750
3781 if (!is_paging(vcpu)) 3751 if (!is_paging(vcpu))
3782 r = nonpaging_init_context(vcpu, context); 3752 nonpaging_init_context(vcpu, context);
3783 else if (is_long_mode(vcpu)) 3753 else if (is_long_mode(vcpu))
3784 r = paging64_init_context(vcpu, context); 3754 paging64_init_context(vcpu, context);
3785 else if (is_pae(vcpu)) 3755 else if (is_pae(vcpu))
3786 r = paging32E_init_context(vcpu, context); 3756 paging32E_init_context(vcpu, context);
3787 else 3757 else
3788 r = paging32_init_context(vcpu, context); 3758 paging32_init_context(vcpu, context);
3789 3759
3790 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); 3760 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
3791 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 3761 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
3792 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 3762 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
3793 vcpu->arch.mmu.base_role.smep_andnot_wp 3763 vcpu->arch.mmu.base_role.smep_andnot_wp
3794 = smep && !is_write_protection(vcpu); 3764 = smep && !is_write_protection(vcpu);
3795
3796 return r;
3797} 3765}
3798EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 3766EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
3799 3767
3800int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 3768void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
3801 bool execonly) 3769 bool execonly)
3802{ 3770{
3803 ASSERT(vcpu); 3771 ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
3806 context->shadow_root_level = kvm_x86_ops->get_tdp_level(); 3774 context->shadow_root_level = kvm_x86_ops->get_tdp_level();
3807 3775
3808 context->nx = true; 3776 context->nx = true;
3809 context->new_cr3 = paging_new_cr3;
3810 context->page_fault = ept_page_fault; 3777 context->page_fault = ept_page_fault;
3811 context->gva_to_gpa = ept_gva_to_gpa; 3778 context->gva_to_gpa = ept_gva_to_gpa;
3812 context->sync_page = ept_sync_page; 3779 context->sync_page = ept_sync_page;
3813 context->invlpg = ept_invlpg; 3780 context->invlpg = ept_invlpg;
3814 context->update_pte = ept_update_pte; 3781 context->update_pte = ept_update_pte;
3815 context->free = paging_free;
3816 context->root_level = context->shadow_root_level; 3782 context->root_level = context->shadow_root_level;
3817 context->root_hpa = INVALID_PAGE; 3783 context->root_hpa = INVALID_PAGE;
3818 context->direct_map = false; 3784 context->direct_map = false;
3819 3785
3820 update_permission_bitmask(vcpu, context, true); 3786 update_permission_bitmask(vcpu, context, true);
3821 reset_rsvds_bits_mask_ept(vcpu, context, execonly); 3787 reset_rsvds_bits_mask_ept(vcpu, context, execonly);
3822
3823 return 0;
3824} 3788}
3825EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); 3789EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
3826 3790
3827static int init_kvm_softmmu(struct kvm_vcpu *vcpu) 3791static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
3828{ 3792{
3829 int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); 3793 kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
3830
3831 vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; 3794 vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
3832 vcpu->arch.walk_mmu->get_cr3 = get_cr3; 3795 vcpu->arch.walk_mmu->get_cr3 = get_cr3;
3833 vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; 3796 vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
3834 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 3797 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
3835
3836 return r;
3837} 3798}
3838 3799
3839static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) 3800static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
3840{ 3801{
3841 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; 3802 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
3842 3803
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
3873 3834
3874 update_permission_bitmask(vcpu, g_context, false); 3835 update_permission_bitmask(vcpu, g_context, false);
3875 update_last_pte_bitmap(vcpu, g_context); 3836 update_last_pte_bitmap(vcpu, g_context);
3876
3877 return 0;
3878} 3837}
3879 3838
3880static int init_kvm_mmu(struct kvm_vcpu *vcpu) 3839static void init_kvm_mmu(struct kvm_vcpu *vcpu)
3881{ 3840{
3882 if (mmu_is_nested(vcpu)) 3841 if (mmu_is_nested(vcpu))
3883 return init_kvm_nested_mmu(vcpu); 3842 return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
3887 return init_kvm_softmmu(vcpu); 3846 return init_kvm_softmmu(vcpu);
3888} 3847}
3889 3848
3890static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 3849void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
3891{ 3850{
3892 ASSERT(vcpu); 3851 ASSERT(vcpu);
3893 if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
3894 /* mmu.free() should set root_hpa = INVALID_PAGE */
3895 vcpu->arch.mmu.free(vcpu);
3896}
3897 3852
3898int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 3853 kvm_mmu_unload(vcpu);
3899{ 3854 init_kvm_mmu(vcpu);
3900 destroy_kvm_mmu(vcpu);
3901 return init_kvm_mmu(vcpu);
3902} 3855}
3903EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); 3856EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
3904 3857
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
3923void kvm_mmu_unload(struct kvm_vcpu *vcpu) 3876void kvm_mmu_unload(struct kvm_vcpu *vcpu)
3924{ 3877{
3925 mmu_free_roots(vcpu); 3878 mmu_free_roots(vcpu);
3879 WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
3926} 3880}
3927EXPORT_SYMBOL_GPL(kvm_mmu_unload); 3881EXPORT_SYMBOL_GPL(kvm_mmu_unload);
3928 3882
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
4281 return alloc_mmu_pages(vcpu); 4235 return alloc_mmu_pages(vcpu);
4282} 4236}
4283 4237
4284int kvm_mmu_setup(struct kvm_vcpu *vcpu) 4238void kvm_mmu_setup(struct kvm_vcpu *vcpu)
4285{ 4239{
4286 ASSERT(vcpu); 4240 ASSERT(vcpu);
4287 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 4241 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
4288 4242
4289 return init_kvm_mmu(vcpu); 4243 init_kvm_mmu(vcpu);
4290} 4244}
4291 4245
4292void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) 4246void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
4428 int nr_to_scan = sc->nr_to_scan; 4382 int nr_to_scan = sc->nr_to_scan;
4429 unsigned long freed = 0; 4383 unsigned long freed = 0;
4430 4384
4431 raw_spin_lock(&kvm_lock); 4385 spin_lock(&kvm_lock);
4432 4386
4433 list_for_each_entry(kvm, &vm_list, vm_list) { 4387 list_for_each_entry(kvm, &vm_list, vm_list) {
4434 int idx; 4388 int idx;
@@ -4478,9 +4432,8 @@ unlock:
4478 break; 4432 break;
4479 } 4433 }
4480 4434
4481 raw_spin_unlock(&kvm_lock); 4435 spin_unlock(&kvm_lock);
4482 return freed; 4436 return freed;
4483
4484} 4437}
4485 4438
4486static unsigned long 4439static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
4574{ 4527{
4575 ASSERT(vcpu); 4528 ASSERT(vcpu);
4576 4529
4577 destroy_kvm_mmu(vcpu); 4530 kvm_mmu_unload(vcpu);
4578 free_mmu_pages(vcpu); 4531 free_mmu_pages(vcpu);
4579 mmu_free_memory_caches(vcpu); 4532 mmu_free_memory_caches(vcpu);
4580} 4533}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 77e044a0f5f7..292615274358 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -70,8 +70,8 @@ enum {
70}; 70};
71 71
72int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); 72int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
73int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); 73void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
74int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 74void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
75 bool execonly); 75 bool execonly);
76 76
77static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) 77static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c0bc80391e40..c7168a5cff1b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1959 nested_svm_vmexit(svm); 1959 nested_svm_vmexit(svm);
1960} 1960}
1961 1961
1962static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 1962static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1963{ 1963{
1964 int r; 1964 kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1965
1966 r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1967 1965
1968 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; 1966 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
1969 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; 1967 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1971 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; 1969 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1972 vcpu->arch.mmu.shadow_root_level = get_npt_level(); 1970 vcpu->arch.mmu.shadow_root_level = get_npt_level();
1973 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 1971 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
1974
1975 return r;
1976} 1972}
1977 1973
1978static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) 1974static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2b2fce1b2009..b2fe1c252f35 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1498,7 +1498,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1498 break; 1498 break;
1499 1499
1500 if (i == NR_AUTOLOAD_MSRS) { 1500 if (i == NR_AUTOLOAD_MSRS) {
1501 printk_once(KERN_WARNING"Not enough mst switch entries. " 1501 printk_once(KERN_WARNING "Not enough msr switch entries. "
1502 "Can't add msr %x\n", msr); 1502 "Can't add msr %x\n", msr);
1503 return; 1503 return;
1504 } else if (i == m->nr) { 1504 } else if (i == m->nr) {
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
1898/* 1898/*
1899 * KVM wants to inject page-faults which it got to the guest. This function 1899 * KVM wants to inject page-faults which it got to the guest. This function
1900 * checks whether in a nested guest, we need to inject them to L1 or L2. 1900 * checks whether in a nested guest, we need to inject them to L1 or L2.
1901 * This function assumes it is called with the exit reason in vmcs02 being
1902 * a #PF exception (this is the only case in which KVM injects a #PF when L2
1903 * is running).
1904 */ 1901 */
1905static int nested_pf_handled(struct kvm_vcpu *vcpu) 1902static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
1906{ 1903{
1907 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 1904 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1908 1905
1909 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 1906 if (!(vmcs12->exception_bitmap & (1u << nr)))
1910 if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
1911 return 0; 1907 return 0;
1912 1908
1913 nested_vmx_vmexit(vcpu); 1909 nested_vmx_vmexit(vcpu);
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
1921 struct vcpu_vmx *vmx = to_vmx(vcpu); 1917 struct vcpu_vmx *vmx = to_vmx(vcpu);
1922 u32 intr_info = nr | INTR_INFO_VALID_MASK; 1918 u32 intr_info = nr | INTR_INFO_VALID_MASK;
1923 1919
1924 if (nr == PF_VECTOR && is_guest_mode(vcpu) && 1920 if (!reinject && is_guest_mode(vcpu) &&
1925 !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) 1921 nested_vmx_check_exception(vcpu, nr))
1926 return; 1922 return;
1927 1923
1928 if (has_error_code) { 1924 if (has_error_code) {
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2204#ifdef CONFIG_X86_64 2200#ifdef CONFIG_X86_64
2205 VM_EXIT_HOST_ADDR_SPACE_SIZE | 2201 VM_EXIT_HOST_ADDR_SPACE_SIZE |
2206#endif 2202#endif
2207 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; 2203 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
2204 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2205 if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
2206 !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
2207 nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2208 nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
2209 }
2208 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | 2210 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
2209 VM_EXIT_LOAD_IA32_EFER); 2211 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
2210 2212
2211 /* entry controls */ 2213 /* entry controls */
2212 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, 2214 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2226,7 +2228,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2226 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); 2228 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
2227 nested_vmx_procbased_ctls_low = 0; 2229 nested_vmx_procbased_ctls_low = 0;
2228 nested_vmx_procbased_ctls_high &= 2230 nested_vmx_procbased_ctls_high &=
2229 CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2231 CPU_BASED_VIRTUAL_INTR_PENDING |
2232 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
2230 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | 2233 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
2231 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | 2234 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
2232 CPU_BASED_CR3_STORE_EXITING | 2235 CPU_BASED_CR3_STORE_EXITING |
@@ -2252,13 +2255,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2252 nested_vmx_secondary_ctls_low = 0; 2255 nested_vmx_secondary_ctls_low = 0;
2253 nested_vmx_secondary_ctls_high &= 2256 nested_vmx_secondary_ctls_high &=
2254 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2257 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2258 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2255 SECONDARY_EXEC_WBINVD_EXITING; 2259 SECONDARY_EXEC_WBINVD_EXITING;
2256 2260
2257 if (enable_ept) { 2261 if (enable_ept) {
2258 /* nested EPT: emulate EPT also to L1 */ 2262 /* nested EPT: emulate EPT also to L1 */
2259 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; 2263 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
2260 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2264 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
2261 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; 2265 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
2266 VMX_EPT_INVEPT_BIT;
2262 nested_vmx_ept_caps &= vmx_capability.ept; 2267 nested_vmx_ept_caps &= vmx_capability.ept;
2263 /* 2268 /*
2264 * Since invept is completely emulated we support both global 2269 * Since invept is completely emulated we support both global
@@ -3380,8 +3385,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
3380 if (enable_ept) { 3385 if (enable_ept) {
3381 eptp = construct_eptp(cr3); 3386 eptp = construct_eptp(cr3);
3382 vmcs_write64(EPT_POINTER, eptp); 3387 vmcs_write64(EPT_POINTER, eptp);
3383 guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : 3388 if (is_paging(vcpu) || is_guest_mode(vcpu))
3384 vcpu->kvm->arch.ept_identity_map_addr; 3389 guest_cr3 = kvm_read_cr3(vcpu);
3390 else
3391 guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
3385 ept_load_pdptrs(vcpu); 3392 ept_load_pdptrs(vcpu);
3386 } 3393 }
3387 3394
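
With EPT enabled the hardware walks the guest's own page tables, so vmx_set_cr3() programs GUEST_CR3 with either the CR3 the guest last loaded (when it is paging, or when a nested L2 is running and L1 manages that CR3) or the address of KVM's identity-mapped page table used while the guest runs unpaged. A compact sketch of just that selection; the struct and field names below are stand-ins for the vcpu state, not the kernel's types:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical, flattened view of the vcpu state this decision reads. */
    struct vcpu_state {
        bool paging;                     /* CR0.PG as seen by the guest      */
        bool guest_mode;                 /* true while a nested L2 is active */
        uint64_t guest_cr3;              /* CR3 the guest last loaded        */
        uint64_t ept_identity_map_addr;  /* KVM's identity-mapped pagetable  */
    };

    static uint64_t pick_guest_cr3(const struct vcpu_state *v)
    {
        if (v->paging || v->guest_mode)
            return v->guest_cr3;
        return v->ept_identity_map_addr;
    }

    int main(void)
    {
        struct vcpu_state v = {
            .paging = false, .guest_mode = true,
            .guest_cr3 = 0x1000, .ept_identity_map_addr = 0xfeffc000,
        };
        printf("GUEST_CR3 = %#llx\n", (unsigned long long)pick_guest_cr3(&v));
        return 0;
    }
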
@@ -4879,6 +4886,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
4879 hypercall[2] = 0xc1; 4886 hypercall[2] = 0xc1;
4880} 4887}
4881 4888
4889static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
4890{
4891 unsigned long always_on = VMXON_CR0_ALWAYSON;
4892
4893 if (nested_vmx_secondary_ctls_high &
4894 SECONDARY_EXEC_UNRESTRICTED_GUEST &&
4895 nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
4896 always_on &= ~(X86_CR0_PE | X86_CR0_PG);
4897 return (val & always_on) == always_on;
4898}
4899
4882/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ 4900/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
4883static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) 4901static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4884{ 4902{
@@ -4897,9 +4915,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4897 val = (val & ~vmcs12->cr0_guest_host_mask) | 4915 val = (val & ~vmcs12->cr0_guest_host_mask) |
4898 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); 4916 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
4899 4917
4900 /* TODO: will have to take unrestricted guest mode into 4918 if (!nested_cr0_valid(vmcs12, val))
4901 * account */
4902 if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
4903 return 1; 4919 return 1;
4904 4920
4905 if (kvm_set_cr0(vcpu, val)) 4921 if (kvm_set_cr0(vcpu, val))
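
handle_set_cr0() above (and nested_vmx_run() further down) now checks a CR0 value destined for L2 with nested_cr0_valid() instead of the raw VMXON_CR0_ALWAYSON mask: when both KVM and vmcs12 enable "unrestricted guest", PE and PG drop out of the always-on set, so L1 may legally run an unpaged or real-mode L2. A self-contained sketch of that relaxation; the composition of the always-on mask (PE, NE, PG) is an assumption stated here, not shown in this hunk:

    #include <stdbool.h>
    #include <stdio.h>

    #define X86_CR0_PE (1ul << 0)
    #define X86_CR0_NE (1ul << 5)
    #define X86_CR0_PG (1ul << 31)

    /* Assumed shape of VMXON_CR0_ALWAYSON: PE, NE and PG normally required. */
    #define CR0_ALWAYSON (X86_CR0_PE | X86_CR0_NE | X86_CR0_PG)

    static bool cr0_valid_for_l2(unsigned long val, bool unrestricted_guest)
    {
        unsigned long always_on = CR0_ALWAYSON;

        if (unrestricted_guest)
            always_on &= ~(X86_CR0_PE | X86_CR0_PG); /* real mode allowed */
        return (val & always_on) == always_on;
    }

    int main(void)
    {
        unsigned long real_mode_cr0 = X86_CR0_NE;       /* PE = PG = 0 */

        printf("without unrestricted guest: %d\n",
               cr0_valid_for_l2(real_mode_cr0, false)); /* 0: rejected */
        printf("with unrestricted guest:    %d\n",
               cr0_valid_for_l2(real_mode_cr0, true));  /* 1: accepted */
        return 0;
    }
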
@@ -6627,6 +6643,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6627 return 0; 6643 return 0;
6628 else if (is_page_fault(intr_info)) 6644 else if (is_page_fault(intr_info))
6629 return enable_ept; 6645 return enable_ept;
6646 else if (is_no_device(intr_info) &&
6647 !(nested_read_cr0(vmcs12) & X86_CR0_TS))
6648 return 0;
6630 return vmcs12->exception_bitmap & 6649 return vmcs12->exception_bitmap &
6631 (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 6650 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
6632 case EXIT_REASON_EXTERNAL_INTERRUPT: 6651 case EXIT_REASON_EXTERNAL_INTERRUPT:
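
The new #NM clause in nested_vmx_exit_handled() keeps a device-not-available fault in L0 whenever L1's shadowed CR0.TS is clear: in that case the fault can only stem from L0's own lazy FPU deactivation, so reflecting it to L1 (even when L1 traps #NM) would be wrong. A boiled-down model of that decision, with a hypothetical struct standing in for the vmcs12 fields and interrupt-info word the real code reads, and ignoring the NMI and page-fault branches shown above:

    #include <stdbool.h>
    #include <stdio.h>

    #define X86_CR0_TS (1ul << 3)
    #define NM_VECTOR  7

    /* Hypothetical summary of what the real code reads from the VMCS. */
    struct l2_exception {
        int vector;                       /* exception vector of the L2 fault */
        unsigned long l1_cr0;             /* CR0 as L1 believes it set it     */
        unsigned int l1_exception_bitmap;
    };

    /* true  -> reflect the exception to L1 as a nested vmexit
     * false -> let L0 handle it and resume L2 directly */
    static bool reflect_exception_to_l1(const struct l2_exception *e)
    {
        if (e->vector == NM_VECTOR && !(e->l1_cr0 & X86_CR0_TS))
            return false;   /* #NM caused by L0's lazy FPU, not by L1 */
        return e->l1_exception_bitmap & (1u << e->vector);
    }

    int main(void)
    {
        struct l2_exception nm = { .vector = NM_VECTOR, .l1_cr0 = 0,
                                   .l1_exception_bitmap = 1u << NM_VECTOR };
        printf("reflect #NM, L1 CR0.TS clear: %d\n",
               reflect_exception_to_l1(&nm));   /* 0 */
        nm.l1_cr0 = X86_CR0_TS;
        printf("reflect #NM, L1 CR0.TS set:   %d\n",
               reflect_exception_to_l1(&nm));   /* 1 */
        return 0;
    }
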
@@ -6722,6 +6741,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6722 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 6741 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6723} 6742}
6724 6743
6744static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
6745{
6746 u64 delta_tsc_l1;
6747 u32 preempt_val_l1, preempt_val_l2, preempt_scale;
6748
6749 if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
6750 PIN_BASED_VMX_PREEMPTION_TIMER))
6751 return;
6752 preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
6753 MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
6754 preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
6755 delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
6756 - vcpu->arch.last_guest_tsc;
6757 preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
6758 if (preempt_val_l2 <= preempt_val_l1)
6759 preempt_val_l2 = 0;
6760 else
6761 preempt_val_l2 -= preempt_val_l1;
6762 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
6763}
6764
6725/* 6765/*
6726 * The guest has exited. See if we can fix it or if we need userspace 6766 * The guest has exited. See if we can fix it or if we need userspace
6727 * assistance. 6767 * assistance.
@@ -6736,20 +6776,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6736 if (vmx->emulation_required) 6776 if (vmx->emulation_required)
6737 return handle_invalid_guest_state(vcpu); 6777 return handle_invalid_guest_state(vcpu);
6738 6778
6739 /*
6740 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
6741 * we did not inject a still-pending event to L1 now because of
6742 * nested_run_pending, we need to re-enable this bit.
6743 */
6744 if (vmx->nested.nested_run_pending)
6745 kvm_make_request(KVM_REQ_EVENT, vcpu);
6746
6747 if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
6748 exit_reason == EXIT_REASON_VMRESUME))
6749 vmx->nested.nested_run_pending = 1;
6750 else
6751 vmx->nested.nested_run_pending = 0;
6752
6753 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6779 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
6754 nested_vmx_vmexit(vcpu); 6780 nested_vmx_vmexit(vcpu);
6755 return 1; 6781 return 1;
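
nested_adjust_preemption_timer() above charges L2's VMX preemption timer for the time spent outside L2: the TSC cycles that elapsed in L1's timebase since the last entry are shifted right by the scale read from MSR_IA32_VMX_MISC (the mask in the hunk suggests bits 4:0), and that many timer ticks are subtracted from the value saved at the previous exit, clamping at zero so an overdue timer fires immediately on the next entry. A small arithmetic model of the adjustment, using plain integers rather than VMCS accessors:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * preempt_scale models MSR_IA32_VMX_MISC bits 4:0: the preemption timer
     * ticks once every 2^preempt_scale TSC cycles.
     */
    static uint32_t adjust_preemption_timer(uint32_t saved_timer_l2,
                                            uint64_t tsc_cycles_elapsed_l1,
                                            unsigned int preempt_scale)
    {
        uint32_t ticks_consumed = tsc_cycles_elapsed_l1 >> preempt_scale;

        if (saved_timer_l2 <= ticks_consumed)
            return 0;               /* already expired: fire right away */
        return saved_timer_l2 - ticks_consumed;
    }

    int main(void)
    {
        /* 1,000,000 TSC cycles spent outside L2 with scale 5 -> 31250 ticks */
        printf("remaining timer: %u\n",
               adjust_preemption_timer(100000, 1000000, 5));  /* 68750 */
        printf("remaining timer: %u\n",
               adjust_preemption_timer(20000, 1000000, 5));   /* 0 */
        return 0;
    }
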
@@ -7061,9 +7087,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
7061 case INTR_TYPE_HARD_EXCEPTION: 7087 case INTR_TYPE_HARD_EXCEPTION:
7062 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 7088 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
7063 u32 err = vmcs_read32(error_code_field); 7089 u32 err = vmcs_read32(error_code_field);
7064 kvm_queue_exception_e(vcpu, vector, err); 7090 kvm_requeue_exception_e(vcpu, vector, err);
7065 } else 7091 } else
7066 kvm_queue_exception(vcpu, vector); 7092 kvm_requeue_exception(vcpu, vector);
7067 break; 7093 break;
7068 case INTR_TYPE_SOFT_INTR: 7094 case INTR_TYPE_SOFT_INTR:
7069 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); 7095 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
@@ -7146,6 +7172,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7146 atomic_switch_perf_msrs(vmx); 7172 atomic_switch_perf_msrs(vmx);
7147 debugctlmsr = get_debugctlmsr(); 7173 debugctlmsr = get_debugctlmsr();
7148 7174
7175 if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
7176 nested_adjust_preemption_timer(vcpu);
7149 vmx->__launched = vmx->loaded_vmcs->launched; 7177 vmx->__launched = vmx->loaded_vmcs->launched;
7150 asm( 7178 asm(
7151 /* Store host registers */ 7179 /* Store host registers */
@@ -7284,6 +7312,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7284 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 7312 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
7285 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); 7313 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
7286 7314
7315 /*
7316 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
7317 * we did not inject a still-pending event to L1 now because of
7318 * nested_run_pending, we need to re-enable this bit.
7319 */
7320 if (vmx->nested.nested_run_pending)
7321 kvm_make_request(KVM_REQ_EVENT, vcpu);
7322
7323 vmx->nested.nested_run_pending = 0;
7324
7287 vmx_complete_atomic_exit(vmx); 7325 vmx_complete_atomic_exit(vmx);
7288 vmx_recover_nmi_blocking(vmx); 7326 vmx_recover_nmi_blocking(vmx);
7289 vmx_complete_interrupts(vmx); 7327 vmx_complete_interrupts(vmx);
@@ -7410,8 +7448,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
7410 */ 7448 */
7411 if (is_mmio) 7449 if (is_mmio)
7412 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; 7450 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
7413 else if (vcpu->kvm->arch.iommu_domain && 7451 else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
7414 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
7415 ret = kvm_get_guest_memory_type(vcpu, gfn) << 7452 ret = kvm_get_guest_memory_type(vcpu, gfn) <<
7416 VMX_EPT_MT_EPTE_SHIFT; 7453 VMX_EPT_MT_EPTE_SHIFT;
7417 else 7454 else
@@ -7501,9 +7538,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
7501 return get_vmcs12(vcpu)->ept_pointer; 7538 return get_vmcs12(vcpu)->ept_pointer;
7502} 7539}
7503 7540
7504static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 7541static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
7505{ 7542{
7506 int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, 7543 kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
7507 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); 7544 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
7508 7545
7509 vcpu->arch.mmu.set_cr3 = vmx_set_cr3; 7546 vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
@@ -7511,8 +7548,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
7511 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; 7548 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
7512 7549
7513 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 7550 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
7514
7515 return r;
7516} 7551}
7517 7552
7518static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 7553static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -7520,6 +7555,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
7520 vcpu->arch.walk_mmu = &vcpu->arch.mmu; 7555 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
7521} 7556}
7522 7557
7558static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7559 struct x86_exception *fault)
7560{
7561 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7562
7563 WARN_ON(!is_guest_mode(vcpu));
7564
7565 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
7566 if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
7567 nested_vmx_vmexit(vcpu);
7568 else
7569 kvm_inject_page_fault(vcpu, fault);
7570}
7571
7523/* 7572/*
7524 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 7573 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
7525 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 7574 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
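
vmx_inject_page_fault_nested() above is installed as the walk_mmu page-fault callback while L2 runs without EPT (see the prepare_vmcs02 hunk below, and the matching restore to kvm_inject_page_fault in load_vmcs12_host_state): a fault raised by the emulated L2 MMU either becomes a nested vmexit to L1, if L1's exception bitmap traps #PF, or is injected straight into L2. A condensed model of that routing, using a plain bitmap argument in place of vmcs12; as the code's TODO notes, PFEC_MATCH/MASK filtering is deliberately ignored here as well:

    #include <stdio.h>

    #define PF_VECTOR 14

    enum pf_target { INJECT_INTO_L2, VMEXIT_TO_L1 };

    /* Decide where an L2 page fault goes, given L1's exception bitmap. */
    static enum pf_target route_l2_page_fault(unsigned int l1_exception_bitmap)
    {
        if (l1_exception_bitmap & (1u << PF_VECTOR))
            return VMEXIT_TO_L1;
        return INJECT_INTO_L2;
    }

    int main(void)
    {
        printf("L1 traps #PF:   %s\n",
               route_l2_page_fault(1u << PF_VECTOR) == VMEXIT_TO_L1 ?
               "vmexit to L1" : "inject into L2");
        printf("L1 ignores #PF: %s\n",
               route_l2_page_fault(0) == VMEXIT_TO_L1 ?
               "vmexit to L1" : "inject into L2");
        return 0;
    }
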
@@ -7533,6 +7582,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7533{ 7582{
7534 struct vcpu_vmx *vmx = to_vmx(vcpu); 7583 struct vcpu_vmx *vmx = to_vmx(vcpu);
7535 u32 exec_control; 7584 u32 exec_control;
7585 u32 exit_control;
7536 7586
7537 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 7587 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
7538 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 7588 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7706,7 +7756,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7706 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 7756 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
7707 * bits are further modified by vmx_set_efer() below. 7757 * bits are further modified by vmx_set_efer() below.
7708 */ 7758 */
7709 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 7759 exit_control = vmcs_config.vmexit_ctrl;
7760 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7761 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7762 vmcs_write32(VM_EXIT_CONTROLS, exit_control);
7710 7763
7711 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7764 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7712 * emulated by vmx_set_efer(), below. 7765 * emulated by vmx_set_efer(), below.
@@ -7773,6 +7826,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7773 kvm_set_cr3(vcpu, vmcs12->guest_cr3); 7826 kvm_set_cr3(vcpu, vmcs12->guest_cr3);
7774 kvm_mmu_reset_context(vcpu); 7827 kvm_mmu_reset_context(vcpu);
7775 7828
7829 if (!enable_ept)
7830 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
7831
7776 /* 7832 /*
7777 * L1 may access the L2's PDPTR, so save them to construct vmcs12 7833 * L1 may access the L2's PDPTR, so save them to construct vmcs12
7778 */ 7834 */
@@ -7876,7 +7932,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7876 return 1; 7932 return 1;
7877 } 7933 }
7878 7934
7879 if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || 7935 if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
7880 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { 7936 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
7881 nested_vmx_entry_failure(vcpu, vmcs12, 7937 nested_vmx_entry_failure(vcpu, vmcs12,
7882 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); 7938 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -7938,6 +7994,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7938 7994
7939 enter_guest_mode(vcpu); 7995 enter_guest_mode(vcpu);
7940 7996
7997 vmx->nested.nested_run_pending = 1;
7998
7941 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 7999 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
7942 8000
7943 cpu = get_cpu(); 8001 cpu = get_cpu();
@@ -8005,7 +8063,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8005 u32 idt_vectoring; 8063 u32 idt_vectoring;
8006 unsigned int nr; 8064 unsigned int nr;
8007 8065
8008 if (vcpu->arch.exception.pending) { 8066 if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
8009 nr = vcpu->arch.exception.nr; 8067 nr = vcpu->arch.exception.nr;
8010 idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 8068 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
8011 8069
@@ -8023,7 +8081,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8023 } 8081 }
8024 8082
8025 vmcs12->idt_vectoring_info_field = idt_vectoring; 8083 vmcs12->idt_vectoring_info_field = idt_vectoring;
8026 } else if (vcpu->arch.nmi_pending) { 8084 } else if (vcpu->arch.nmi_injected) {
8027 vmcs12->idt_vectoring_info_field = 8085 vmcs12->idt_vectoring_info_field =
8028 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; 8086 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
8029 } else if (vcpu->arch.interrupt.pending) { 8087 } else if (vcpu->arch.interrupt.pending) {
@@ -8105,6 +8163,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8105 vmcs12->guest_pending_dbg_exceptions = 8163 vmcs12->guest_pending_dbg_exceptions =
8106 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8164 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
8107 8165
8166 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
8167 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
8168 vmcs12->vmx_preemption_timer_value =
8169 vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
8170
8108 /* 8171 /*
8109 * In some cases (usually, nested EPT), L2 is allowed to change its 8172 * In some cases (usually, nested EPT), L2 is allowed to change its
8110 * own CR3 without exiting. If it has changed it, we must keep it. 8173 * own CR3 without exiting. If it has changed it, we must keep it.
@@ -8130,6 +8193,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8130 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 8193 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8131 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 8194 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
8132 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 8195 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
8196 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
8197 vmcs12->guest_ia32_efer = vcpu->arch.efer;
8133 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 8198 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
8134 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 8199 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
8135 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); 8200 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@ -8201,7 +8266,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8201 * fpu_active (which may have changed). 8266 * fpu_active (which may have changed).
8202 * Note that vmx_set_cr0 refers to efer set above. 8267 * Note that vmx_set_cr0 refers to efer set above.
8203 */ 8268 */
8204 kvm_set_cr0(vcpu, vmcs12->host_cr0); 8269 vmx_set_cr0(vcpu, vmcs12->host_cr0);
8205 /* 8270 /*
8206 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need 8271 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
8207 * to apply the same changes to L1's vmcs. We just set cr0 correctly, 8272 * to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -8224,6 +8289,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8224 kvm_set_cr3(vcpu, vmcs12->host_cr3); 8289 kvm_set_cr3(vcpu, vmcs12->host_cr3);
8225 kvm_mmu_reset_context(vcpu); 8290 kvm_mmu_reset_context(vcpu);
8226 8291
8292 if (!enable_ept)
8293 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
8294
8227 if (enable_vpid) { 8295 if (enable_vpid) {
8228 /* 8296 /*
8229 * Trivially support vpid by letting L2s share their parent 8297 * Trivially support vpid by letting L2s share their parent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6..21ef1ba184ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -577,6 +577,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
577int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 577int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
578{ 578{
579 u64 xcr0; 579 u64 xcr0;
580 u64 valid_bits;
580 581
581 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 582 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
582 if (index != XCR_XFEATURE_ENABLED_MASK) 583 if (index != XCR_XFEATURE_ENABLED_MASK)
@@ -586,8 +587,16 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
586 return 1; 587 return 1;
587 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 588 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
588 return 1; 589 return 1;
589 if (xcr0 & ~host_xcr0) 590
591 /*
592 * Do not allow the guest to set bits that we do not support
593 * saving. However, xcr0 bit 0 is always set, even if the
594 * emulated CPU does not support XSAVE (see fx_init).
595 */
596 valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
597 if (xcr0 & ~valid_bits)
590 return 1; 598 return 1;
599
591 kvm_put_guest_xcr0(vcpu); 600 kvm_put_guest_xcr0(vcpu);
592 vcpu->arch.xcr0 = xcr0; 601 vcpu->arch.xcr0 = xcr0;
593 return 0; 602 return 0;
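
The XCR0 check in __kvm_set_xcr() above now validates against the features exposed to this particular guest (vcpu->arch.guest_supported_xcr0) rather than against everything the host can save; XSTATE_FP is always tolerated because, as the new comment says, xcr0 bit 0 stays set even when the emulated CPU lacks XSAVE. A user-space sketch of the same subset test; the XSTATE_* bit values are architectural assumptions, not taken from this hunk:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP   (1ull << 0)
    #define XSTATE_SSE  (1ull << 1)
    #define XSTATE_YMM  (1ull << 2)

    static bool xcr0_acceptable(uint64_t new_xcr0, uint64_t guest_supported_xcr0)
    {
        /* Architectural constraints first: bit 0 set, YMM requires SSE. */
        if (!(new_xcr0 & XSTATE_FP))
            return false;
        if ((new_xcr0 & XSTATE_YMM) && !(new_xcr0 & XSTATE_SSE))
            return false;

        /* Reject anything the guest's CPUID did not advertise;
         * XSTATE_FP is always allowed. */
        uint64_t valid_bits = guest_supported_xcr0 | XSTATE_FP;
        return (new_xcr0 & ~valid_bits) == 0;
    }

    int main(void)
    {
        uint64_t guest_caps = XSTATE_FP | XSTATE_SSE;            /* no AVX */

        printf("FP|SSE     -> %d\n",
               xcr0_acceptable(XSTATE_FP | XSTATE_SSE, guest_caps));      /* 1 */
        printf("FP|SSE|YMM -> %d\n",
               xcr0_acceptable(XSTATE_FP | XSTATE_SSE | XSTATE_YMM,
                               guest_caps));                              /* 0 */
        return 0;
    }
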
@@ -684,7 +693,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
684 693
685 vcpu->arch.cr3 = cr3; 694 vcpu->arch.cr3 = cr3;
686 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); 695 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
687 vcpu->arch.mmu.new_cr3(vcpu); 696 kvm_mmu_new_cr3(vcpu);
688 return 0; 697 return 0;
689} 698}
690EXPORT_SYMBOL_GPL(kvm_set_cr3); 699EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2564,6 +2573,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2564 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 2573 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2565 case KVM_CAP_SET_TSS_ADDR: 2574 case KVM_CAP_SET_TSS_ADDR:
2566 case KVM_CAP_EXT_CPUID: 2575 case KVM_CAP_EXT_CPUID:
2576 case KVM_CAP_EXT_EMUL_CPUID:
2567 case KVM_CAP_CLOCKSOURCE: 2577 case KVM_CAP_CLOCKSOURCE:
2568 case KVM_CAP_PIT: 2578 case KVM_CAP_PIT:
2569 case KVM_CAP_NOP_IO_DELAY: 2579 case KVM_CAP_NOP_IO_DELAY:
@@ -2673,15 +2683,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
2673 r = 0; 2683 r = 0;
2674 break; 2684 break;
2675 } 2685 }
2676 case KVM_GET_SUPPORTED_CPUID: { 2686 case KVM_GET_SUPPORTED_CPUID:
2687 case KVM_GET_EMULATED_CPUID: {
2677 struct kvm_cpuid2 __user *cpuid_arg = argp; 2688 struct kvm_cpuid2 __user *cpuid_arg = argp;
2678 struct kvm_cpuid2 cpuid; 2689 struct kvm_cpuid2 cpuid;
2679 2690
2680 r = -EFAULT; 2691 r = -EFAULT;
2681 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2692 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2682 goto out; 2693 goto out;
2683 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 2694
2684 cpuid_arg->entries); 2695 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
2696 ioctl);
2685 if (r) 2697 if (r)
2686 goto out; 2698 goto out;
2687 2699
@@ -2715,8 +2727,7 @@ static void wbinvd_ipi(void *garbage)
2715 2727
2716static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) 2728static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2717{ 2729{
2718 return vcpu->kvm->arch.iommu_domain && 2730 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
2719 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2720} 2731}
2721 2732
2722void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2733void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -2984,11 +2995,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2984static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, 2995static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2985 struct kvm_xsave *guest_xsave) 2996 struct kvm_xsave *guest_xsave)
2986{ 2997{
2987 if (cpu_has_xsave) 2998 if (cpu_has_xsave) {
2988 memcpy(guest_xsave->region, 2999 memcpy(guest_xsave->region,
2989 &vcpu->arch.guest_fpu.state->xsave, 3000 &vcpu->arch.guest_fpu.state->xsave,
2990 xstate_size); 3001 vcpu->arch.guest_xstate_size);
2991 else { 3002 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
3003 vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
3004 } else {
2992 memcpy(guest_xsave->region, 3005 memcpy(guest_xsave->region,
2993 &vcpu->arch.guest_fpu.state->fxsave, 3006 &vcpu->arch.guest_fpu.state->fxsave,
2994 sizeof(struct i387_fxsave_struct)); 3007 sizeof(struct i387_fxsave_struct));
@@ -3003,10 +3016,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3003 u64 xstate_bv = 3016 u64 xstate_bv =
3004 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; 3017 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3005 3018
3006 if (cpu_has_xsave) 3019 if (cpu_has_xsave) {
3020 /*
3021 * Here we allow setting states that are not present in
3022 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
3023 * with old userspace.
3024 */
3025 if (xstate_bv & ~KVM_SUPPORTED_XCR0)
3026 return -EINVAL;
3027 if (xstate_bv & ~host_xcr0)
3028 return -EINVAL;
3007 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3029 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3008 guest_xsave->region, xstate_size); 3030 guest_xsave->region, vcpu->arch.guest_xstate_size);
3009 else { 3031 } else {
3010 if (xstate_bv & ~XSTATE_FPSSE) 3032 if (xstate_bv & ~XSTATE_FPSSE)
3011 return -EINVAL; 3033 return -EINVAL;
3012 memcpy(&vcpu->arch.guest_fpu.state->fxsave, 3034 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
@@ -3042,9 +3064,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3042 3064
3043 for (i = 0; i < guest_xcrs->nr_xcrs; i++) 3065 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
3044 /* Only support XCR0 currently */ 3066 /* Only support XCR0 currently */
3045 if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { 3067 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3046 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, 3068 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3047 guest_xcrs->xcrs[0].value); 3069 guest_xcrs->xcrs[i].value);
3048 break; 3070 break;
3049 } 3071 }
3050 if (r) 3072 if (r)
@@ -4775,8 +4797,8 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4775 4797
4776static void init_decode_cache(struct x86_emulate_ctxt *ctxt) 4798static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4777{ 4799{
4778 memset(&ctxt->twobyte, 0, 4800 memset(&ctxt->opcode_len, 0,
4779 (void *)&ctxt->_regs - (void *)&ctxt->twobyte); 4801 (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
4780 4802
4781 ctxt->fetch.start = 0; 4803 ctxt->fetch.start = 0;
4782 ctxt->fetch.end = 0; 4804 ctxt->fetch.end = 0;
@@ -5094,8 +5116,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5094 ctxt->have_exception = false; 5116 ctxt->have_exception = false;
5095 ctxt->perm_ok = false; 5117 ctxt->perm_ok = false;
5096 5118
5097 ctxt->only_vendor_specific_insn 5119 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
5098 = emulation_type & EMULTYPE_TRAP_UD;
5099 5120
5100 r = x86_decode_insn(ctxt, insn, insn_len); 5121 r = x86_decode_insn(ctxt, insn, insn_len);
5101 5122
@@ -5263,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
5263 5284
5264 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); 5285 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5265 5286
5266 raw_spin_lock(&kvm_lock); 5287 spin_lock(&kvm_lock);
5267 list_for_each_entry(kvm, &vm_list, vm_list) { 5288 list_for_each_entry(kvm, &vm_list, vm_list) {
5268 kvm_for_each_vcpu(i, vcpu, kvm) { 5289 kvm_for_each_vcpu(i, vcpu, kvm) {
5269 if (vcpu->cpu != freq->cpu) 5290 if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5294,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
5273 send_ipi = 1; 5294 send_ipi = 1;
5274 } 5295 }
5275 } 5296 }
5276 raw_spin_unlock(&kvm_lock); 5297 spin_unlock(&kvm_lock);
5277 5298
5278 if (freq->old < freq->new && send_ipi) { 5299 if (freq->old < freq->new && send_ipi) {
5279 /* 5300 /*
@@ -5426,12 +5447,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
5426 struct kvm_vcpu *vcpu; 5447 struct kvm_vcpu *vcpu;
5427 int i; 5448 int i;
5428 5449
5429 raw_spin_lock(&kvm_lock); 5450 spin_lock(&kvm_lock);
5430 list_for_each_entry(kvm, &vm_list, vm_list) 5451 list_for_each_entry(kvm, &vm_list, vm_list)
5431 kvm_for_each_vcpu(i, vcpu, kvm) 5452 kvm_for_each_vcpu(i, vcpu, kvm)
5432 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); 5453 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5433 atomic_set(&kvm_guest_has_master_clock, 0); 5454 atomic_set(&kvm_guest_has_master_clock, 0);
5434 raw_spin_unlock(&kvm_lock); 5455 spin_unlock(&kvm_lock);
5435} 5456}
5436 5457
5437static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); 5458static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5945,10 +5966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5945 5966
5946 vcpu->mode = IN_GUEST_MODE; 5967 vcpu->mode = IN_GUEST_MODE;
5947 5968
5969 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5970
5948 /* We should set ->mode before check ->requests, 5971 /* We should set ->mode before check ->requests,
5949 * see the comment in make_all_cpus_request. 5972 * see the comment in make_all_cpus_request.
5950 */ 5973 */
5951 smp_mb(); 5974 smp_mb__after_srcu_read_unlock();
5952 5975
5953 local_irq_disable(); 5976 local_irq_disable();
5954 5977
@@ -5958,12 +5981,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5958 smp_wmb(); 5981 smp_wmb();
5959 local_irq_enable(); 5982 local_irq_enable();
5960 preempt_enable(); 5983 preempt_enable();
5984 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5961 r = 1; 5985 r = 1;
5962 goto cancel_injection; 5986 goto cancel_injection;
5963 } 5987 }
5964 5988
5965 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5966
5967 if (req_immediate_exit) 5989 if (req_immediate_exit)
5968 smp_send_reschedule(vcpu->cpu); 5990 smp_send_reschedule(vcpu->cpu);
5969 5991
@@ -6688,7 +6710,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6688 if (r) 6710 if (r)
6689 return r; 6711 return r;
6690 kvm_vcpu_reset(vcpu); 6712 kvm_vcpu_reset(vcpu);
6691 r = kvm_mmu_setup(vcpu); 6713 kvm_mmu_setup(vcpu);
6692 vcpu_put(vcpu); 6714 vcpu_put(vcpu);
6693 6715
6694 return r; 6716 return r;
@@ -6940,6 +6962,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6940 6962
6941 vcpu->arch.ia32_tsc_adjust_msr = 0x0; 6963 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
6942 vcpu->arch.pv_time_enabled = false; 6964 vcpu->arch.pv_time_enabled = false;
6965
6966 vcpu->arch.guest_supported_xcr0 = 0;
6967 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
6968
6943 kvm_async_pf_hash_reset(vcpu); 6969 kvm_async_pf_hash_reset(vcpu);
6944 kvm_pmu_init(vcpu); 6970 kvm_pmu_init(vcpu);
6945 6971
@@ -6981,6 +7007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6981 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 7007 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6982 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); 7008 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
6983 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 7009 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
7010 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
6984 7011
6985 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 7012 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
6986 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 7013 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
@@ -7065,7 +7092,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
7065 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7092 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7066} 7093}
7067 7094
7068void kvm_arch_free_memslot(struct kvm_memory_slot *free, 7095void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
7069 struct kvm_memory_slot *dont) 7096 struct kvm_memory_slot *dont)
7070{ 7097{
7071 int i; 7098 int i;
@@ -7086,7 +7113,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
7086 } 7113 }
7087} 7114}
7088 7115
7089int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 7116int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7117 unsigned long npages)
7090{ 7118{
7091 int i; 7119 int i;
7092 7120
@@ -7283,7 +7311,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
7283 int r; 7311 int r;
7284 7312
7285 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || 7313 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
7286 is_error_page(work->page)) 7314 work->wakeup_all)
7287 return; 7315 return;
7288 7316
7289 r = kvm_mmu_reload(vcpu); 7317 r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7421,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
7393 struct x86_exception fault; 7421 struct x86_exception fault;
7394 7422
7395 trace_kvm_async_pf_ready(work->arch.token, work->gva); 7423 trace_kvm_async_pf_ready(work->arch.token, work->gva);
7396 if (is_error_page(work->page)) 7424 if (work->wakeup_all)
7397 work->arch.token = ~0; /* broadcast wakeup */ 7425 work->arch.token = ~0; /* broadcast wakeup */
7398 else 7426 else
7399 kvm_del_async_pf_gfn(vcpu, work->arch.gfn); 7427 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
@@ -7420,6 +7448,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
7420 kvm_x86_ops->interrupt_allowed(vcpu); 7448 kvm_x86_ops->interrupt_allowed(vcpu);
7421} 7449}
7422 7450
7451void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
7452{
7453 atomic_inc(&kvm->arch.noncoherent_dma_count);
7454}
7455EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
7456
7457void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
7458{
7459 atomic_dec(&kvm->arch.noncoherent_dma_count);
7460}
7461EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
7462
7463bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
7464{
7465 return atomic_read(&kvm->arch.noncoherent_dma_count);
7466}
7467EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
7468
7423EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 7469EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
7424EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 7470EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
7425EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); 7471EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
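
kvm_arch_register_noncoherent_dma() and its companions replace the direct iommu_domain/KVM_IOMMU_CACHE_COHERENCY checks removed earlier in vmx_get_mt_mask() and need_emulate_wbinvd(): whichever path attaches a device whose DMA is not cache-coherent bumps a per-VM counter, and the memory-type and WBINVD paths only ask whether that counter is non-zero. A tiny model of the pattern using C11 atomics; the attach/detach caller shown is hypothetical (in the kernel it would be the device-assignment code), and the struct is a stand-in for struct kvm:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct vm {
        atomic_int noncoherent_dma_count;
    };

    static void register_noncoherent_dma(struct vm *vm)
    {
        atomic_fetch_add(&vm->noncoherent_dma_count, 1);
    }

    static void unregister_noncoherent_dma(struct vm *vm)
    {
        atomic_fetch_sub(&vm->noncoherent_dma_count, 1);
    }

    static bool has_noncoherent_dma(struct vm *vm)
    {
        return atomic_load(&vm->noncoherent_dma_count) != 0;
    }

    int main(void)
    {
        struct vm vm = { .noncoherent_dma_count = 0 };

        /* Hypothetical device attach/detach bracketing the counter. */
        register_noncoherent_dma(&vm);
        printf("need WBINVD emulation while attached: %d\n",
               has_noncoherent_dma(&vm));       /* 1 */
        unregister_noncoherent_dma(&vm);
        printf("after detach:                         %d\n",
               has_noncoherent_dma(&vm));       /* 0 */
        return 0;
    }
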
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e224f7a671b6..587fb9ede436 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
122 gva_t addr, void *val, unsigned int bytes, 122 gva_t addr, void *val, unsigned int bytes,
123 struct x86_exception *exception); 123 struct x86_exception *exception);
124 124
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
125extern u64 host_xcr0; 126extern u64 host_xcr0;
126 127
127extern struct static_key kvm_no_apic_vcpu; 128extern struct static_key kvm_no_apic_vcpu;
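
KVM_SUPPORTED_XCR0 caps the xstate features KVM is prepared to expose at FP, SSE and AVX. Combined with the vcpu->arch.guest_supported_xcr0 field initialized in kvm_arch_vcpu_init() above, a natural derivation (presumably done by the cpuid.c changes listed in the diffstat, which are not reproduced in this excerpt) is the intersection of what the guest's CPUID leaf 0xD advertises, what the host enabled in XCR0, and what KVM supports. A hedged sketch of that intersection only:

    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP   (1ull << 0)
    #define XSTATE_SSE  (1ull << 1)
    #define XSTATE_YMM  (1ull << 2)

    #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)

    /* Sketch: the set a guest may enable is bounded by its own CPUID leaf 0xD,
     * by the host's XCR0, and by what KVM knows how to save and restore. */
    static uint64_t guest_supported_xcr0(uint64_t guest_cpuid_xcr0,
                                         uint64_t host_xcr0)
    {
        return guest_cpuid_xcr0 & host_xcr0 & KVM_SUPPORTED_XCR0;
    }

    int main(void)
    {
        uint64_t host = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
        uint64_t guest_cpuid = XSTATE_FP | XSTATE_SSE;   /* AVX hidden from guest */

        printf("guest_supported_xcr0 = %#llx\n",
               (unsigned long long)guest_supported_xcr0(guest_cpuid, host));
        return 0;
    }
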