author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-12 18:35:14 -0400
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-12 18:35:14 -0400
commit | 39d7530d7494b4e47ba1856e741f513dafd17e3d (patch)
tree | 6b16a744047cff9ff77f26bc5811fe9d953a9b91 /arch/x86
parent | 16c97650a56abdd067f7da079007b7e00b307083 (diff)
parent | a45ff5994c9cde41af627c46abb9f32beae68943 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"ARM:
- support for chained PMU counters in guests
- improved SError handling
- handle Neoverse N1 erratum #1349291
- allow side-channel mitigation status to be migrated
- standardise most AArch64 system register accesses to msr_s/mrs_s
- fix host MPIDR corruption on 32bit
- selftests cleanups
x86:
- PMU event {white,black}listing
- ability for the guest to disable host-side interrupt polling
- fixes for enlightened VMCS (Hyper-V pv nested virtualization)
- new hypercall to yield to IPI target
- support for passing cstate MSRs through to the guest
- lots of cleanups and optimizations
Generic:
- Some txt->rST conversions for the documentation"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (128 commits)
Documentation: virtual: Add toctree hooks
Documentation: kvm: Convert cpuid.txt to .rst
Documentation: virtual: Convert paravirt_ops.txt to .rst
KVM: x86: Unconditionally enable irqs in guest context
KVM: x86: PMU Event Filter
kvm: x86: Fix -Wmissing-prototypes warnings
KVM: Properly check if "page" is valid in kvm_vcpu_unmap
KVM: arm/arm64: Initialise host's MPIDRs by reading the actual register
KVM: LAPIC: Retry tune per-vCPU timer_advance_ns if adaptive tuning goes insane
kvm: LAPIC: write down valid APIC registers
KVM: arm64: Migrate _elx sysreg accessors to msr_s/mrs_s
KVM: doc: Add API documentation on the KVM_REG_ARM_WORKAROUNDS register
KVM: arm/arm64: Add save/restore support for firmware workaround state
arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests
KVM: arm/arm64: Support chained PMU counters
KVM: arm/arm64: Remove pmc->bitmask
KVM: arm/arm64: Re-create event when setting counter value
KVM: arm/arm64: Extract duplicated code to own function
KVM: arm/arm64: Rename kvm_pmu_{enable/disable}_counter functions
KVM: LAPIC: ARBPRI is a reserved register for x2APIC
...
Diffstat (limited to 'arch/x86')
31 files changed, 1611 insertions, 980 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 26d1eb83f72a..0cc5b611a113 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -686,6 +686,7 @@ struct kvm_vcpu_arch { | |||
686 | u32 virtual_tsc_mult; | 686 | u32 virtual_tsc_mult; |
687 | u32 virtual_tsc_khz; | 687 | u32 virtual_tsc_khz; |
688 | s64 ia32_tsc_adjust_msr; | 688 | s64 ia32_tsc_adjust_msr; |
689 | u64 msr_ia32_power_ctl; | ||
689 | u64 tsc_scaling_ratio; | 690 | u64 tsc_scaling_ratio; |
690 | 691 | ||
691 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ | 692 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
@@ -752,6 +753,8 @@ struct kvm_vcpu_arch { | |||
752 | struct gfn_to_hva_cache data; | 753 | struct gfn_to_hva_cache data; |
753 | } pv_eoi; | 754 | } pv_eoi; |
754 | 755 | ||
756 | u64 msr_kvm_poll_control; | ||
757 | |||
755 | /* | 758 | /* |
756 | * Indicate whether the access faults on its page table in guest | 759 | * Indicate whether the access faults on its page table in guest |
757 | * which is set when fix page fault and used to detect unhandeable | 760 | * which is set when fix page fault and used to detect unhandeable |
@@ -879,6 +882,7 @@ struct kvm_arch { | |||
879 | bool mwait_in_guest; | 882 | bool mwait_in_guest; |
880 | bool hlt_in_guest; | 883 | bool hlt_in_guest; |
881 | bool pause_in_guest; | 884 | bool pause_in_guest; |
885 | bool cstate_in_guest; | ||
882 | 886 | ||
883 | unsigned long irq_sources_bitmap; | 887 | unsigned long irq_sources_bitmap; |
884 | s64 kvmclock_offset; | 888 | s64 kvmclock_offset; |
@@ -926,6 +930,8 @@ struct kvm_arch { | |||
926 | 930 | ||
927 | bool guest_can_read_msr_platform_info; | 931 | bool guest_can_read_msr_platform_info; |
928 | bool exception_payload_enabled; | 932 | bool exception_payload_enabled; |
933 | |||
934 | struct kvm_pmu_event_filter *pmu_event_filter; | ||
929 | }; | 935 | }; |
930 | 936 | ||
931 | struct kvm_vm_stat { | 937 | struct kvm_vm_stat { |
@@ -996,7 +1002,7 @@ struct kvm_x86_ops { | |||
996 | int (*disabled_by_bios)(void); /* __init */ | 1002 | int (*disabled_by_bios)(void); /* __init */ |
997 | int (*hardware_enable)(void); | 1003 | int (*hardware_enable)(void); |
998 | void (*hardware_disable)(void); | 1004 | void (*hardware_disable)(void); |
999 | void (*check_processor_compatibility)(void *rtn); | 1005 | int (*check_processor_compatibility)(void);/* __init */ |
1000 | int (*hardware_setup)(void); /* __init */ | 1006 | int (*hardware_setup)(void); /* __init */ |
1001 | void (*hardware_unsetup)(void); /* __exit */ | 1007 | void (*hardware_unsetup)(void); /* __exit */ |
1002 | bool (*cpu_has_accelerated_tpr)(void); | 1008 | bool (*cpu_has_accelerated_tpr)(void); |
@@ -1110,7 +1116,7 @@ struct kvm_x86_ops { | |||
1110 | int (*check_intercept)(struct kvm_vcpu *vcpu, | 1116 | int (*check_intercept)(struct kvm_vcpu *vcpu, |
1111 | struct x86_instruction_info *info, | 1117 | struct x86_instruction_info *info, |
1112 | enum x86_intercept_stage stage); | 1118 | enum x86_intercept_stage stage); |
1113 | void (*handle_external_intr)(struct kvm_vcpu *vcpu); | 1119 | void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); |
1114 | bool (*mpx_supported)(void); | 1120 | bool (*mpx_supported)(void); |
1115 | bool (*xsaves_supported)(void); | 1121 | bool (*xsaves_supported)(void); |
1116 | bool (*umip_emulated)(void); | 1122 | bool (*umip_emulated)(void); |
@@ -1529,7 +1535,6 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, | |||
1529 | unsigned long ipi_bitmap_high, u32 min, | 1535 | unsigned long ipi_bitmap_high, u32 min, |
1530 | unsigned long icr, int op_64_bit); | 1536 | unsigned long icr, int op_64_bit); |
1531 | 1537 | ||
1532 | u64 kvm_get_arch_capabilities(void); | ||
1533 | void kvm_define_shared_msr(unsigned index, u32 msr); | 1538 | void kvm_define_shared_msr(unsigned index, u32 msr); |
1534 | int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 1539 | int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
1535 | 1540 | ||
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d6ab5b4d15e5..e901b0ab116f 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -378,10 +378,11 @@ struct kvm_sync_regs { | |||
378 | struct kvm_vcpu_events events; | 378 | struct kvm_vcpu_events events; |
379 | }; | 379 | }; |
380 | 380 | ||
381 | #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) | 381 | #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) |
382 | #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) | 382 | #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) |
383 | #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) | 383 | #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) |
384 | #define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) | 384 | #define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) |
385 | #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) | ||
385 | 386 | ||
386 | #define KVM_STATE_NESTED_FORMAT_VMX 0 | 387 | #define KVM_STATE_NESTED_FORMAT_VMX 0 |
387 | #define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ | 388 | #define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ |
@@ -432,4 +433,14 @@ struct kvm_nested_state { | |||
432 | } data; | 433 | } data; |
433 | }; | 434 | }; |
434 | 435 | ||
436 | /* for KVM_CAP_PMU_EVENT_FILTER */ | ||
437 | struct kvm_pmu_event_filter { | ||
438 | __u32 action; | ||
439 | __u32 nevents; | ||
440 | __u64 events[0]; | ||
441 | }; | ||
442 | |||
443 | #define KVM_PMU_EVENT_ALLOW 0 | ||
444 | #define KVM_PMU_EVENT_DENY 1 | ||
445 | |||
435 | #endif /* _ASM_X86_KVM_H */ | 446 | #endif /* _ASM_X86_KVM_H */ |
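The kvm_pmu_event_filter structure added above is the uAPI behind the "PMU event {white,black}listing" item in the pull message. As context, a minimal VMM-side sketch of how the variable-length filter might be built and installed; only the struct layout and the KVM_PMU_EVENT_ALLOW/DENY actions come from the hunk above, while the KVM_SET_PMU_EVENT_FILTER ioctl and the raw event encodings are assumptions for illustration.

```c
/*
 * Hedged VMM-side sketch (not part of this diff): install an
 * allow-list PMU filter for a VM.  KVM_SET_PMU_EVENT_FILTER and the
 * event encodings below are assumed for illustration only.
 */
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_pmu_allow_list(int vm_fd)
{
	const __u64 events[] = {
		0x003c,		/* illustrative: unhalted core cycles */
		0x00c0,		/* illustrative: instructions retired */
	};
	size_t sz = sizeof(struct kvm_pmu_event_filter) + sizeof(events);
	struct kvm_pmu_event_filter *filter;
	int ret;

	filter = calloc(1, sz);		/* events[] is a flexible array */
	if (!filter)
		return -1;

	filter->action  = KVM_PMU_EVENT_ALLOW;	/* only listed events are counted */
	filter->nevents = sizeof(events) / sizeof(events[0]);
	memcpy(filter->events, events, sizeof(events));

	ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, filter);	/* assumed ioctl */
	free(filter);
	return ret;
}
```

Because events[] is a zero-length array, the caller sizes the allocation as the struct header plus nevents 64-bit entries, which is why the filter is heap-allocated rather than declared on the stack.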
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 19980ec1a316..2a8e0b6b9805 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,6 +29,8 @@ | |||
29 | #define KVM_FEATURE_PV_TLB_FLUSH 9 | 29 | #define KVM_FEATURE_PV_TLB_FLUSH 9 |
30 | #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 | 30 | #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 |
31 | #define KVM_FEATURE_PV_SEND_IPI 11 | 31 | #define KVM_FEATURE_PV_SEND_IPI 11 |
32 | #define KVM_FEATURE_POLL_CONTROL 12 | ||
33 | #define KVM_FEATURE_PV_SCHED_YIELD 13 | ||
32 | 34 | ||
33 | #define KVM_HINTS_REALTIME 0 | 35 | #define KVM_HINTS_REALTIME 0 |
34 | 36 | ||
@@ -47,6 +49,7 @@ | |||
47 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 | 49 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 |
48 | #define MSR_KVM_STEAL_TIME 0x4b564d03 | 50 | #define MSR_KVM_STEAL_TIME 0x4b564d03 |
49 | #define MSR_KVM_PV_EOI_EN 0x4b564d04 | 51 | #define MSR_KVM_PV_EOI_EN 0x4b564d04 |
52 | #define MSR_KVM_POLL_CONTROL 0x4b564d05 | ||
50 | 53 | ||
51 | struct kvm_steal_time { | 54 | struct kvm_steal_time { |
52 | __u64 steal; | 55 | __u64 steal; |
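KVM_FEATURE_POLL_CONTROL and MSR_KVM_POLL_CONTROL added above are the guest-visible half of "ability for the guest to disable host-side interrupt polling". A hypothetical guest-side sketch follows; it assumes bit 0 of the MSR means host-side HLT polling enabled (1) or disabled (0), and the helper name is made up rather than taken from this diff.

```c
/*
 * Hypothetical guest-side sketch: opt a vCPU out of host-side halt
 * polling when the hypervisor advertises KVM_FEATURE_POLL_CONTROL.
 * Assumption: bit 0 of MSR_KVM_POLL_CONTROL is 1 = host may poll,
 * 0 = host must not poll.  The MSR is per vCPU, so a real caller
 * would run this on every CPU (e.g. via on_each_cpu()).
 */
#include <asm/kvm_para.h>
#include <asm/msr.h>

static void kvm_disable_host_haltpoll(void *unused)
{
	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
		return;

	wrmsrl(MSR_KVM_POLL_CONTROL, 0);	/* 0 = don't poll on HLT */
}
```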
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d213ec5c3766..f0b0c90dd398 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -146,7 +146,6 @@ | |||
146 | 146 | ||
147 | #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 | 147 | #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 |
148 | #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 | 148 | #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 |
149 | #define VMX_ABORT_VMCS_CORRUPTED 3 | ||
150 | #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 | 149 | #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 |
151 | 150 | ||
152 | #endif /* _UAPIVMX_H */ | 151 | #endif /* _UAPIVMX_H */ |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5169b8cc35bb..82caf01b63dd 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -527,6 +527,21 @@ static void kvm_setup_pv_ipi(void) | |||
527 | pr_info("KVM setup pv IPIs\n"); | 527 | pr_info("KVM setup pv IPIs\n"); |
528 | } | 528 | } |
529 | 529 | ||
530 | static void kvm_smp_send_call_func_ipi(const struct cpumask *mask) | ||
531 | { | ||
532 | int cpu; | ||
533 | |||
534 | native_send_call_func_ipi(mask); | ||
535 | |||
536 | /* Make sure other vCPUs get a chance to run if they need to. */ | ||
537 | for_each_cpu(cpu, mask) { | ||
538 | if (vcpu_is_preempted(cpu)) { | ||
539 | kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu)); | ||
540 | break; | ||
541 | } | ||
542 | } | ||
543 | } | ||
544 | |||
530 | static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) | 545 | static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) |
531 | { | 546 | { |
532 | native_smp_prepare_cpus(max_cpus); | 547 | native_smp_prepare_cpus(max_cpus); |
@@ -638,6 +653,12 @@ static void __init kvm_guest_init(void) | |||
638 | #ifdef CONFIG_SMP | 653 | #ifdef CONFIG_SMP |
639 | smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; | 654 | smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; |
640 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 655 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
656 | if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && | ||
657 | !kvm_para_has_hint(KVM_HINTS_REALTIME) && | ||
658 | kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { | ||
659 | smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi; | ||
660 | pr_info("KVM setup pv sched yield\n"); | ||
661 | } | ||
641 | if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", | 662 | if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", |
642 | kvm_cpu_online, kvm_cpu_down_prepare) < 0) | 663 | kvm_cpu_online, kvm_cpu_down_prepare) < 0) |
643 | pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); | 664 | pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index fc042419e670..840e12583b85 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -41,6 +41,7 @@ config KVM | |||
41 | select PERF_EVENTS | 41 | select PERF_EVENTS |
42 | select HAVE_KVM_MSI | 42 | select HAVE_KVM_MSI |
43 | select HAVE_KVM_CPU_RELAX_INTERCEPT | 43 | select HAVE_KVM_CPU_RELAX_INTERCEPT |
44 | select HAVE_KVM_NO_POLL | ||
44 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT | 45 | select KVM_GENERIC_DIRTYLOG_READ_PROTECT |
45 | select KVM_VFIO | 46 | select KVM_VFIO |
46 | select SRCU | 47 | select SRCU |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 4992e7c99588..ead681210306 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -134,6 +134,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
134 | (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) | 134 | (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) |
135 | best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); | 135 | best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); |
136 | 136 | ||
137 | if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { | ||
138 | best = kvm_find_cpuid_entry(vcpu, 0x1, 0); | ||
139 | if (best) { | ||
140 | if (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT) | ||
141 | best->ecx |= F(MWAIT); | ||
142 | else | ||
143 | best->ecx &= ~F(MWAIT); | ||
144 | } | ||
145 | } | ||
146 | |||
137 | /* Update physical-address width */ | 147 | /* Update physical-address width */ |
138 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | 148 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); |
139 | kvm_mmu_reset_context(vcpu); | 149 | kvm_mmu_reset_context(vcpu); |
@@ -276,19 +286,38 @@ static void cpuid_mask(u32 *word, int wordnum) | |||
276 | *word &= boot_cpu_data.x86_capability[wordnum]; | 286 | *word &= boot_cpu_data.x86_capability[wordnum]; |
277 | } | 287 | } |
278 | 288 | ||
279 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 289 | static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function, |
280 | u32 index) | 290 | u32 index) |
281 | { | 291 | { |
282 | entry->function = function; | 292 | entry->function = function; |
283 | entry->index = index; | 293 | entry->index = index; |
294 | entry->flags = 0; | ||
295 | |||
284 | cpuid_count(entry->function, entry->index, | 296 | cpuid_count(entry->function, entry->index, |
285 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); | 297 | &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); |
286 | entry->flags = 0; | 298 | |
299 | switch (function) { | ||
300 | case 2: | ||
301 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
302 | break; | ||
303 | case 4: | ||
304 | case 7: | ||
305 | case 0xb: | ||
306 | case 0xd: | ||
307 | case 0x14: | ||
308 | case 0x8000001d: | ||
309 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
310 | break; | ||
311 | } | ||
287 | } | 312 | } |
288 | 313 | ||
289 | static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, | 314 | static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, |
290 | u32 func, u32 index, int *nent, int maxnent) | 315 | u32 func, int *nent, int maxnent) |
291 | { | 316 | { |
317 | entry->function = func; | ||
318 | entry->index = 0; | ||
319 | entry->flags = 0; | ||
320 | |||
292 | switch (func) { | 321 | switch (func) { |
293 | case 0: | 322 | case 0: |
294 | entry->eax = 7; | 323 | entry->eax = 7; |
@@ -300,21 +329,83 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, | |||
300 | break; | 329 | break; |
301 | case 7: | 330 | case 7: |
302 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 331 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
303 | if (index == 0) | 332 | entry->eax = 0; |
304 | entry->ecx = F(RDPID); | 333 | entry->ecx = F(RDPID); |
305 | ++*nent; | 334 | ++*nent; |
306 | default: | 335 | default: |
307 | break; | 336 | break; |
308 | } | 337 | } |
309 | 338 | ||
310 | entry->function = func; | ||
311 | entry->index = index; | ||
312 | |||
313 | return 0; | 339 | return 0; |
314 | } | 340 | } |
315 | 341 | ||
316 | static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 342 | static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) |
317 | u32 index, int *nent, int maxnent) | 343 | { |
344 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; | ||
345 | unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; | ||
346 | unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; | ||
347 | unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; | ||
348 | unsigned f_la57; | ||
349 | |||
350 | /* cpuid 7.0.ebx */ | ||
351 | const u32 kvm_cpuid_7_0_ebx_x86_features = | ||
352 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | | ||
353 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | | ||
354 | F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | | ||
355 | F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | | ||
356 | F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt; | ||
357 | |||
358 | /* cpuid 7.0.ecx*/ | ||
359 | const u32 kvm_cpuid_7_0_ecx_x86_features = | ||
360 | F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | | ||
361 | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | | ||
362 | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | | ||
363 | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); | ||
364 | |||
365 | /* cpuid 7.0.edx*/ | ||
366 | const u32 kvm_cpuid_7_0_edx_x86_features = | ||
367 | F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | | ||
368 | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | | ||
369 | F(MD_CLEAR); | ||
370 | |||
371 | switch (index) { | ||
372 | case 0: | ||
373 | entry->eax = 0; | ||
374 | entry->ebx &= kvm_cpuid_7_0_ebx_x86_features; | ||
375 | cpuid_mask(&entry->ebx, CPUID_7_0_EBX); | ||
376 | /* TSC_ADJUST is emulated */ | ||
377 | entry->ebx |= F(TSC_ADJUST); | ||
378 | |||
379 | entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; | ||
380 | f_la57 = entry->ecx & F(LA57); | ||
381 | cpuid_mask(&entry->ecx, CPUID_7_ECX); | ||
382 | /* Set LA57 based on hardware capability. */ | ||
383 | entry->ecx |= f_la57; | ||
384 | entry->ecx |= f_umip; | ||
385 | /* PKU is not yet implemented for shadow paging. */ | ||
386 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) | ||
387 | entry->ecx &= ~F(PKU); | ||
388 | |||
389 | entry->edx &= kvm_cpuid_7_0_edx_x86_features; | ||
390 | cpuid_mask(&entry->edx, CPUID_7_EDX); | ||
391 | /* | ||
392 | * We emulate ARCH_CAPABILITIES in software even | ||
393 | * if the host doesn't support it. | ||
394 | */ | ||
395 | entry->edx |= F(ARCH_CAPABILITIES); | ||
396 | break; | ||
397 | default: | ||
398 | WARN_ON_ONCE(1); | ||
399 | entry->eax = 0; | ||
400 | entry->ebx = 0; | ||
401 | entry->ecx = 0; | ||
402 | entry->edx = 0; | ||
403 | break; | ||
404 | } | ||
405 | } | ||
406 | |||
407 | static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, | ||
408 | int *nent, int maxnent) | ||
318 | { | 409 | { |
319 | int r; | 410 | int r; |
320 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | 411 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; |
@@ -327,12 +418,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
327 | unsigned f_lm = 0; | 418 | unsigned f_lm = 0; |
328 | #endif | 419 | #endif |
329 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | 420 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; |
330 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; | ||
331 | unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; | ||
332 | unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; | 421 | unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; |
333 | unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; | ||
334 | unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; | 422 | unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; |
335 | unsigned f_la57 = 0; | ||
336 | 423 | ||
337 | /* cpuid 1.edx */ | 424 | /* cpuid 1.edx */ |
338 | const u32 kvm_cpuid_1_edx_x86_features = | 425 | const u32 kvm_cpuid_1_edx_x86_features = |
@@ -377,7 +464,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
377 | /* cpuid 0x80000008.ebx */ | 464 | /* cpuid 0x80000008.ebx */ |
378 | const u32 kvm_cpuid_8000_0008_ebx_x86_features = | 465 | const u32 kvm_cpuid_8000_0008_ebx_x86_features = |
379 | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | | 466 | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | |
380 | F(AMD_SSB_NO) | F(AMD_STIBP); | 467 | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); |
381 | 468 | ||
382 | /* cpuid 0xC0000001.edx */ | 469 | /* cpuid 0xC0000001.edx */ |
383 | const u32 kvm_cpuid_C000_0001_edx_x86_features = | 470 | const u32 kvm_cpuid_C000_0001_edx_x86_features = |
@@ -385,31 +472,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
385 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | 472 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | |
386 | F(PMM) | F(PMM_EN); | 473 | F(PMM) | F(PMM_EN); |
387 | 474 | ||
388 | /* cpuid 7.0.ebx */ | ||
389 | const u32 kvm_cpuid_7_0_ebx_x86_features = | ||
390 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | | ||
391 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | | ||
392 | F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | | ||
393 | F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | | ||
394 | F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt; | ||
395 | |||
396 | /* cpuid 0xD.1.eax */ | 475 | /* cpuid 0xD.1.eax */ |
397 | const u32 kvm_cpuid_D_1_eax_x86_features = | 476 | const u32 kvm_cpuid_D_1_eax_x86_features = |
398 | F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; | 477 | F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; |
399 | 478 | ||
400 | /* cpuid 7.0.ecx*/ | ||
401 | const u32 kvm_cpuid_7_0_ecx_x86_features = | ||
402 | F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | | ||
403 | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | | ||
404 | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | | ||
405 | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); | ||
406 | |||
407 | /* cpuid 7.0.edx*/ | ||
408 | const u32 kvm_cpuid_7_0_edx_x86_features = | ||
409 | F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | | ||
410 | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | | ||
411 | F(MD_CLEAR); | ||
412 | |||
413 | /* all calls to cpuid_count() should be made on the same cpu */ | 479 | /* all calls to cpuid_count() should be made on the same cpu */ |
414 | get_cpu(); | 480 | get_cpu(); |
415 | 481 | ||
@@ -418,12 +484,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
418 | if (*nent >= maxnent) | 484 | if (*nent >= maxnent) |
419 | goto out; | 485 | goto out; |
420 | 486 | ||
421 | do_cpuid_1_ent(entry, function, index); | 487 | do_host_cpuid(entry, function, 0); |
422 | ++*nent; | 488 | ++*nent; |
423 | 489 | ||
424 | switch (function) { | 490 | switch (function) { |
425 | case 0: | 491 | case 0: |
426 | entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd)); | 492 | /* Limited to the highest leaf implemented in KVM. */ |
493 | entry->eax = min(entry->eax, 0x1fU); | ||
427 | break; | 494 | break; |
428 | case 1: | 495 | case 1: |
429 | entry->edx &= kvm_cpuid_1_edx_x86_features; | 496 | entry->edx &= kvm_cpuid_1_edx_x86_features; |
@@ -441,14 +508,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
441 | case 2: { | 508 | case 2: { |
442 | int t, times = entry->eax & 0xff; | 509 | int t, times = entry->eax & 0xff; |
443 | 510 | ||
444 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
445 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | 511 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; |
446 | for (t = 1; t < times; ++t) { | 512 | for (t = 1; t < times; ++t) { |
447 | if (*nent >= maxnent) | 513 | if (*nent >= maxnent) |
448 | goto out; | 514 | goto out; |
449 | 515 | ||
450 | do_cpuid_1_ent(&entry[t], function, 0); | 516 | do_host_cpuid(&entry[t], function, 0); |
451 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | ||
452 | ++*nent; | 517 | ++*nent; |
453 | } | 518 | } |
454 | break; | 519 | break; |
@@ -458,7 +523,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
458 | case 0x8000001d: { | 523 | case 0x8000001d: { |
459 | int i, cache_type; | 524 | int i, cache_type; |
460 | 525 | ||
461 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
462 | /* read more entries until cache_type is zero */ | 526 | /* read more entries until cache_type is zero */ |
463 | for (i = 1; ; ++i) { | 527 | for (i = 1; ; ++i) { |
464 | if (*nent >= maxnent) | 528 | if (*nent >= maxnent) |
@@ -467,9 +531,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
467 | cache_type = entry[i - 1].eax & 0x1f; | 531 | cache_type = entry[i - 1].eax & 0x1f; |
468 | if (!cache_type) | 532 | if (!cache_type) |
469 | break; | 533 | break; |
470 | do_cpuid_1_ent(&entry[i], function, i); | 534 | do_host_cpuid(&entry[i], function, i); |
471 | entry[i].flags |= | ||
472 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
473 | ++*nent; | 535 | ++*nent; |
474 | } | 536 | } |
475 | break; | 537 | break; |
@@ -480,36 +542,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
480 | entry->ecx = 0; | 542 | entry->ecx = 0; |
481 | entry->edx = 0; | 543 | entry->edx = 0; |
482 | break; | 544 | break; |
545 | /* function 7 has additional index. */ | ||
483 | case 7: { | 546 | case 7: { |
484 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 547 | int i; |
485 | /* Mask ebx against host capability word 9 */ | 548 | |
486 | if (index == 0) { | 549 | for (i = 0; ; ) { |
487 | entry->ebx &= kvm_cpuid_7_0_ebx_x86_features; | 550 | do_cpuid_7_mask(&entry[i], i); |
488 | cpuid_mask(&entry->ebx, CPUID_7_0_EBX); | 551 | if (i == entry->eax) |
489 | // TSC_ADJUST is emulated | 552 | break; |
490 | entry->ebx |= F(TSC_ADJUST); | 553 | if (*nent >= maxnent) |
491 | entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; | 554 | goto out; |
492 | f_la57 = entry->ecx & F(LA57); | 555 | |
493 | cpuid_mask(&entry->ecx, CPUID_7_ECX); | 556 | ++i; |
494 | /* Set LA57 based on hardware capability. */ | 557 | do_host_cpuid(&entry[i], function, i); |
495 | entry->ecx |= f_la57; | 558 | ++*nent; |
496 | entry->ecx |= f_umip; | ||
497 | /* PKU is not yet implemented for shadow paging. */ | ||
498 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) | ||
499 | entry->ecx &= ~F(PKU); | ||
500 | entry->edx &= kvm_cpuid_7_0_edx_x86_features; | ||
501 | cpuid_mask(&entry->edx, CPUID_7_EDX); | ||
502 | /* | ||
503 | * We emulate ARCH_CAPABILITIES in software even | ||
504 | * if the host doesn't support it. | ||
505 | */ | ||
506 | entry->edx |= F(ARCH_CAPABILITIES); | ||
507 | } else { | ||
508 | entry->ebx = 0; | ||
509 | entry->ecx = 0; | ||
510 | entry->edx = 0; | ||
511 | } | 559 | } |
512 | entry->eax = 0; | ||
513 | break; | 560 | break; |
514 | } | 561 | } |
515 | case 9: | 562 | case 9: |
@@ -543,11 +590,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
543 | entry->edx = edx.full; | 590 | entry->edx = edx.full; |
544 | break; | 591 | break; |
545 | } | 592 | } |
546 | /* function 0xb has additional index. */ | 593 | /* |
594 | * Per Intel's SDM, the 0x1f is a superset of 0xb, | ||
595 | * thus they can be handled by common code. | ||
596 | */ | ||
597 | case 0x1f: | ||
547 | case 0xb: { | 598 | case 0xb: { |
548 | int i, level_type; | 599 | int i, level_type; |
549 | 600 | ||
550 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
551 | /* read more entries until level_type is zero */ | 601 | /* read more entries until level_type is zero */ |
552 | for (i = 1; ; ++i) { | 602 | for (i = 1; ; ++i) { |
553 | if (*nent >= maxnent) | 603 | if (*nent >= maxnent) |
@@ -556,9 +606,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
556 | level_type = entry[i - 1].ecx & 0xff00; | 606 | level_type = entry[i - 1].ecx & 0xff00; |
557 | if (!level_type) | 607 | if (!level_type) |
558 | break; | 608 | break; |
559 | do_cpuid_1_ent(&entry[i], function, i); | 609 | do_host_cpuid(&entry[i], function, i); |
560 | entry[i].flags |= | ||
561 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
562 | ++*nent; | 610 | ++*nent; |
563 | } | 611 | } |
564 | break; | 612 | break; |
@@ -571,7 +619,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
571 | entry->ebx = xstate_required_size(supported, false); | 619 | entry->ebx = xstate_required_size(supported, false); |
572 | entry->ecx = entry->ebx; | 620 | entry->ecx = entry->ebx; |
573 | entry->edx &= supported >> 32; | 621 | entry->edx &= supported >> 32; |
574 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
575 | if (!supported) | 622 | if (!supported) |
576 | break; | 623 | break; |
577 | 624 | ||
@@ -580,7 +627,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
580 | if (*nent >= maxnent) | 627 | if (*nent >= maxnent) |
581 | goto out; | 628 | goto out; |
582 | 629 | ||
583 | do_cpuid_1_ent(&entry[i], function, idx); | 630 | do_host_cpuid(&entry[i], function, idx); |
584 | if (idx == 1) { | 631 | if (idx == 1) { |
585 | entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; | 632 | entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; |
586 | cpuid_mask(&entry[i].eax, CPUID_D_1_EAX); | 633 | cpuid_mask(&entry[i].eax, CPUID_D_1_EAX); |
@@ -597,8 +644,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
597 | } | 644 | } |
598 | entry[i].ecx = 0; | 645 | entry[i].ecx = 0; |
599 | entry[i].edx = 0; | 646 | entry[i].edx = 0; |
600 | entry[i].flags |= | ||
601 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
602 | ++*nent; | 647 | ++*nent; |
603 | ++i; | 648 | ++i; |
604 | } | 649 | } |
@@ -611,12 +656,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
611 | if (!f_intel_pt) | 656 | if (!f_intel_pt) |
612 | break; | 657 | break; |
613 | 658 | ||
614 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
615 | for (t = 1; t <= times; ++t) { | 659 | for (t = 1; t <= times; ++t) { |
616 | if (*nent >= maxnent) | 660 | if (*nent >= maxnent) |
617 | goto out; | 661 | goto out; |
618 | do_cpuid_1_ent(&entry[t], function, t); | 662 | do_host_cpuid(&entry[t], function, t); |
619 | entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
620 | ++*nent; | 663 | ++*nent; |
621 | } | 664 | } |
622 | break; | 665 | break; |
@@ -640,7 +683,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
640 | (1 << KVM_FEATURE_PV_UNHALT) | | 683 | (1 << KVM_FEATURE_PV_UNHALT) | |
641 | (1 << KVM_FEATURE_PV_TLB_FLUSH) | | 684 | (1 << KVM_FEATURE_PV_TLB_FLUSH) | |
642 | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | | 685 | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | |
643 | (1 << KVM_FEATURE_PV_SEND_IPI); | 686 | (1 << KVM_FEATURE_PV_SEND_IPI) | |
687 | (1 << KVM_FEATURE_POLL_CONTROL) | | ||
688 | (1 << KVM_FEATURE_PV_SCHED_YIELD); | ||
644 | 689 | ||
645 | if (sched_info_on()) | 690 | if (sched_info_on()) |
646 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | 691 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); |
@@ -730,21 +775,19 @@ out: | |||
730 | return r; | 775 | return r; |
731 | } | 776 | } |
732 | 777 | ||
733 | static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, | 778 | static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func, |
734 | u32 idx, int *nent, int maxnent, unsigned int type) | 779 | int *nent, int maxnent, unsigned int type) |
735 | { | 780 | { |
736 | if (type == KVM_GET_EMULATED_CPUID) | 781 | if (type == KVM_GET_EMULATED_CPUID) |
737 | return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); | 782 | return __do_cpuid_func_emulated(entry, func, nent, maxnent); |
738 | 783 | ||
739 | return __do_cpuid_ent(entry, func, idx, nent, maxnent); | 784 | return __do_cpuid_func(entry, func, nent, maxnent); |
740 | } | 785 | } |
741 | 786 | ||
742 | #undef F | 787 | #undef F |
743 | 788 | ||
744 | struct kvm_cpuid_param { | 789 | struct kvm_cpuid_param { |
745 | u32 func; | 790 | u32 func; |
746 | u32 idx; | ||
747 | bool has_leaf_count; | ||
748 | bool (*qualifier)(const struct kvm_cpuid_param *param); | 791 | bool (*qualifier)(const struct kvm_cpuid_param *param); |
749 | }; | 792 | }; |
750 | 793 | ||
@@ -788,11 +831,10 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, | |||
788 | int limit, nent = 0, r = -E2BIG, i; | 831 | int limit, nent = 0, r = -E2BIG, i; |
789 | u32 func; | 832 | u32 func; |
790 | static const struct kvm_cpuid_param param[] = { | 833 | static const struct kvm_cpuid_param param[] = { |
791 | { .func = 0, .has_leaf_count = true }, | 834 | { .func = 0 }, |
792 | { .func = 0x80000000, .has_leaf_count = true }, | 835 | { .func = 0x80000000 }, |
793 | { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, | 836 | { .func = 0xC0000000, .qualifier = is_centaur_cpu }, |
794 | { .func = KVM_CPUID_SIGNATURE }, | 837 | { .func = KVM_CPUID_SIGNATURE }, |
795 | { .func = KVM_CPUID_FEATURES }, | ||
796 | }; | 838 | }; |
797 | 839 | ||
798 | if (cpuid->nent < 1) | 840 | if (cpuid->nent < 1) |
@@ -816,19 +858,16 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, | |||
816 | if (ent->qualifier && !ent->qualifier(ent)) | 858 | if (ent->qualifier && !ent->qualifier(ent)) |
817 | continue; | 859 | continue; |
818 | 860 | ||
819 | r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, | 861 | r = do_cpuid_func(&cpuid_entries[nent], ent->func, |
820 | &nent, cpuid->nent, type); | 862 | &nent, cpuid->nent, type); |
821 | 863 | ||
822 | if (r) | 864 | if (r) |
823 | goto out_free; | 865 | goto out_free; |
824 | 866 | ||
825 | if (!ent->has_leaf_count) | ||
826 | continue; | ||
827 | |||
828 | limit = cpuid_entries[nent - 1].eax; | 867 | limit = cpuid_entries[nent - 1].eax; |
829 | for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) | 868 | for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) |
830 | r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, | 869 | r = do_cpuid_func(&cpuid_entries[nent], func, |
831 | &nent, cpuid->nent, type); | 870 | &nent, cpuid->nent, type); |
832 | 871 | ||
833 | if (r) | 872 | if (r) |
834 | goto out_free; | 873 | goto out_free; |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4a387a235424..8e409ad448f9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4258,7 +4258,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) | |||
4258 | ulong dr6; | 4258 | ulong dr6; |
4259 | 4259 | ||
4260 | ctxt->ops->get_dr(ctxt, 6, &dr6); | 4260 | ctxt->ops->get_dr(ctxt, 6, &dr6); |
4261 | dr6 &= ~15; | 4261 | dr6 &= ~DR_TRAP_BITS; |
4262 | dr6 |= DR6_BD | DR6_RTM; | 4262 | dr6 |= DR6_BD | DR6_RTM; |
4263 | ctxt->ops->set_dr(ctxt, 6, dr6); | 4263 | ctxt->ops->set_dr(ctxt, 6, dr6); |
4264 | return emulate_db(ctxt); | 4264 | return emulate_db(ctxt); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index d6519a3aa959..7c6233d37c64 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -102,7 +102,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) | |||
102 | return mode != KVM_IRQCHIP_NONE; | 102 | return mode != KVM_IRQCHIP_NONE; |
103 | } | 103 | } |
104 | 104 | ||
105 | bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); | ||
106 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | 105 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); |
107 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | 106 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); |
108 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); | 107 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 924b3bd5a7b7..8ecd48d31800 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -75,7 +75,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
75 | if (r < 0) | 75 | if (r < 0) |
76 | r = 0; | 76 | r = 0; |
77 | r += kvm_apic_set_irq(vcpu, irq, dest_map); | 77 | r += kvm_apic_set_irq(vcpu, irq, dest_map); |
78 | } else if (kvm_lapic_enabled(vcpu)) { | 78 | } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) { |
79 | if (!kvm_vector_hashing_enabled()) { | 79 | if (!kvm_vector_hashing_enabled()) { |
80 | if (!lowest) | 80 | if (!lowest) |
81 | lowest = vcpu; | 81 | lowest = vcpu; |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4dabc318adb8..a232e76d8f23 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -69,6 +69,7 @@ | |||
69 | #define X2APIC_BROADCAST 0xFFFFFFFFul | 69 | #define X2APIC_BROADCAST 0xFFFFFFFFul |
70 | 70 | ||
71 | #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 | 71 | #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 |
72 | #define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 | ||
72 | /* step-by-step approximation to mitigate fluctuation */ | 73 | /* step-by-step approximation to mitigate fluctuation */ |
73 | #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 | 74 | #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 |
74 | 75 | ||
@@ -85,11 +86,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) | |||
85 | apic_test_vector(vector, apic->regs + APIC_IRR); | 86 | apic_test_vector(vector, apic->regs + APIC_IRR); |
86 | } | 87 | } |
87 | 88 | ||
88 | static inline void apic_clear_vector(int vec, void *bitmap) | ||
89 | { | ||
90 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
91 | } | ||
92 | |||
93 | static inline int __apic_test_and_set_vector(int vec, void *bitmap) | 89 | static inline int __apic_test_and_set_vector(int vec, void *bitmap) |
94 | { | 90 | { |
95 | return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 91 | return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
@@ -443,12 +439,12 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
443 | 439 | ||
444 | if (unlikely(vcpu->arch.apicv_active)) { | 440 | if (unlikely(vcpu->arch.apicv_active)) { |
445 | /* need to update RVI */ | 441 | /* need to update RVI */ |
446 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 442 | kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); |
447 | kvm_x86_ops->hwapic_irr_update(vcpu, | 443 | kvm_x86_ops->hwapic_irr_update(vcpu, |
448 | apic_find_highest_irr(apic)); | 444 | apic_find_highest_irr(apic)); |
449 | } else { | 445 | } else { |
450 | apic->irr_pending = false; | 446 | apic->irr_pending = false; |
451 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 447 | kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); |
452 | if (apic_search_irr(apic) != -1) | 448 | if (apic_search_irr(apic) != -1) |
453 | apic->irr_pending = true; | 449 | apic->irr_pending = true; |
454 | } | 450 | } |
@@ -1053,9 +1049,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
1053 | 1049 | ||
1054 | if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { | 1050 | if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { |
1055 | if (trig_mode) | 1051 | if (trig_mode) |
1056 | kvm_lapic_set_vector(vector, apic->regs + APIC_TMR); | 1052 | kvm_lapic_set_vector(vector, |
1053 | apic->regs + APIC_TMR); | ||
1057 | else | 1054 | else |
1058 | apic_clear_vector(vector, apic->regs + APIC_TMR); | 1055 | kvm_lapic_clear_vector(vector, |
1056 | apic->regs + APIC_TMR); | ||
1059 | } | 1057 | } |
1060 | 1058 | ||
1061 | if (vcpu->arch.apicv_active) | 1059 | if (vcpu->arch.apicv_active) |
@@ -1313,21 +1311,45 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) | |||
1313 | return container_of(dev, struct kvm_lapic, dev); | 1311 | return container_of(dev, struct kvm_lapic, dev); |
1314 | } | 1312 | } |
1315 | 1313 | ||
1314 | #define APIC_REG_MASK(reg) (1ull << ((reg) >> 4)) | ||
1315 | #define APIC_REGS_MASK(first, count) \ | ||
1316 | (APIC_REG_MASK(first) * ((1ull << (count)) - 1)) | ||
1317 | |||
1316 | int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, | 1318 | int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, |
1317 | void *data) | 1319 | void *data) |
1318 | { | 1320 | { |
1319 | unsigned char alignment = offset & 0xf; | 1321 | unsigned char alignment = offset & 0xf; |
1320 | u32 result; | 1322 | u32 result; |
1321 | /* this bitmask has a bit cleared for each reserved register */ | 1323 | /* this bitmask has a bit cleared for each reserved register */ |
1322 | static const u64 rmask = 0x43ff01ffffffe70cULL; | 1324 | u64 valid_reg_mask = |
1323 | 1325 | APIC_REG_MASK(APIC_ID) | | |
1324 | if ((alignment + len) > 4) { | 1326 | APIC_REG_MASK(APIC_LVR) | |
1325 | apic_debug("KVM_APIC_READ: alignment error %x %d\n", | 1327 | APIC_REG_MASK(APIC_TASKPRI) | |
1326 | offset, len); | 1328 | APIC_REG_MASK(APIC_PROCPRI) | |
1327 | return 1; | 1329 | APIC_REG_MASK(APIC_LDR) | |
1328 | } | 1330 | APIC_REG_MASK(APIC_DFR) | |
1331 | APIC_REG_MASK(APIC_SPIV) | | ||
1332 | APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) | | ||
1333 | APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) | | ||
1334 | APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) | | ||
1335 | APIC_REG_MASK(APIC_ESR) | | ||
1336 | APIC_REG_MASK(APIC_ICR) | | ||
1337 | APIC_REG_MASK(APIC_ICR2) | | ||
1338 | APIC_REG_MASK(APIC_LVTT) | | ||
1339 | APIC_REG_MASK(APIC_LVTTHMR) | | ||
1340 | APIC_REG_MASK(APIC_LVTPC) | | ||
1341 | APIC_REG_MASK(APIC_LVT0) | | ||
1342 | APIC_REG_MASK(APIC_LVT1) | | ||
1343 | APIC_REG_MASK(APIC_LVTERR) | | ||
1344 | APIC_REG_MASK(APIC_TMICT) | | ||
1345 | APIC_REG_MASK(APIC_TMCCT) | | ||
1346 | APIC_REG_MASK(APIC_TDCR); | ||
1347 | |||
1348 | /* ARBPRI is not valid on x2APIC */ | ||
1349 | if (!apic_x2apic_mode(apic)) | ||
1350 | valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); | ||
1329 | 1351 | ||
1330 | if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) { | 1352 | if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) { |
1331 | apic_debug("KVM_APIC_READ: read reserved register %x\n", | 1353 | apic_debug("KVM_APIC_READ: read reserved register %x\n", |
1332 | offset); | 1354 | offset); |
1333 | return 1; | 1355 | return 1; |
@@ -1499,11 +1521,40 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) | |||
1499 | } | 1521 | } |
1500 | } | 1522 | } |
1501 | 1523 | ||
1502 | void wait_lapic_expire(struct kvm_vcpu *vcpu) | 1524 | static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, |
1525 | s64 advance_expire_delta) | ||
1503 | { | 1526 | { |
1504 | struct kvm_lapic *apic = vcpu->arch.apic; | 1527 | struct kvm_lapic *apic = vcpu->arch.apic; |
1505 | u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; | 1528 | u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; |
1506 | u64 guest_tsc, tsc_deadline, ns; | 1529 | u64 ns; |
1530 | |||
1531 | /* too early */ | ||
1532 | if (advance_expire_delta < 0) { | ||
1533 | ns = -advance_expire_delta * 1000000ULL; | ||
1534 | do_div(ns, vcpu->arch.virtual_tsc_khz); | ||
1535 | timer_advance_ns -= min((u32)ns, | ||
1536 | timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); | ||
1537 | } else { | ||
1538 | /* too late */ | ||
1539 | ns = advance_expire_delta * 1000000ULL; | ||
1540 | do_div(ns, vcpu->arch.virtual_tsc_khz); | ||
1541 | timer_advance_ns += min((u32)ns, | ||
1542 | timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); | ||
1543 | } | ||
1544 | |||
1545 | if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) | ||
1546 | apic->lapic_timer.timer_advance_adjust_done = true; | ||
1547 | if (unlikely(timer_advance_ns > 5000)) { | ||
1548 | timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; | ||
1549 | apic->lapic_timer.timer_advance_adjust_done = false; | ||
1550 | } | ||
1551 | apic->lapic_timer.timer_advance_ns = timer_advance_ns; | ||
1552 | } | ||
1553 | |||
1554 | void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) | ||
1555 | { | ||
1556 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1557 | u64 guest_tsc, tsc_deadline; | ||
1507 | 1558 | ||
1508 | if (apic->lapic_timer.expired_tscdeadline == 0) | 1559 | if (apic->lapic_timer.expired_tscdeadline == 0) |
1509 | return; | 1560 | return; |
@@ -1514,34 +1565,15 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) | |||
1514 | tsc_deadline = apic->lapic_timer.expired_tscdeadline; | 1565 | tsc_deadline = apic->lapic_timer.expired_tscdeadline; |
1515 | apic->lapic_timer.expired_tscdeadline = 0; | 1566 | apic->lapic_timer.expired_tscdeadline = 0; |
1516 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); | 1567 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); |
1517 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); | 1568 | apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; |
1518 | 1569 | ||
1519 | if (guest_tsc < tsc_deadline) | 1570 | if (guest_tsc < tsc_deadline) |
1520 | __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); | 1571 | __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); |
1521 | 1572 | ||
1522 | if (!apic->lapic_timer.timer_advance_adjust_done) { | 1573 | if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) |
1523 | /* too early */ | 1574 | adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); |
1524 | if (guest_tsc < tsc_deadline) { | ||
1525 | ns = (tsc_deadline - guest_tsc) * 1000000ULL; | ||
1526 | do_div(ns, vcpu->arch.virtual_tsc_khz); | ||
1527 | timer_advance_ns -= min((u32)ns, | ||
1528 | timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); | ||
1529 | } else { | ||
1530 | /* too late */ | ||
1531 | ns = (guest_tsc - tsc_deadline) * 1000000ULL; | ||
1532 | do_div(ns, vcpu->arch.virtual_tsc_khz); | ||
1533 | timer_advance_ns += min((u32)ns, | ||
1534 | timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); | ||
1535 | } | ||
1536 | if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) | ||
1537 | apic->lapic_timer.timer_advance_adjust_done = true; | ||
1538 | if (unlikely(timer_advance_ns > 5000)) { | ||
1539 | timer_advance_ns = 0; | ||
1540 | apic->lapic_timer.timer_advance_adjust_done = true; | ||
1541 | } | ||
1542 | apic->lapic_timer.timer_advance_ns = timer_advance_ns; | ||
1543 | } | ||
1544 | } | 1575 | } |
1576 | EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); | ||
1545 | 1577 | ||
1546 | static void start_sw_tscdeadline(struct kvm_lapic *apic) | 1578 | static void start_sw_tscdeadline(struct kvm_lapic *apic) |
1547 | { | 1579 | { |
@@ -2014,7 +2046,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, | |||
2014 | apic_debug("%s: offset 0x%x with length 0x%x, and value is " | 2046 | apic_debug("%s: offset 0x%x with length 0x%x, and value is " |
2015 | "0x%x\n", __func__, offset, len, val); | 2047 | "0x%x\n", __func__, offset, len, val); |
2016 | 2048 | ||
2017 | kvm_lapic_reg_write(apic, offset & 0xff0, val); | 2049 | kvm_lapic_reg_write(apic, offset, val); |
2018 | 2050 | ||
2019 | return 0; | 2051 | return 0; |
2020 | } | 2052 | } |
@@ -2311,7 +2343,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) | |||
2311 | HRTIMER_MODE_ABS_PINNED); | 2343 | HRTIMER_MODE_ABS_PINNED); |
2312 | apic->lapic_timer.timer.function = apic_timer_fn; | 2344 | apic->lapic_timer.timer.function = apic_timer_fn; |
2313 | if (timer_advance_ns == -1) { | 2345 | if (timer_advance_ns == -1) { |
2314 | apic->lapic_timer.timer_advance_ns = 1000; | 2346 | apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; |
2315 | apic->lapic_timer.timer_advance_adjust_done = false; | 2347 | apic->lapic_timer.timer_advance_adjust_done = false; |
2316 | } else { | 2348 | } else { |
2317 | apic->lapic_timer.timer_advance_ns = timer_advance_ns; | 2349 | apic->lapic_timer.timer_advance_ns = timer_advance_ns; |
@@ -2321,7 +2353,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) | |||
2321 | 2353 | ||
2322 | /* | 2354 | /* |
2323 | * APIC is created enabled. This will prevent kvm_lapic_set_base from | 2355 | * APIC is created enabled. This will prevent kvm_lapic_set_base from |
2324 | * thinking that APIC satet has changed. | 2356 | * thinking that APIC state has changed. |
2325 | */ | 2357 | */ |
2326 | vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; | 2358 | vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; |
2327 | static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ | 2359 | static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ |
@@ -2330,6 +2362,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) | |||
2330 | return 0; | 2362 | return 0; |
2331 | nomem_free_apic: | 2363 | nomem_free_apic: |
2332 | kfree(apic); | 2364 | kfree(apic); |
2365 | vcpu->arch.apic = NULL; | ||
2333 | nomem: | 2366 | nomem: |
2334 | return -ENOMEM; | 2367 | return -ENOMEM; |
2335 | } | 2368 | } |
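As a side note on the kvm_lapic_reg_read() hunk earlier in this file: APIC_REG_MASK() turns each 16-byte-aligned register offset into one bit of a 64-bit validity mask, replacing the hand-coded 0x43ff01ffffffe70cULL constant. A small self-contained check of that mapping, with the architectural offsets written out locally (the real code takes them from <asm/apicdef.h>):

```c
/*
 * Standalone illustration of the APIC_REG_MASK() scheme used in
 * kvm_lapic_reg_read() above: offset >> 4 selects the bit, so the
 * 0x000-0x3f0 register window fits in a 64-bit mask.
 */
#define APIC_TASKPRI	0x80	/* task priority register (TPR) */
#define APIC_TMCCT	0x390	/* timer current count register */

#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))

/* TPR lives at offset 0x80, so it occupies bit 8 of the mask. */
_Static_assert(APIC_REG_MASK(APIC_TASKPRI) == (1ull << 8),
	       "TPR maps to bit 8");
/* The current-count register at 0x390 maps to bit 0x39 (57). */
_Static_assert(APIC_REG_MASK(APIC_TMCCT) == (1ull << 0x39),
	       "TMCCT maps to bit 57");
```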
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d6d049ba3045..36747174e4a8 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -32,6 +32,7 @@ struct kvm_timer { | |||
32 | u64 tscdeadline; | 32 | u64 tscdeadline; |
33 | u64 expired_tscdeadline; | 33 | u64 expired_tscdeadline; |
34 | u32 timer_advance_ns; | 34 | u32 timer_advance_ns; |
35 | s64 advance_expire_delta; | ||
35 | atomic_t pending; /* accumulated triggered timers */ | 36 | atomic_t pending; /* accumulated triggered timers */ |
36 | bool hv_timer_in_use; | 37 | bool hv_timer_in_use; |
37 | bool timer_advance_adjust_done; | 38 | bool timer_advance_adjust_done; |
@@ -129,6 +130,11 @@ void kvm_lapic_exit(void); | |||
129 | #define VEC_POS(v) ((v) & (32 - 1)) | 130 | #define VEC_POS(v) ((v) & (32 - 1)) |
130 | #define REG_POS(v) (((v) >> 5) << 4) | 131 | #define REG_POS(v) (((v) >> 5) << 4) |
131 | 132 | ||
133 | static inline void kvm_lapic_clear_vector(int vec, void *bitmap) | ||
134 | { | ||
135 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
136 | } | ||
137 | |||
132 | static inline void kvm_lapic_set_vector(int vec, void *bitmap) | 138 | static inline void kvm_lapic_set_vector(int vec, void *bitmap) |
133 | { | 139 | { |
134 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 140 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
@@ -219,7 +225,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) | |||
219 | 225 | ||
220 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | 226 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); |
221 | 227 | ||
222 | void wait_lapic_expire(struct kvm_vcpu *vcpu); | 228 | void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); |
223 | 229 | ||
224 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | 230 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, |
225 | struct kvm_vcpu **dest_vcpu); | 231 | struct kvm_vcpu **dest_vcpu); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4a9c63d1c20a..9a5814d8d194 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -140,9 +140,6 @@ module_param(dbg, bool, 0644); | |||
140 | 140 | ||
141 | #include <trace/events/kvm.h> | 141 | #include <trace/events/kvm.h> |
142 | 142 | ||
143 | #define CREATE_TRACE_POINTS | ||
144 | #include "mmutrace.h" | ||
145 | |||
146 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | 143 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) |
147 | #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) | 144 | #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) |
148 | 145 | ||
@@ -259,11 +256,20 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; | |||
259 | */ | 256 | */ |
260 | static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; | 257 | static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; |
261 | 258 | ||
259 | /* | ||
260 | * The number of non-reserved physical address bits irrespective of features | ||
261 | * that repurpose legal bits, e.g. MKTME. | ||
262 | */ | ||
263 | static u8 __read_mostly shadow_phys_bits; | ||
262 | 264 | ||
263 | static void mmu_spte_set(u64 *sptep, u64 spte); | 265 | static void mmu_spte_set(u64 *sptep, u64 spte); |
266 | static bool is_executable_pte(u64 spte); | ||
264 | static union kvm_mmu_page_role | 267 | static union kvm_mmu_page_role |
265 | kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); | 268 | kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); |
266 | 269 | ||
270 | #define CREATE_TRACE_POINTS | ||
271 | #include "mmutrace.h" | ||
272 | |||
267 | 273 | ||
268 | static inline bool kvm_available_flush_tlb_with_range(void) | 274 | static inline bool kvm_available_flush_tlb_with_range(void) |
269 | { | 275 | { |
@@ -468,6 +474,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
468 | } | 474 | } |
469 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 475 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
470 | 476 | ||
477 | static u8 kvm_get_shadow_phys_bits(void) | ||
478 | { | ||
479 | /* | ||
480 | * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected | ||
481 | * in CPU detection code, but MKTME treats those reduced bits as | ||
482 | * 'keyID' thus they are not reserved bits. Therefore for MKTME | ||
483 | * we should still return physical address bits reported by CPUID. | ||
484 | */ | ||
485 | if (!boot_cpu_has(X86_FEATURE_TME) || | ||
486 | WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) | ||
487 | return boot_cpu_data.x86_phys_bits; | ||
488 | |||
489 | return cpuid_eax(0x80000008) & 0xff; | ||
490 | } | ||
491 | |||
471 | static void kvm_mmu_reset_all_pte_masks(void) | 492 | static void kvm_mmu_reset_all_pte_masks(void) |
472 | { | 493 | { |
473 | u8 low_phys_bits; | 494 | u8 low_phys_bits; |
@@ -481,6 +502,8 @@ static void kvm_mmu_reset_all_pte_masks(void) | |||
481 | shadow_present_mask = 0; | 502 | shadow_present_mask = 0; |
482 | shadow_acc_track_mask = 0; | 503 | shadow_acc_track_mask = 0; |
483 | 504 | ||
505 | shadow_phys_bits = kvm_get_shadow_phys_bits(); | ||
506 | |||
484 | /* | 507 | /* |
485 | * If the CPU has 46 or less physical address bits, then set an | 508 | * If the CPU has 46 or less physical address bits, then set an |
486 | * appropriate mask to guard against L1TF attacks. Otherwise, it is | 509 | * appropriate mask to guard against L1TF attacks. Otherwise, it is |
@@ -1073,10 +1096,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) | |||
1073 | 1096 | ||
1074 | static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) | 1097 | static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) |
1075 | { | 1098 | { |
1076 | if (sp->role.direct) | 1099 | if (!sp->role.direct) { |
1077 | BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); | ||
1078 | else | ||
1079 | sp->gfns[index] = gfn; | 1100 | sp->gfns[index] = gfn; |
1101 | return; | ||
1102 | } | ||
1103 | |||
1104 | if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) | ||
1105 | pr_err_ratelimited("gfn mismatch under direct page %llx " | ||
1106 | "(expected %llx, got %llx)\n", | ||
1107 | sp->gfn, | ||
1108 | kvm_mmu_page_get_gfn(sp, index), gfn); | ||
1080 | } | 1109 | } |
1081 | 1110 | ||
1082 | /* | 1111 | /* |
@@ -3055,10 +3084,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, | |||
3055 | ret = RET_PF_EMULATE; | 3084 | ret = RET_PF_EMULATE; |
3056 | 3085 | ||
3057 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 3086 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
3058 | pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", | 3087 | trace_kvm_mmu_set_spte(level, gfn, sptep); |
3059 | is_large_pte(*sptep)? "2MB" : "4kB", | ||
3060 | *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn, | ||
3061 | *sptep, sptep); | ||
3062 | if (!was_rmapped && is_large_pte(*sptep)) | 3088 | if (!was_rmapped && is_large_pte(*sptep)) |
3063 | ++vcpu->kvm->stat.lpages; | 3089 | ++vcpu->kvm->stat.lpages; |
3064 | 3090 | ||
@@ -3070,8 +3096,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, | |||
3070 | } | 3096 | } |
3071 | } | 3097 | } |
3072 | 3098 | ||
3073 | kvm_release_pfn_clean(pfn); | ||
3074 | |||
3075 | return ret; | 3099 | return ret; |
3076 | } | 3100 | } |
3077 | 3101 | ||
@@ -3106,9 +3130,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
3106 | if (ret <= 0) | 3130 | if (ret <= 0) |
3107 | return -1; | 3131 | return -1; |
3108 | 3132 | ||
3109 | for (i = 0; i < ret; i++, gfn++, start++) | 3133 | for (i = 0; i < ret; i++, gfn++, start++) { |
3110 | mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, | 3134 | mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, |
3111 | page_to_pfn(pages[i]), true, true); | 3135 | page_to_pfn(pages[i]), true, true); |
3136 | put_page(pages[i]); | ||
3137 | } | ||
3112 | 3138 | ||
3113 | return 0; | 3139 | return 0; |
3114 | } | 3140 | } |
@@ -3156,40 +3182,40 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) | |||
3156 | __direct_pte_prefetch(vcpu, sp, sptep); | 3182 | __direct_pte_prefetch(vcpu, sp, sptep); |
3157 | } | 3183 | } |
3158 | 3184 | ||
3159 | static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, | 3185 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, |
3160 | int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) | 3186 | int map_writable, int level, kvm_pfn_t pfn, |
3187 | bool prefault) | ||
3161 | { | 3188 | { |
3162 | struct kvm_shadow_walk_iterator iterator; | 3189 | struct kvm_shadow_walk_iterator it; |
3163 | struct kvm_mmu_page *sp; | 3190 | struct kvm_mmu_page *sp; |
3164 | int emulate = 0; | 3191 | int ret; |
3165 | gfn_t pseudo_gfn; | 3192 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3193 | gfn_t base_gfn = gfn; | ||
3166 | 3194 | ||
3167 | if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) | 3195 | if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) |
3168 | return 0; | 3196 | return RET_PF_RETRY; |
3169 | 3197 | ||
3170 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 3198 | trace_kvm_mmu_spte_requested(gpa, level, pfn); |
3171 | if (iterator.level == level) { | 3199 | for_each_shadow_entry(vcpu, gpa, it) { |
3172 | emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, | 3200 | base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); |
3173 | write, level, gfn, pfn, prefault, | 3201 | if (it.level == level) |
3174 | map_writable); | ||
3175 | direct_pte_prefetch(vcpu, iterator.sptep); | ||
3176 | ++vcpu->stat.pf_fixed; | ||
3177 | break; | 3202 | break; |
3178 | } | ||
3179 | 3203 | ||
3180 | drop_large_spte(vcpu, iterator.sptep); | 3204 | drop_large_spte(vcpu, it.sptep); |
3181 | if (!is_shadow_present_pte(*iterator.sptep)) { | 3205 | if (!is_shadow_present_pte(*it.sptep)) { |
3182 | u64 base_addr = iterator.addr; | 3206 | sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, |
3207 | it.level - 1, true, ACC_ALL); | ||
3183 | 3208 | ||
3184 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); | 3209 | link_shadow_page(vcpu, it.sptep, sp); |
3185 | pseudo_gfn = base_addr >> PAGE_SHIFT; | ||
3186 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, | ||
3187 | iterator.level - 1, 1, ACC_ALL); | ||
3188 | |||
3189 | link_shadow_page(vcpu, iterator.sptep, sp); | ||
3190 | } | 3210 | } |
3191 | } | 3211 | } |
3192 | return emulate; | 3212 | |
3213 | ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, | ||
3214 | write, level, base_gfn, pfn, prefault, | ||
3215 | map_writable); | ||
3216 | direct_pte_prefetch(vcpu, it.sptep); | ||
3217 | ++vcpu->stat.pf_fixed; | ||
3218 | return ret; | ||
3193 | } | 3219 | } |
3194 | 3220 | ||
3195 | static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) | 3221 | static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) |
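
The reworked __direct_map() in the hunk above walks the shadow page tables down to the target level and, at each step, rounds the faulting gfn to that level's huge-page boundary (base_gfn). A standalone sketch of that alignment arithmetic, assuming the usual x86 layout of 9 gfn bits per paging level (illustration only, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Pages covered by one entry at a given paging level: x86 consumes 9 bits
 * of gfn per level, so level 1 = 1 page (4KiB), level 2 = 512 pages (2MiB),
 * level 3 = 262144 pages (1GiB). */
static uint64_t pages_per_hpage(int level)
{
	return 1ull << ((level - 1) * 9);
}

int main(void)
{
	uint64_t gfn = 0x12345;

	for (int level = 1; level <= 3; level++) {
		uint64_t base_gfn = gfn & ~(pages_per_hpage(level) - 1);

		printf("level %d: gfn %#llx -> base_gfn %#llx\n", level,
		       (unsigned long long)gfn, (unsigned long long)base_gfn);
	}
	return 0;
}
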
@@ -3216,11 +3242,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) | |||
3216 | } | 3242 | } |
3217 | 3243 | ||
3218 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | 3244 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, |
3219 | gfn_t *gfnp, kvm_pfn_t *pfnp, | 3245 | gfn_t gfn, kvm_pfn_t *pfnp, |
3220 | int *levelp) | 3246 | int *levelp) |
3221 | { | 3247 | { |
3222 | kvm_pfn_t pfn = *pfnp; | 3248 | kvm_pfn_t pfn = *pfnp; |
3223 | gfn_t gfn = *gfnp; | ||
3224 | int level = *levelp; | 3249 | int level = *levelp; |
3225 | 3250 | ||
3226 | /* | 3251 | /* |
@@ -3247,8 +3272,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | |||
3247 | mask = KVM_PAGES_PER_HPAGE(level) - 1; | 3272 | mask = KVM_PAGES_PER_HPAGE(level) - 1; |
3248 | VM_BUG_ON((gfn & mask) != (pfn & mask)); | 3273 | VM_BUG_ON((gfn & mask) != (pfn & mask)); |
3249 | if (pfn & mask) { | 3274 | if (pfn & mask) { |
3250 | gfn &= ~mask; | ||
3251 | *gfnp = gfn; | ||
3252 | kvm_release_pfn_clean(pfn); | 3275 | kvm_release_pfn_clean(pfn); |
3253 | pfn &= ~mask; | 3276 | pfn &= ~mask; |
3254 | kvm_get_pfn(pfn); | 3277 | kvm_get_pfn(pfn); |
@@ -3505,22 +3528,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
3505 | if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) | 3528 | if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) |
3506 | return r; | 3529 | return r; |
3507 | 3530 | ||
3531 | r = RET_PF_RETRY; | ||
3508 | spin_lock(&vcpu->kvm->mmu_lock); | 3532 | spin_lock(&vcpu->kvm->mmu_lock); |
3509 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3533 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
3510 | goto out_unlock; | 3534 | goto out_unlock; |
3511 | if (make_mmu_pages_available(vcpu) < 0) | 3535 | if (make_mmu_pages_available(vcpu) < 0) |
3512 | goto out_unlock; | 3536 | goto out_unlock; |
3513 | if (likely(!force_pt_level)) | 3537 | if (likely(!force_pt_level)) |
3514 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3538 | transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); |
3515 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); | 3539 | r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); |
3516 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
3517 | |||
3518 | return r; | ||
3519 | |||
3520 | out_unlock: | 3540 | out_unlock: |
3521 | spin_unlock(&vcpu->kvm->mmu_lock); | 3541 | spin_unlock(&vcpu->kvm->mmu_lock); |
3522 | kvm_release_pfn_clean(pfn); | 3542 | kvm_release_pfn_clean(pfn); |
3523 | return RET_PF_RETRY; | 3543 | return r; |
3524 | } | 3544 | } |
3525 | 3545 | ||
3526 | static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, | 3546 | static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, |
@@ -4015,19 +4035,6 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | |||
4015 | return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); | 4035 | return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); |
4016 | } | 4036 | } |
4017 | 4037 | ||
4018 | bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) | ||
4019 | { | ||
4020 | if (unlikely(!lapic_in_kernel(vcpu) || | ||
4021 | kvm_event_needs_reinjection(vcpu) || | ||
4022 | vcpu->arch.exception.pending)) | ||
4023 | return false; | ||
4024 | |||
4025 | if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) | ||
4026 | return false; | ||
4027 | |||
4028 | return kvm_x86_ops->interrupt_allowed(vcpu); | ||
4029 | } | ||
4030 | |||
4031 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 4038 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
4032 | gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) | 4039 | gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) |
4033 | { | 4040 | { |
@@ -4147,22 +4154,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
4147 | if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) | 4154 | if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) |
4148 | return r; | 4155 | return r; |
4149 | 4156 | ||
4157 | r = RET_PF_RETRY; | ||
4150 | spin_lock(&vcpu->kvm->mmu_lock); | 4158 | spin_lock(&vcpu->kvm->mmu_lock); |
4151 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 4159 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
4152 | goto out_unlock; | 4160 | goto out_unlock; |
4153 | if (make_mmu_pages_available(vcpu) < 0) | 4161 | if (make_mmu_pages_available(vcpu) < 0) |
4154 | goto out_unlock; | 4162 | goto out_unlock; |
4155 | if (likely(!force_pt_level)) | 4163 | if (likely(!force_pt_level)) |
4156 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 4164 | transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); |
4157 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); | 4165 | r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); |
4158 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
4159 | |||
4160 | return r; | ||
4161 | |||
4162 | out_unlock: | 4166 | out_unlock: |
4163 | spin_unlock(&vcpu->kvm->mmu_lock); | 4167 | spin_unlock(&vcpu->kvm->mmu_lock); |
4164 | kvm_release_pfn_clean(pfn); | 4168 | kvm_release_pfn_clean(pfn); |
4165 | return RET_PF_RETRY; | 4169 | return r; |
4166 | } | 4170 | } |
4167 | 4171 | ||
4168 | static void nonpaging_init_context(struct kvm_vcpu *vcpu, | 4172 | static void nonpaging_init_context(struct kvm_vcpu *vcpu, |
@@ -4494,7 +4498,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
4494 | */ | 4498 | */ |
4495 | shadow_zero_check = &context->shadow_zero_check; | 4499 | shadow_zero_check = &context->shadow_zero_check; |
4496 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, | 4500 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, |
4497 | boot_cpu_data.x86_phys_bits, | 4501 | shadow_phys_bits, |
4498 | context->shadow_root_level, uses_nx, | 4502 | context->shadow_root_level, uses_nx, |
4499 | guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), | 4503 | guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), |
4500 | is_pse(vcpu), true); | 4504 | is_pse(vcpu), true); |
@@ -4531,13 +4535,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, | |||
4531 | 4535 | ||
4532 | if (boot_cpu_is_amd()) | 4536 | if (boot_cpu_is_amd()) |
4533 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, | 4537 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, |
4534 | boot_cpu_data.x86_phys_bits, | 4538 | shadow_phys_bits, |
4535 | context->shadow_root_level, false, | 4539 | context->shadow_root_level, false, |
4536 | boot_cpu_has(X86_FEATURE_GBPAGES), | 4540 | boot_cpu_has(X86_FEATURE_GBPAGES), |
4537 | true, true); | 4541 | true, true); |
4538 | else | 4542 | else |
4539 | __reset_rsvds_bits_mask_ept(shadow_zero_check, | 4543 | __reset_rsvds_bits_mask_ept(shadow_zero_check, |
4540 | boot_cpu_data.x86_phys_bits, | 4544 | shadow_phys_bits, |
4541 | false); | 4545 | false); |
4542 | 4546 | ||
4543 | if (!shadow_me_mask) | 4547 | if (!shadow_me_mask) |
@@ -4558,7 +4562,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, | |||
4558 | struct kvm_mmu *context, bool execonly) | 4562 | struct kvm_mmu *context, bool execonly) |
4559 | { | 4563 | { |
4560 | __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, | 4564 | __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, |
4561 | boot_cpu_data.x86_phys_bits, execonly); | 4565 | shadow_phys_bits, execonly); |
4562 | } | 4566 | } |
4563 | 4567 | ||
4564 | #define BYTE_MASK(access) \ | 4568 | #define BYTE_MASK(access) \ |
@@ -5935,7 +5939,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
5935 | int nr_to_scan = sc->nr_to_scan; | 5939 | int nr_to_scan = sc->nr_to_scan; |
5936 | unsigned long freed = 0; | 5940 | unsigned long freed = 0; |
5937 | 5941 | ||
5938 | spin_lock(&kvm_lock); | 5942 | mutex_lock(&kvm_lock); |
5939 | 5943 | ||
5940 | list_for_each_entry(kvm, &vm_list, vm_list) { | 5944 | list_for_each_entry(kvm, &vm_list, vm_list) { |
5941 | int idx; | 5945 | int idx; |
@@ -5977,7 +5981,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
5977 | break; | 5981 | break; |
5978 | } | 5982 | } |
5979 | 5983 | ||
5980 | spin_unlock(&kvm_lock); | 5984 | mutex_unlock(&kvm_lock); |
5981 | return freed; | 5985 | return freed; |
5982 | } | 5986 | } |
5983 | 5987 | ||
@@ -5999,6 +6003,34 @@ static void mmu_destroy_caches(void) | |||
5999 | kmem_cache_destroy(mmu_page_header_cache); | 6003 | kmem_cache_destroy(mmu_page_header_cache); |
6000 | } | 6004 | } |
6001 | 6005 | ||
6006 | static void kvm_set_mmio_spte_mask(void) | ||
6007 | { | ||
6008 | u64 mask; | ||
6009 | |||
6010 | /* | ||
6011 | * Set the reserved bits and the present bit of a paging-structure | ||
6012 | * entry to generate a page fault with PFER.RSV = 1. | ||
6013 | */ | ||
6014 | |||
6015 | /* | ||
6016 | * Mask the uppermost physical address bit, which would be reserved as | ||
6017 | * long as the supported physical address width is less than 52. | ||
6018 | */ | ||
6019 | mask = 1ull << 51; | ||
6020 | |||
6021 | /* Set the present bit. */ | ||
6022 | mask |= 1ull; | ||
6023 | |||
6024 | /* | ||
6025 | * If the reserved bit is not supported, clear the present bit to | ||
6026 | * disable MMIO page faults. | ||
6027 | */ | ||
6028 | if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) | ||
6029 | mask &= ~1ull; | ||
6030 | |||
6031 | kvm_mmu_set_mmio_spte_mask(mask, mask); | ||
6032 | } | ||
6033 | |||
6002 | int kvm_mmu_module_init(void) | 6034 | int kvm_mmu_module_init(void) |
6003 | { | 6035 | { |
6004 | int ret = -ENOMEM; | 6036 | int ret = -ENOMEM; |
@@ -6015,6 +6047,8 @@ int kvm_mmu_module_init(void) | |||
6015 | 6047 | ||
6016 | kvm_mmu_reset_all_pte_masks(); | 6048 | kvm_mmu_reset_all_pte_masks(); |
6017 | 6049 | ||
6050 | kvm_set_mmio_spte_mask(); | ||
6051 | |||
6018 | pte_list_desc_cache = kmem_cache_create("pte_list_desc", | 6052 | pte_list_desc_cache = kmem_cache_create("pte_list_desc", |
6019 | sizeof(struct pte_list_desc), | 6053 | sizeof(struct pte_list_desc), |
6020 | 0, SLAB_ACCOUNT, NULL); | 6054 | 0, SLAB_ACCOUNT, NULL); |
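
The new kvm_set_mmio_spte_mask() helper above derives the MMIO SPTE mask from the shared shadow_phys_bits value instead of recomputing the address width. A self-contained restatement of its logic (plain C sketch, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Sketch of the mask computed by kvm_set_mmio_spte_mask(): bit 51 plus the
 * present bit, unless the CPU reports a full 52-bit physical address width,
 * in which case bit 51 is a real address bit and the present bit must stay
 * clear so MMIO SPTEs can never look like valid mappings. */
static uint64_t mmio_spte_mask(unsigned int phys_bits)
{
	uint64_t mask = (1ull << 51) | 1ull;

	if (phys_bits == 52)
		mask &= ~1ull;
	return mask;
}

int main(void)
{
	printf("46-bit host: %#llx\n", (unsigned long long)mmio_spte_mask(46));
	printf("52-bit host: %#llx\n", (unsigned long long)mmio_spte_mask(52));
	return 0;
}
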
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index dd30dccd2ad5..d8001b4bca05 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -301,6 +301,65 @@ TRACE_EVENT( | |||
301 | __entry->kvm_gen == __entry->spte_gen | 301 | __entry->kvm_gen == __entry->spte_gen |
302 | ) | 302 | ) |
303 | ); | 303 | ); |
304 | |||
305 | TRACE_EVENT( | ||
306 | kvm_mmu_set_spte, | ||
307 | TP_PROTO(int level, gfn_t gfn, u64 *sptep), | ||
308 | TP_ARGS(level, gfn, sptep), | ||
309 | |||
310 | TP_STRUCT__entry( | ||
311 | __field(u64, gfn) | ||
312 | __field(u64, spte) | ||
313 | __field(u64, sptep) | ||
314 | __field(u8, level) | ||
315 | /* These depend on the page entry type, so compute them now. */ | ||
316 | __field(bool, r) | ||
317 | __field(bool, x) | ||
318 | __field(u8, u) | ||
319 | ), | ||
320 | |||
321 | TP_fast_assign( | ||
322 | __entry->gfn = gfn; | ||
323 | __entry->spte = *sptep; | ||
324 | __entry->sptep = virt_to_phys(sptep); | ||
325 | __entry->level = level; | ||
326 | __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK); | ||
327 | __entry->x = is_executable_pte(__entry->spte); | ||
328 | __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1; | ||
329 | ), | ||
330 | |||
331 | TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx", | ||
332 | __entry->gfn, __entry->spte, | ||
333 | __entry->r ? "r" : "-", | ||
334 | __entry->spte & PT_WRITABLE_MASK ? "w" : "-", | ||
335 | __entry->x ? "x" : "-", | ||
336 | __entry->u == -1 ? "" : (__entry->u ? "u" : "-"), | ||
337 | __entry->level, __entry->sptep | ||
338 | ) | ||
339 | ); | ||
340 | |||
341 | TRACE_EVENT( | ||
342 | kvm_mmu_spte_requested, | ||
343 | TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn), | ||
344 | TP_ARGS(addr, level, pfn), | ||
345 | |||
346 | TP_STRUCT__entry( | ||
347 | __field(u64, gfn) | ||
348 | __field(u64, pfn) | ||
349 | __field(u8, level) | ||
350 | ), | ||
351 | |||
352 | TP_fast_assign( | ||
353 | __entry->gfn = addr >> PAGE_SHIFT; | ||
354 | __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); | ||
355 | __entry->level = level; | ||
356 | ), | ||
357 | |||
358 | TP_printk("gfn %llx pfn %llx level %d", | ||
359 | __entry->gfn, __entry->pfn, __entry->level | ||
360 | ) | ||
361 | ); | ||
362 | |||
304 | #endif /* _TRACE_KVMMMU_H */ | 363 | #endif /* _TRACE_KVMMMU_H */ |
305 | 364 | ||
306 | #undef TRACE_INCLUDE_PATH | 365 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d583bcd119fc..7d5cdb3af594 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -540,6 +540,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
540 | mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, | 540 | mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, |
541 | true, true); | 541 | true, true); |
542 | 542 | ||
543 | kvm_release_pfn_clean(pfn); | ||
543 | return true; | 544 | return true; |
544 | } | 545 | } |
545 | 546 | ||
@@ -619,6 +620,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
619 | struct kvm_shadow_walk_iterator it; | 620 | struct kvm_shadow_walk_iterator it; |
620 | unsigned direct_access, access = gw->pt_access; | 621 | unsigned direct_access, access = gw->pt_access; |
621 | int top_level, ret; | 622 | int top_level, ret; |
623 | gfn_t base_gfn; | ||
622 | 624 | ||
623 | direct_access = gw->pte_access; | 625 | direct_access = gw->pte_access; |
624 | 626 | ||
@@ -663,35 +665,34 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
663 | link_shadow_page(vcpu, it.sptep, sp); | 665 | link_shadow_page(vcpu, it.sptep, sp); |
664 | } | 666 | } |
665 | 667 | ||
666 | for (; | 668 | base_gfn = gw->gfn; |
667 | shadow_walk_okay(&it) && it.level > hlevel; | 669 | |
668 | shadow_walk_next(&it)) { | 670 | trace_kvm_mmu_spte_requested(addr, gw->level, pfn); |
669 | gfn_t direct_gfn; | ||
670 | 671 | ||
672 | for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { | ||
671 | clear_sp_write_flooding_count(it.sptep); | 673 | clear_sp_write_flooding_count(it.sptep); |
674 | base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | ||
675 | if (it.level == hlevel) | ||
676 | break; | ||
677 | |||
672 | validate_direct_spte(vcpu, it.sptep, direct_access); | 678 | validate_direct_spte(vcpu, it.sptep, direct_access); |
673 | 679 | ||
674 | drop_large_spte(vcpu, it.sptep); | 680 | drop_large_spte(vcpu, it.sptep); |
675 | 681 | ||
676 | if (is_shadow_present_pte(*it.sptep)) | 682 | if (!is_shadow_present_pte(*it.sptep)) { |
677 | continue; | 683 | sp = kvm_mmu_get_page(vcpu, base_gfn, addr, |
678 | 684 | it.level - 1, true, direct_access); | |
679 | direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | 685 | link_shadow_page(vcpu, it.sptep, sp); |
680 | 686 | } | |
681 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | ||
682 | true, direct_access); | ||
683 | link_shadow_page(vcpu, it.sptep, sp); | ||
684 | } | 687 | } |
685 | 688 | ||
686 | clear_sp_write_flooding_count(it.sptep); | ||
687 | ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, | 689 | ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, |
688 | it.level, gw->gfn, pfn, prefault, map_writable); | 690 | it.level, base_gfn, pfn, prefault, map_writable); |
689 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); | 691 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); |
690 | 692 | ++vcpu->stat.pf_fixed; | |
691 | return ret; | 693 | return ret; |
692 | 694 | ||
693 | out_gpte_changed: | 695 | out_gpte_changed: |
694 | kvm_release_pfn_clean(pfn); | ||
695 | return RET_PF_RETRY; | 696 | return RET_PF_RETRY; |
696 | } | 697 | } |
697 | 698 | ||
@@ -839,6 +840,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
839 | walker.pte_access &= ~ACC_EXEC_MASK; | 840 | walker.pte_access &= ~ACC_EXEC_MASK; |
840 | } | 841 | } |
841 | 842 | ||
843 | r = RET_PF_RETRY; | ||
842 | spin_lock(&vcpu->kvm->mmu_lock); | 844 | spin_lock(&vcpu->kvm->mmu_lock); |
843 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 845 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
844 | goto out_unlock; | 846 | goto out_unlock; |
@@ -847,19 +849,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
847 | if (make_mmu_pages_available(vcpu) < 0) | 849 | if (make_mmu_pages_available(vcpu) < 0) |
848 | goto out_unlock; | 850 | goto out_unlock; |
849 | if (!force_pt_level) | 851 | if (!force_pt_level) |
850 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 852 | transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); |
851 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, | 853 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
852 | level, pfn, map_writable, prefault); | 854 | level, pfn, map_writable, prefault); |
853 | ++vcpu->stat.pf_fixed; | ||
854 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 855 | kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
855 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
856 | |||
857 | return r; | ||
858 | 856 | ||
859 | out_unlock: | 857 | out_unlock: |
860 | spin_unlock(&vcpu->kvm->mmu_lock); | 858 | spin_unlock(&vcpu->kvm->mmu_lock); |
861 | kvm_release_pfn_clean(pfn); | 859 | kvm_release_pfn_clean(pfn); |
862 | return RET_PF_RETRY; | 860 | return r; |
863 | } | 861 | } |
864 | 862 | ||
865 | static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) | 863 | static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index ab73a9a639ae..aa5a2597305a 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -19,6 +19,9 @@ | |||
19 | #include "lapic.h" | 19 | #include "lapic.h" |
20 | #include "pmu.h" | 20 | #include "pmu.h" |
21 | 21 | ||
22 | /* This keeps the total size of the filter under 4k. */ | ||
23 | #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63 | ||
24 | |||
22 | /* NOTE: | 25 | /* NOTE: |
23 | * - Each perf counter is defined as "struct kvm_pmc"; | 26 | * - Each perf counter is defined as "struct kvm_pmc"; |
24 | * - There are two types of perf counters: general purpose (gp) and fixed. | 27 | * - There are two types of perf counters: general purpose (gp) and fixed. |
@@ -141,6 +144,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
141 | { | 144 | { |
142 | unsigned config, type = PERF_TYPE_RAW; | 145 | unsigned config, type = PERF_TYPE_RAW; |
143 | u8 event_select, unit_mask; | 146 | u8 event_select, unit_mask; |
147 | struct kvm *kvm = pmc->vcpu->kvm; | ||
148 | struct kvm_pmu_event_filter *filter; | ||
149 | int i; | ||
150 | bool allow_event = true; | ||
144 | 151 | ||
145 | if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) | 152 | if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) |
146 | printk_once("kvm pmu: pin control bit is ignored\n"); | 153 | printk_once("kvm pmu: pin control bit is ignored\n"); |
@@ -152,6 +159,22 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
152 | if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) | 159 | if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) |
153 | return; | 160 | return; |
154 | 161 | ||
162 | filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); | ||
163 | if (filter) { | ||
164 | for (i = 0; i < filter->nevents; i++) | ||
165 | if (filter->events[i] == | ||
166 | (eventsel & AMD64_RAW_EVENT_MASK_NB)) | ||
167 | break; | ||
168 | if (filter->action == KVM_PMU_EVENT_ALLOW && | ||
169 | i == filter->nevents) | ||
170 | allow_event = false; | ||
171 | if (filter->action == KVM_PMU_EVENT_DENY && | ||
172 | i < filter->nevents) | ||
173 | allow_event = false; | ||
174 | } | ||
175 | if (!allow_event) | ||
176 | return; | ||
177 | |||
155 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; | 178 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; |
156 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | 179 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; |
157 | 180 | ||
@@ -348,3 +371,43 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu) | |||
348 | { | 371 | { |
349 | kvm_pmu_reset(vcpu); | 372 | kvm_pmu_reset(vcpu); |
350 | } | 373 | } |
374 | |||
375 | int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) | ||
376 | { | ||
377 | struct kvm_pmu_event_filter tmp, *filter; | ||
378 | size_t size; | ||
379 | int r; | ||
380 | |||
381 | if (copy_from_user(&tmp, argp, sizeof(tmp))) | ||
382 | return -EFAULT; | ||
383 | |||
384 | if (tmp.action != KVM_PMU_EVENT_ALLOW && | ||
385 | tmp.action != KVM_PMU_EVENT_DENY) | ||
386 | return -EINVAL; | ||
387 | |||
388 | if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS) | ||
389 | return -E2BIG; | ||
390 | |||
391 | size = struct_size(filter, events, tmp.nevents); | ||
392 | filter = kmalloc(size, GFP_KERNEL_ACCOUNT); | ||
393 | if (!filter) | ||
394 | return -ENOMEM; | ||
395 | |||
396 | r = -EFAULT; | ||
397 | if (copy_from_user(filter, argp, size)) | ||
398 | goto cleanup; | ||
399 | |||
400 | /* Ensure nevents can't be changed between the user copies. */ | ||
401 | *filter = tmp; | ||
402 | |||
403 | mutex_lock(&kvm->lock); | ||
404 | rcu_swap_protected(kvm->arch.pmu_event_filter, filter, | ||
405 | mutex_is_locked(&kvm->lock)); | ||
406 | mutex_unlock(&kvm->lock); | ||
407 | |||
408 | synchronize_srcu_expedited(&kvm->srcu); | ||
409 | r = 0; | ||
410 | cleanup: | ||
411 | kfree(filter); | ||
412 | return r; | ||
413 | } | ||
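
The filter installed by kvm_vm_ioctl_set_pmu_event_filter() is supplied from userspace via a VM ioctl. A hedged sketch of how a VMM might drive it, assuming the KVM_SET_PMU_EVENT_FILTER ioctl and struct kvm_pmu_event_filter added by this series are present in the installed <linux/kvm.h>; the two sample entries are the Intel architectural "unhalted core cycles" and "instructions retired" events encoded as event_select | (unit_mask << 8):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_pmu_event_filter *filter;
	size_t sz;
	int kvm, vm;

	kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0)
		return 1;
	vm = ioctl(kvm, KVM_CREATE_VM, 0);
	if (vm < 0)
		return 1;

	/* Header plus room for two entries in the flexible events[] array. */
	sz = sizeof(*filter) + 2 * sizeof(filter->events[0]);
	filter = calloc(1, sz);
	if (!filter)
		return 1;

	filter->action = KVM_PMU_EVENT_ALLOW;	/* whitelist mode */
	filter->nevents = 2;
	filter->events[0] = 0x003c;	/* unhalted core cycles (event 0x3c, umask 0) */
	filter->events[1] = 0x00c0;	/* instructions retired (event 0xc0, umask 0) */

	if (ioctl(vm, KVM_SET_PMU_EVENT_FILTER, filter))
		perror("KVM_SET_PMU_EVENT_FILTER");

	free(filter);
	return 0;
}
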
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 22dff661145a..58265f761c3b 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h | |||
@@ -118,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu); | |||
118 | void kvm_pmu_reset(struct kvm_vcpu *vcpu); | 118 | void kvm_pmu_reset(struct kvm_vcpu *vcpu); |
119 | void kvm_pmu_init(struct kvm_vcpu *vcpu); | 119 | void kvm_pmu_init(struct kvm_vcpu *vcpu); |
120 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | 120 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); |
121 | int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); | ||
121 | 122 | ||
122 | bool is_vmware_backdoor_pmc(u32 pmc_idx); | 123 | bool is_vmware_backdoor_pmc(u32 pmc_idx); |
123 | 124 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 48c865a4e5dd..583b9fa656f3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -364,6 +364,10 @@ static int avic; | |||
364 | module_param(avic, int, S_IRUGO); | 364 | module_param(avic, int, S_IRUGO); |
365 | #endif | 365 | #endif |
366 | 366 | ||
367 | /* enable/disable Next RIP Save */ | ||
368 | static int nrips = true; | ||
369 | module_param(nrips, int, 0444); | ||
370 | |||
367 | /* enable/disable Virtual VMLOAD VMSAVE */ | 371 | /* enable/disable Virtual VMLOAD VMSAVE */ |
368 | static int vls = true; | 372 | static int vls = true; |
369 | module_param(vls, int, 0444); | 373 | module_param(vls, int, 0444); |
@@ -770,7 +774,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
770 | { | 774 | { |
771 | struct vcpu_svm *svm = to_svm(vcpu); | 775 | struct vcpu_svm *svm = to_svm(vcpu); |
772 | 776 | ||
773 | if (svm->vmcb->control.next_rip != 0) { | 777 | if (nrips && svm->vmcb->control.next_rip != 0) { |
774 | WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); | 778 | WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); |
775 | svm->next_rip = svm->vmcb->control.next_rip; | 779 | svm->next_rip = svm->vmcb->control.next_rip; |
776 | } | 780 | } |
@@ -807,7 +811,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) | |||
807 | 811 | ||
808 | kvm_deliver_exception_payload(&svm->vcpu); | 812 | kvm_deliver_exception_payload(&svm->vcpu); |
809 | 813 | ||
810 | if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { | 814 | if (nr == BP_VECTOR && !nrips) { |
811 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | 815 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); |
812 | 816 | ||
813 | /* | 817 | /* |
@@ -1364,6 +1368,11 @@ static __init int svm_hardware_setup(void) | |||
1364 | } else | 1368 | } else |
1365 | kvm_disable_tdp(); | 1369 | kvm_disable_tdp(); |
1366 | 1370 | ||
1371 | if (nrips) { | ||
1372 | if (!boot_cpu_has(X86_FEATURE_NRIPS)) | ||
1373 | nrips = false; | ||
1374 | } | ||
1375 | |||
1367 | if (avic) { | 1376 | if (avic) { |
1368 | if (!npt_enabled || | 1377 | if (!npt_enabled || |
1369 | !boot_cpu_has(X86_FEATURE_AVIC) || | 1378 | !boot_cpu_has(X86_FEATURE_AVIC) || |
@@ -3290,7 +3299,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
3290 | vmcb->control.exit_int_info_err, | 3299 | vmcb->control.exit_int_info_err, |
3291 | KVM_ISA_SVM); | 3300 | KVM_ISA_SVM); |
3292 | 3301 | ||
3293 | rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map); | 3302 | rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map); |
3294 | if (rc) { | 3303 | if (rc) { |
3295 | if (rc == -EINVAL) | 3304 | if (rc == -EINVAL) |
3296 | kvm_inject_gp(&svm->vcpu, 0); | 3305 | kvm_inject_gp(&svm->vcpu, 0); |
@@ -3580,7 +3589,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
3580 | 3589 | ||
3581 | vmcb_gpa = svm->vmcb->save.rax; | 3590 | vmcb_gpa = svm->vmcb->save.rax; |
3582 | 3591 | ||
3583 | rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map); | 3592 | rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); |
3584 | if (rc) { | 3593 | if (rc) { |
3585 | if (rc == -EINVAL) | 3594 | if (rc == -EINVAL) |
3586 | kvm_inject_gp(&svm->vcpu, 0); | 3595 | kvm_inject_gp(&svm->vcpu, 0); |
@@ -3935,7 +3944,7 @@ static int rdpmc_interception(struct vcpu_svm *svm) | |||
3935 | { | 3944 | { |
3936 | int err; | 3945 | int err; |
3937 | 3946 | ||
3938 | if (!static_cpu_has(X86_FEATURE_NRIPS)) | 3947 | if (!nrips) |
3939 | return emulate_on_interception(svm); | 3948 | return emulate_on_interception(svm); |
3940 | 3949 | ||
3941 | err = kvm_rdpmc(&svm->vcpu); | 3950 | err = kvm_rdpmc(&svm->vcpu); |
@@ -5160,10 +5169,13 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) | |||
5160 | kvm_lapic_set_irr(vec, vcpu->arch.apic); | 5169 | kvm_lapic_set_irr(vec, vcpu->arch.apic); |
5161 | smp_mb__after_atomic(); | 5170 | smp_mb__after_atomic(); |
5162 | 5171 | ||
5163 | if (avic_vcpu_is_running(vcpu)) | 5172 | if (avic_vcpu_is_running(vcpu)) { |
5164 | wrmsrl(SVM_AVIC_DOORBELL, | 5173 | int cpuid = vcpu->cpu; |
5165 | kvm_cpu_get_apicid(vcpu->cpu)); | 5174 | |
5166 | else | 5175 | if (cpuid != get_cpu()) |
5176 | wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid)); | ||
5177 | put_cpu(); | ||
5178 | } else | ||
5167 | kvm_vcpu_wake_up(vcpu); | 5179 | kvm_vcpu_wake_up(vcpu); |
5168 | } | 5180 | } |
5169 | 5181 | ||
@@ -5640,6 +5652,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
5640 | clgi(); | 5652 | clgi(); |
5641 | kvm_load_guest_xcr0(vcpu); | 5653 | kvm_load_guest_xcr0(vcpu); |
5642 | 5654 | ||
5655 | if (lapic_in_kernel(vcpu) && | ||
5656 | vcpu->arch.apic->lapic_timer.timer_advance_ns) | ||
5657 | kvm_wait_lapic_expire(vcpu); | ||
5658 | |||
5643 | /* | 5659 | /* |
5644 | * If this vCPU has touched SPEC_CTRL, restore the guest's value if | 5660 | * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
5645 | * it's non-zero. Since vmentry is serialising on affected CPUs, there | 5661 | * it's non-zero. Since vmentry is serialising on affected CPUs, there |
@@ -5861,9 +5877,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
5861 | hypercall[2] = 0xd9; | 5877 | hypercall[2] = 0xd9; |
5862 | } | 5878 | } |
5863 | 5879 | ||
5864 | static void svm_check_processor_compat(void *rtn) | 5880 | static int __init svm_check_processor_compat(void) |
5865 | { | 5881 | { |
5866 | *(int *)rtn = 0; | 5882 | return 0; |
5867 | } | 5883 | } |
5868 | 5884 | ||
5869 | static bool svm_cpu_has_accelerated_tpr(void) | 5885 | static bool svm_cpu_has_accelerated_tpr(void) |
@@ -5875,6 +5891,7 @@ static bool svm_has_emulated_msr(int index) | |||
5875 | { | 5891 | { |
5876 | switch (index) { | 5892 | switch (index) { |
5877 | case MSR_IA32_MCG_EXT_CTL: | 5893 | case MSR_IA32_MCG_EXT_CTL: |
5894 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
5878 | return false; | 5895 | return false; |
5879 | default: | 5896 | default: |
5880 | break; | 5897 | break; |
@@ -6162,15 +6179,9 @@ out: | |||
6162 | return ret; | 6179 | return ret; |
6163 | } | 6180 | } |
6164 | 6181 | ||
6165 | static void svm_handle_external_intr(struct kvm_vcpu *vcpu) | 6182 | static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) |
6166 | { | 6183 | { |
6167 | local_irq_enable(); | 6184 | |
6168 | /* | ||
6169 | * We must have an instruction with interrupts enabled, so | ||
6170 | * the timer interrupt isn't delayed by the interrupt shadow. | ||
6171 | */ | ||
6172 | asm("nop"); | ||
6173 | local_irq_disable(); | ||
6174 | } | 6185 | } |
6175 | 6186 | ||
6176 | static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) | 6187 | static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) |
@@ -7256,7 +7267,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
7256 | .set_tdp_cr3 = set_tdp_cr3, | 7267 | .set_tdp_cr3 = set_tdp_cr3, |
7257 | 7268 | ||
7258 | .check_intercept = svm_check_intercept, | 7269 | .check_intercept = svm_check_intercept, |
7259 | .handle_external_intr = svm_handle_external_intr, | 7270 | .handle_exit_irqoff = svm_handle_exit_irqoff, |
7260 | 7271 | ||
7261 | .request_immediate_exit = __kvm_request_immediate_exit, | 7272 | .request_immediate_exit = __kvm_request_immediate_exit, |
7262 | 7273 | ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4d47a2631d1f..b5c831e79094 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -1365,7 +1365,7 @@ TRACE_EVENT(kvm_hv_timer_state, | |||
1365 | __entry->vcpu_id = vcpu_id; | 1365 | __entry->vcpu_id = vcpu_id; |
1366 | __entry->hv_timer_in_use = hv_timer_in_use; | 1366 | __entry->hv_timer_in_use = hv_timer_in_use; |
1367 | ), | 1367 | ), |
1368 | TP_printk("vcpu_id %x hv_timer %x\n", | 1368 | TP_printk("vcpu_id %x hv_timer %x", |
1369 | __entry->vcpu_id, | 1369 | __entry->vcpu_id, |
1370 | __entry->hv_timer_in_use) | 1370 | __entry->hv_timer_in_use) |
1371 | ); | 1371 | ); |
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 5466c6d85cf3..72359709cdc1 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/errno.h> | 3 | #include <linux/errno.h> |
4 | #include <linux/smp.h> | 4 | #include <linux/smp.h> |
5 | 5 | ||
6 | #include "../hyperv.h" | ||
6 | #include "evmcs.h" | 7 | #include "evmcs.h" |
7 | #include "vmcs.h" | 8 | #include "vmcs.h" |
8 | #include "vmx.h" | 9 | #include "vmx.h" |
@@ -313,6 +314,23 @@ void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) | |||
313 | } | 314 | } |
314 | #endif | 315 | #endif |
315 | 316 | ||
317 | bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) | ||
318 | { | ||
319 | struct hv_vp_assist_page assist_page; | ||
320 | |||
321 | *evmcs_gpa = -1ull; | ||
322 | |||
323 | if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page))) | ||
324 | return false; | ||
325 | |||
326 | if (unlikely(!assist_page.enlighten_vmentry)) | ||
327 | return false; | ||
328 | |||
329 | *evmcs_gpa = assist_page.current_nested_vmcs; | ||
330 | |||
331 | return true; | ||
332 | } | ||
333 | |||
316 | uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) | 334 | uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) |
317 | { | 335 | { |
318 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 336 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index e0fcef85b332..39a24eec8884 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h | |||
@@ -195,6 +195,7 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} | |||
195 | static inline void evmcs_touch_msr_bitmap(void) {} | 195 | static inline void evmcs_touch_msr_bitmap(void) {} |
196 | #endif /* IS_ENABLED(CONFIG_HYPERV) */ | 196 | #endif /* IS_ENABLED(CONFIG_HYPERV) */ |
197 | 197 | ||
198 | bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa); | ||
198 | uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); | 199 | uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); |
199 | int nested_enable_evmcs(struct kvm_vcpu *vcpu, | 200 | int nested_enable_evmcs(struct kvm_vcpu *vcpu, |
200 | uint16_t *vmcs_version); | 201 | uint16_t *vmcs_version); |
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 46af3a5e9209..bb509c254939 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c | |||
@@ -41,15 +41,19 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; | |||
41 | #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) | 41 | #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) |
42 | #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) | 42 | #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) |
43 | 43 | ||
44 | static u16 shadow_read_only_fields[] = { | 44 | struct shadow_vmcs_field { |
45 | #define SHADOW_FIELD_RO(x) x, | 45 | u16 encoding; |
46 | u16 offset; | ||
47 | }; | ||
48 | static struct shadow_vmcs_field shadow_read_only_fields[] = { | ||
49 | #define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) }, | ||
46 | #include "vmcs_shadow_fields.h" | 50 | #include "vmcs_shadow_fields.h" |
47 | }; | 51 | }; |
48 | static int max_shadow_read_only_fields = | 52 | static int max_shadow_read_only_fields = |
49 | ARRAY_SIZE(shadow_read_only_fields); | 53 | ARRAY_SIZE(shadow_read_only_fields); |
50 | 54 | ||
51 | static u16 shadow_read_write_fields[] = { | 55 | static struct shadow_vmcs_field shadow_read_write_fields[] = { |
52 | #define SHADOW_FIELD_RW(x) x, | 56 | #define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) }, |
53 | #include "vmcs_shadow_fields.h" | 57 | #include "vmcs_shadow_fields.h" |
54 | }; | 58 | }; |
55 | static int max_shadow_read_write_fields = | 59 | static int max_shadow_read_write_fields = |
@@ -63,34 +67,40 @@ static void init_vmcs_shadow_fields(void) | |||
63 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); | 67 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); |
64 | 68 | ||
65 | for (i = j = 0; i < max_shadow_read_only_fields; i++) { | 69 | for (i = j = 0; i < max_shadow_read_only_fields; i++) { |
66 | u16 field = shadow_read_only_fields[i]; | 70 | struct shadow_vmcs_field entry = shadow_read_only_fields[i]; |
71 | u16 field = entry.encoding; | ||
67 | 72 | ||
68 | if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && | 73 | if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && |
69 | (i + 1 == max_shadow_read_only_fields || | 74 | (i + 1 == max_shadow_read_only_fields || |
70 | shadow_read_only_fields[i + 1] != field + 1)) | 75 | shadow_read_only_fields[i + 1].encoding != field + 1)) |
71 | pr_err("Missing field from shadow_read_only_field %x\n", | 76 | pr_err("Missing field from shadow_read_only_field %x\n", |
72 | field + 1); | 77 | field + 1); |
73 | 78 | ||
74 | clear_bit(field, vmx_vmread_bitmap); | 79 | clear_bit(field, vmx_vmread_bitmap); |
75 | #ifdef CONFIG_X86_64 | ||
76 | if (field & 1) | 80 | if (field & 1) |
81 | #ifdef CONFIG_X86_64 | ||
77 | continue; | 82 | continue; |
83 | #else | ||
84 | entry.offset += sizeof(u32); | ||
78 | #endif | 85 | #endif |
79 | if (j < i) | 86 | shadow_read_only_fields[j++] = entry; |
80 | shadow_read_only_fields[j] = field; | ||
81 | j++; | ||
82 | } | 87 | } |
83 | max_shadow_read_only_fields = j; | 88 | max_shadow_read_only_fields = j; |
84 | 89 | ||
85 | for (i = j = 0; i < max_shadow_read_write_fields; i++) { | 90 | for (i = j = 0; i < max_shadow_read_write_fields; i++) { |
86 | u16 field = shadow_read_write_fields[i]; | 91 | struct shadow_vmcs_field entry = shadow_read_write_fields[i]; |
92 | u16 field = entry.encoding; | ||
87 | 93 | ||
88 | if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && | 94 | if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && |
89 | (i + 1 == max_shadow_read_write_fields || | 95 | (i + 1 == max_shadow_read_write_fields || |
90 | shadow_read_write_fields[i + 1] != field + 1)) | 96 | shadow_read_write_fields[i + 1].encoding != field + 1)) |
91 | pr_err("Missing field from shadow_read_write_field %x\n", | 97 | pr_err("Missing field from shadow_read_write_field %x\n", |
92 | field + 1); | 98 | field + 1); |
93 | 99 | ||
100 | WARN_ONCE(field >= GUEST_ES_AR_BYTES && | ||
101 | field <= GUEST_TR_AR_BYTES, | ||
102 | "Update vmcs12_write_any() to drop reserved bits from AR_BYTES"); | ||
103 | |||
94 | /* | 104 | /* |
95 | * PML and the preemption timer can be emulated, but the | 105 | * PML and the preemption timer can be emulated, but the |
96 | * processor cannot vmwrite to fields that don't exist | 106 | * processor cannot vmwrite to fields that don't exist |
@@ -115,13 +125,13 @@ static void init_vmcs_shadow_fields(void) | |||
115 | 125 | ||
116 | clear_bit(field, vmx_vmwrite_bitmap); | 126 | clear_bit(field, vmx_vmwrite_bitmap); |
117 | clear_bit(field, vmx_vmread_bitmap); | 127 | clear_bit(field, vmx_vmread_bitmap); |
118 | #ifdef CONFIG_X86_64 | ||
119 | if (field & 1) | 128 | if (field & 1) |
129 | #ifdef CONFIG_X86_64 | ||
120 | continue; | 130 | continue; |
131 | #else | ||
132 | entry.offset += sizeof(u32); | ||
121 | #endif | 133 | #endif |
122 | if (j < i) | 134 | shadow_read_write_fields[j++] = entry; |
123 | shadow_read_write_fields[j] = field; | ||
124 | j++; | ||
125 | } | 135 | } |
126 | max_shadow_read_write_fields = j; | 136 | max_shadow_read_write_fields = j; |
127 | } | 137 | } |
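
The shadow_vmcs_field change above pairs each field's hardware encoding with its offset inside vmcs12, so the shadow-VMCS copy loops can index the structure directly instead of translating encodings at runtime. A toy illustration of the pattern (userspace C with a made-up two-field layout; the encodings are examples, not taken from this patch):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy vmcs12-like layout; field names and encodings are illustrative only. */
struct demo_vmcs12 {
	uint64_t io_bitmap_a;
	uint32_t guest_interruptibility_info;
};

struct shadow_vmcs_field {
	uint16_t encoding;
	uint16_t offset;
};

#define SHADOW_FIELD(enc, name) \
	{ (enc), (uint16_t)offsetof(struct demo_vmcs12, name) }

static const struct shadow_vmcs_field demo_fields[] = {
	SHADOW_FIELD(0x2000, io_bitmap_a),
	SHADOW_FIELD(0x4824, guest_interruptibility_info),
};

int main(void)
{
	for (size_t i = 0; i < sizeof(demo_fields) / sizeof(demo_fields[0]); i++)
		printf("encoding %#x -> offset %u\n",
		       (unsigned int)demo_fields[i].encoding,
		       (unsigned int)demo_fields[i].offset);
	return 0;
}
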
@@ -182,7 +192,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) | |||
182 | 192 | ||
183 | static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) | 193 | static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) |
184 | { | 194 | { |
185 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); | 195 | secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); |
186 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | 196 | vmcs_write64(VMCS_LINK_POINTER, -1ull); |
187 | } | 197 | } |
188 | 198 | ||
@@ -238,22 +248,41 @@ static void free_nested(struct kvm_vcpu *vcpu) | |||
238 | free_loaded_vmcs(&vmx->nested.vmcs02); | 248 | free_loaded_vmcs(&vmx->nested.vmcs02); |
239 | } | 249 | } |
240 | 250 | ||
251 | static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, | ||
252 | struct loaded_vmcs *prev) | ||
253 | { | ||
254 | struct vmcs_host_state *dest, *src; | ||
255 | |||
256 | if (unlikely(!vmx->guest_state_loaded)) | ||
257 | return; | ||
258 | |||
259 | src = &prev->host_state; | ||
260 | dest = &vmx->loaded_vmcs->host_state; | ||
261 | |||
262 | vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); | ||
263 | dest->ldt_sel = src->ldt_sel; | ||
264 | #ifdef CONFIG_X86_64 | ||
265 | dest->ds_sel = src->ds_sel; | ||
266 | dest->es_sel = src->es_sel; | ||
267 | #endif | ||
268 | } | ||
269 | |||
241 | static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) | 270 | static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) |
242 | { | 271 | { |
243 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 272 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
273 | struct loaded_vmcs *prev; | ||
244 | int cpu; | 274 | int cpu; |
245 | 275 | ||
246 | if (vmx->loaded_vmcs == vmcs) | 276 | if (vmx->loaded_vmcs == vmcs) |
247 | return; | 277 | return; |
248 | 278 | ||
249 | cpu = get_cpu(); | 279 | cpu = get_cpu(); |
250 | vmx_vcpu_put(vcpu); | 280 | prev = vmx->loaded_vmcs; |
251 | vmx->loaded_vmcs = vmcs; | 281 | vmx->loaded_vmcs = vmcs; |
252 | vmx_vcpu_load(vcpu, cpu); | 282 | vmx_vcpu_load_vmcs(vcpu, cpu); |
283 | vmx_sync_vmcs_host_state(vmx, prev); | ||
253 | put_cpu(); | 284 | put_cpu(); |
254 | 285 | ||
255 | vm_entry_controls_reset_shadow(vmx); | ||
256 | vm_exit_controls_reset_shadow(vmx); | ||
257 | vmx_segment_cache_clear(vmx); | 286 | vmx_segment_cache_clear(vmx); |
258 | } | 287 | } |
259 | 288 | ||
@@ -930,8 +959,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne | |||
930 | * If PAE paging and EPT are both on, CR3 is not used by the CPU and | 959 | * If PAE paging and EPT are both on, CR3 is not used by the CPU and |
931 | * must not be dereferenced. | 960 | * must not be dereferenced. |
932 | */ | 961 | */ |
933 | if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) && | 962 | if (is_pae_paging(vcpu) && !nested_ept) { |
934 | !nested_ept) { | ||
935 | if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { | 963 | if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { |
936 | *entry_failure_code = ENTRY_FAIL_PDPTE; | 964 | *entry_failure_code = ENTRY_FAIL_PDPTE; |
937 | return -EINVAL; | 965 | return -EINVAL; |
@@ -1105,14 +1133,6 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) | |||
1105 | vmx->nested.msrs.misc_low = data; | 1133 | vmx->nested.msrs.misc_low = data; |
1106 | vmx->nested.msrs.misc_high = data >> 32; | 1134 | vmx->nested.msrs.misc_high = data >> 32; |
1107 | 1135 | ||
1108 | /* | ||
1109 | * If L1 has read-only VM-exit information fields, use the | ||
1110 | * less permissive vmx_vmwrite_bitmap to specify write | ||
1111 | * permissions for the shadow VMCS. | ||
1112 | */ | ||
1113 | if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) | ||
1114 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); | ||
1115 | |||
1116 | return 0; | 1136 | return 0; |
1117 | } | 1137 | } |
1118 | 1138 | ||
@@ -1214,6 +1234,11 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1214 | case MSR_IA32_VMX_VMCS_ENUM: | 1234 | case MSR_IA32_VMX_VMCS_ENUM: |
1215 | vmx->nested.msrs.vmcs_enum = data; | 1235 | vmx->nested.msrs.vmcs_enum = data; |
1216 | return 0; | 1236 | return 0; |
1237 | case MSR_IA32_VMX_VMFUNC: | ||
1238 | if (data & ~vmx->nested.msrs.vmfunc_controls) | ||
1239 | return -EINVAL; | ||
1240 | vmx->nested.msrs.vmfunc_controls = data; | ||
1241 | return 0; | ||
1217 | default: | 1242 | default: |
1218 | /* | 1243 | /* |
1219 | * The rest of the VMX capability MSRs do not support restore. | 1244 | * The rest of the VMX capability MSRs do not support restore. |
@@ -1301,41 +1326,29 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) | |||
1301 | } | 1326 | } |
1302 | 1327 | ||
1303 | /* | 1328 | /* |
1304 | * Copy the writable VMCS shadow fields back to the VMCS12, in case | 1329 | * Copy the writable VMCS shadow fields back to the VMCS12, in case they have |
1305 | * they have been modified by the L1 guest. Note that the "read-only" | 1330 | * been modified by the L1 guest. Note, "writable" in this context means |
1306 | * VM-exit information fields are actually writable if the vCPU is | 1331 | * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of |
1307 | * configured to support "VMWRITE to any supported field in the VMCS." | 1332 | * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only" |
1333 | * VM-exit information fields (which are actually writable if the vCPU is | ||
1334 | * configured to support "VMWRITE to any supported field in the VMCS"). | ||
1308 | */ | 1335 | */ |
1309 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | 1336 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) |
1310 | { | 1337 | { |
1311 | const u16 *fields[] = { | ||
1312 | shadow_read_write_fields, | ||
1313 | shadow_read_only_fields | ||
1314 | }; | ||
1315 | const int max_fields[] = { | ||
1316 | max_shadow_read_write_fields, | ||
1317 | max_shadow_read_only_fields | ||
1318 | }; | ||
1319 | int i, q; | ||
1320 | unsigned long field; | ||
1321 | u64 field_value; | ||
1322 | struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; | 1338 | struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; |
1339 | struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); | ||
1340 | struct shadow_vmcs_field field; | ||
1341 | unsigned long val; | ||
1342 | int i; | ||
1323 | 1343 | ||
1324 | preempt_disable(); | 1344 | preempt_disable(); |
1325 | 1345 | ||
1326 | vmcs_load(shadow_vmcs); | 1346 | vmcs_load(shadow_vmcs); |
1327 | 1347 | ||
1328 | for (q = 0; q < ARRAY_SIZE(fields); q++) { | 1348 | for (i = 0; i < max_shadow_read_write_fields; i++) { |
1329 | for (i = 0; i < max_fields[q]; i++) { | 1349 | field = shadow_read_write_fields[i]; |
1330 | field = fields[q][i]; | 1350 | val = __vmcs_readl(field.encoding); |
1331 | field_value = __vmcs_readl(field); | 1351 | vmcs12_write_any(vmcs12, field.encoding, field.offset, val); |
1332 | vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value); | ||
1333 | } | ||
1334 | /* | ||
1335 | * Skip the VM-exit information fields if they are read-only. | ||
1336 | */ | ||
1337 | if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) | ||
1338 | break; | ||
1339 | } | 1352 | } |
1340 | 1353 | ||
1341 | vmcs_clear(shadow_vmcs); | 1354 | vmcs_clear(shadow_vmcs); |
@@ -1346,7 +1359,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
1346 | 1359 | ||
1347 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | 1360 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) |
1348 | { | 1361 | { |
1349 | const u16 *fields[] = { | 1362 | const struct shadow_vmcs_field *fields[] = { |
1350 | shadow_read_write_fields, | 1363 | shadow_read_write_fields, |
1351 | shadow_read_only_fields | 1364 | shadow_read_only_fields |
1352 | }; | 1365 | }; |
@@ -1354,18 +1367,20 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | |||
1354 | max_shadow_read_write_fields, | 1367 | max_shadow_read_write_fields, |
1355 | max_shadow_read_only_fields | 1368 | max_shadow_read_only_fields |
1356 | }; | 1369 | }; |
1357 | int i, q; | ||
1358 | unsigned long field; | ||
1359 | u64 field_value = 0; | ||
1360 | struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; | 1370 | struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; |
1371 | struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); | ||
1372 | struct shadow_vmcs_field field; | ||
1373 | unsigned long val; | ||
1374 | int i, q; | ||
1361 | 1375 | ||
1362 | vmcs_load(shadow_vmcs); | 1376 | vmcs_load(shadow_vmcs); |
1363 | 1377 | ||
1364 | for (q = 0; q < ARRAY_SIZE(fields); q++) { | 1378 | for (q = 0; q < ARRAY_SIZE(fields); q++) { |
1365 | for (i = 0; i < max_fields[q]; i++) { | 1379 | for (i = 0; i < max_fields[q]; i++) { |
1366 | field = fields[q][i]; | 1380 | field = fields[q][i]; |
1367 | vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value); | 1381 | val = vmcs12_read_any(vmcs12, field.encoding, |
1368 | __vmcs_writel(field, field_value); | 1382 | field.offset); |
1383 | __vmcs_writel(field.encoding, val); | ||
1369 | } | 1384 | } |
1370 | } | 1385 | } |
1371 | 1386 | ||
@@ -1623,7 +1638,7 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) | |||
1623 | * evmcs->host_gdtr_base = vmcs12->host_gdtr_base; | 1638 | * evmcs->host_gdtr_base = vmcs12->host_gdtr_base; |
1624 | * evmcs->host_idtr_base = vmcs12->host_idtr_base; | 1639 | * evmcs->host_idtr_base = vmcs12->host_idtr_base; |
1625 | * evmcs->host_rsp = vmcs12->host_rsp; | 1640 | * evmcs->host_rsp = vmcs12->host_rsp; |
1626 | * sync_vmcs12() doesn't read these: | 1641 | * sync_vmcs02_to_vmcs12() doesn't read these: |
1627 | * evmcs->io_bitmap_a = vmcs12->io_bitmap_a; | 1642 | * evmcs->io_bitmap_a = vmcs12->io_bitmap_a; |
1628 | * evmcs->io_bitmap_b = vmcs12->io_bitmap_b; | 1643 | * evmcs->io_bitmap_b = vmcs12->io_bitmap_b; |
1629 | * evmcs->msr_bitmap = vmcs12->msr_bitmap; | 1644 | * evmcs->msr_bitmap = vmcs12->msr_bitmap; |
@@ -1768,26 +1783,22 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, | |||
1768 | bool from_launch) | 1783 | bool from_launch) |
1769 | { | 1784 | { |
1770 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1785 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1771 | struct hv_vp_assist_page assist_page; | 1786 | bool evmcs_gpa_changed = false; |
1787 | u64 evmcs_gpa; | ||
1772 | 1788 | ||
1773 | if (likely(!vmx->nested.enlightened_vmcs_enabled)) | 1789 | if (likely(!vmx->nested.enlightened_vmcs_enabled)) |
1774 | return 1; | 1790 | return 1; |
1775 | 1791 | ||
1776 | if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page))) | 1792 | if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) |
1777 | return 1; | ||
1778 | |||
1779 | if (unlikely(!assist_page.enlighten_vmentry)) | ||
1780 | return 1; | 1793 | return 1; |
1781 | 1794 | ||
1782 | if (unlikely(assist_page.current_nested_vmcs != | 1795 | if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { |
1783 | vmx->nested.hv_evmcs_vmptr)) { | ||
1784 | |||
1785 | if (!vmx->nested.hv_evmcs) | 1796 | if (!vmx->nested.hv_evmcs) |
1786 | vmx->nested.current_vmptr = -1ull; | 1797 | vmx->nested.current_vmptr = -1ull; |
1787 | 1798 | ||
1788 | nested_release_evmcs(vcpu); | 1799 | nested_release_evmcs(vcpu); |
1789 | 1800 | ||
1790 | if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs), | 1801 | if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa), |
1791 | &vmx->nested.hv_evmcs_map)) | 1802 | &vmx->nested.hv_evmcs_map)) |
1792 | return 0; | 1803 | return 0; |
1793 | 1804 | ||
@@ -1822,15 +1833,9 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, | |||
1822 | } | 1833 | } |
1823 | 1834 | ||
1824 | vmx->nested.dirty_vmcs12 = true; | 1835 | vmx->nested.dirty_vmcs12 = true; |
1825 | /* | 1836 | vmx->nested.hv_evmcs_vmptr = evmcs_gpa; |
1826 | * As we keep L2 state for one guest only 'hv_clean_fields' mask | ||
1827 | * can't be used when we switch between them. Reset it here for | ||
1828 | * simplicity. | ||
1829 | */ | ||
1830 | vmx->nested.hv_evmcs->hv_clean_fields &= | ||
1831 | ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; | ||
1832 | vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs; | ||
1833 | 1837 | ||
1838 | evmcs_gpa_changed = true; | ||
1834 | /* | 1839 | /* |
1835 | * Unlike normal vmcs12, enlightened vmcs12 is not fully | 1840 | * Unlike normal vmcs12, enlightened vmcs12 is not fully |
1836 | * reloaded from guest's memory (read only fields, fields not | 1841 | * reloaded from guest's memory (read only fields, fields not |
@@ -1844,10 +1849,19 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, | |||
1844 | } | 1849 | } |
1845 | 1850 | ||
1846 | } | 1851 | } |
1852 | |||
1853 | /* | ||
1854 | * Clean fields data can't be used on VMLAUNCH and when we switch | ||
1855 | * between different L2 guests as KVM keeps a single VMCS12 per L1. | ||
1856 | */ | ||
1857 | if (from_launch || evmcs_gpa_changed) | ||
1858 | vmx->nested.hv_evmcs->hv_clean_fields &= | ||
1859 | ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; | ||
1860 | |||
1847 | return 1; | 1861 | return 1; |
1848 | } | 1862 | } |
1849 | 1863 | ||
1850 | void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu) | 1864 | void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu) |
1851 | { | 1865 | { |
1852 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1866 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1853 | 1867 | ||
@@ -1868,7 +1882,7 @@ void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu) | |||
1868 | copy_vmcs12_to_shadow(vmx); | 1882 | copy_vmcs12_to_shadow(vmx); |
1869 | } | 1883 | } |
1870 | 1884 | ||
1871 | vmx->nested.need_vmcs12_sync = false; | 1885 | vmx->nested.need_vmcs12_to_shadow_sync = false; |
1872 | } | 1886 | } |
1873 | 1887 | ||
1874 | static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) | 1888 | static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) |
@@ -1948,8 +1962,20 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) | |||
1948 | if (cpu_has_vmx_msr_bitmap()) | 1962 | if (cpu_has_vmx_msr_bitmap()) |
1949 | vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); | 1963 | vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); |
1950 | 1964 | ||
1951 | if (enable_pml) | 1965 | /* |
1966 | * The PML address never changes, so it is constant in vmcs02. | ||
1967 | * Conceptually we want to copy the PML index from vmcs01 here, | ||
1968 | * and then back to vmcs01 on nested vmexit. But since we flush | ||
1969 | * the log and reset GUEST_PML_INDEX on each vmexit, the PML | ||
1970 | * index is also effectively constant in vmcs02. | ||
1971 | */ | ||
1972 | if (enable_pml) { | ||
1952 | vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); | 1973 | vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); |
1974 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); | ||
1975 | } | ||
1976 | |||
1977 | if (cpu_has_vmx_encls_vmexit()) | ||
1978 | vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); | ||
1953 | 1979 | ||
1954 | /* | 1980 | /* |
1955 | * Set the MSR load/store lists to match L0's settings. Only the | 1981 | * Set the MSR load/store lists to match L0's settings. Only the |
@@ -1963,7 +1989,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) | |||
1963 | vmx_set_constant_host_state(vmx); | 1989 | vmx_set_constant_host_state(vmx); |
1964 | } | 1990 | } |
1965 | 1991 | ||
1966 | static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx, | 1992 | static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, |
1967 | struct vmcs12 *vmcs12) | 1993 | struct vmcs12 *vmcs12) |
1968 | { | 1994 | { |
1969 | prepare_vmcs02_constant_state(vmx); | 1995 | prepare_vmcs02_constant_state(vmx); |
@@ -1984,17 +2010,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
1984 | u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); | 2010 | u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); |
1985 | 2011 | ||
1986 | if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) | 2012 | if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) |
1987 | prepare_vmcs02_early_full(vmx, vmcs12); | 2013 | prepare_vmcs02_early_rare(vmx, vmcs12); |
1988 | 2014 | ||
1989 | /* | 2015 | /* |
1990 | * PIN CONTROLS | 2016 | * PIN CONTROLS |
1991 | */ | 2017 | */ |
1992 | exec_control = vmcs12->pin_based_vm_exec_control; | 2018 | exec_control = vmx_pin_based_exec_ctrl(vmx); |
1993 | 2019 | exec_control |= (vmcs12->pin_based_vm_exec_control & | |
1994 | /* Preemption timer setting is computed directly in vmx_vcpu_run. */ | 2020 | ~PIN_BASED_VMX_PREEMPTION_TIMER); |
1995 | exec_control |= vmcs_config.pin_based_exec_ctrl; | ||
1996 | exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; | ||
1997 | vmx->loaded_vmcs->hv_timer_armed = false; | ||
1998 | 2021 | ||
1999 | /* Posted interrupts setting is only taken from vmcs12. */ | 2022 | /* Posted interrupts setting is only taken from vmcs12. */ |
2000 | if (nested_cpu_has_posted_intr(vmcs12)) { | 2023 | if (nested_cpu_has_posted_intr(vmcs12)) { |
@@ -2003,7 +2026,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2003 | } else { | 2026 | } else { |
2004 | exec_control &= ~PIN_BASED_POSTED_INTR; | 2027 | exec_control &= ~PIN_BASED_POSTED_INTR; |
2005 | } | 2028 | } |
2006 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); | 2029 | pin_controls_set(vmx, exec_control); |
2007 | 2030 | ||
2008 | /* | 2031 | /* |
2009 | * EXEC CONTROLS | 2032 | * EXEC CONTROLS |
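
The rewritten pin-controls setup starts from L0's baseline (vmx_pin_based_exec_ctrl()), ORs in everything L1 requested except the preemption timer, and takes the posted-interrupt bit only from vmcs12. A standalone sketch of that merge policy, with invented bit values standing in for the real PIN_BASED_* encodings:

#include <stdint.h>
#include <stdio.h>

/* Invented bit positions, for illustration only. */
#define PIN_EXT_INTR_EXITING  (1u << 0)
#define PIN_NMI_EXITING       (1u << 3)
#define PIN_PREEMPTION_TIMER  (1u << 6)
#define PIN_POSTED_INTR       (1u << 7)

/*
 * Merge L0's baseline pin controls with L1's request: everything L1 asks
 * for is honored except the preemption timer (emulated separately), and
 * posted interrupts are taken only from L1's VMCS.
 */
static uint32_t merge_pin_controls(uint32_t l0_base, uint32_t vmcs12_pin,
				   int l1_has_posted_intr)
{
	uint32_t ctl = l0_base;

	ctl |= vmcs12_pin & ~PIN_PREEMPTION_TIMER;

	if (l1_has_posted_intr)
		ctl |= PIN_POSTED_INTR;
	else
		ctl &= ~PIN_POSTED_INTR;

	return ctl;
}

int main(void)
{
	uint32_t l0 = PIN_EXT_INTR_EXITING | PIN_NMI_EXITING;
	uint32_t l1 = PIN_PREEMPTION_TIMER | PIN_POSTED_INTR;

	printf("merged = 0x%x\n", (unsigned)merge_pin_controls(l0, l1, 1));
	return 0;
}
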
@@ -2014,28 +2037,31 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2014 | exec_control &= ~CPU_BASED_TPR_SHADOW; | 2037 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
2015 | exec_control |= vmcs12->cpu_based_vm_exec_control; | 2038 | exec_control |= vmcs12->cpu_based_vm_exec_control; |
2016 | 2039 | ||
2017 | /* | 2040 | if (exec_control & CPU_BASED_TPR_SHADOW) |
2018 | * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if | ||
2019 | * nested_get_vmcs12_pages can't fix it up, the illegal value | ||
2020 | * will result in a VM entry failure. | ||
2021 | */ | ||
2022 | if (exec_control & CPU_BASED_TPR_SHADOW) { | ||
2023 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); | ||
2024 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); | 2041 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); |
2025 | } else { | ||
2026 | #ifdef CONFIG_X86_64 | 2042 | #ifdef CONFIG_X86_64 |
2043 | else | ||
2027 | exec_control |= CPU_BASED_CR8_LOAD_EXITING | | 2044 | exec_control |= CPU_BASED_CR8_LOAD_EXITING | |
2028 | CPU_BASED_CR8_STORE_EXITING; | 2045 | CPU_BASED_CR8_STORE_EXITING; |
2029 | #endif | 2046 | #endif |
2030 | } | ||
2031 | 2047 | ||
2032 | /* | 2048 | /* |
2033 | * A vmexit (to either L1 hypervisor or L0 userspace) is always needed | 2049 | * A vmexit (to either L1 hypervisor or L0 userspace) is always needed |
2034 | * for I/O port accesses. | 2050 | * for I/O port accesses. |
2035 | */ | 2051 | */ |
2036 | exec_control &= ~CPU_BASED_USE_IO_BITMAPS; | ||
2037 | exec_control |= CPU_BASED_UNCOND_IO_EXITING; | 2052 | exec_control |= CPU_BASED_UNCOND_IO_EXITING; |
2038 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); | 2053 | exec_control &= ~CPU_BASED_USE_IO_BITMAPS; |
2054 | |||
2055 | /* | ||
2056 | * This bit will be computed in nested_get_vmcs12_pages, because | ||
2057 | * we do not have access to L1's MSR bitmap yet. For now, keep | ||
2058 | * the same bit as before, hoping to avoid multiple VMWRITEs that | ||
2059 | * only set/clear this bit. | ||
2060 | */ | ||
2061 | exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; | ||
2062 | exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS; | ||
2063 | |||
2064 | exec_controls_set(vmx, exec_control); | ||
2039 | 2065 | ||
2040 | /* | 2066 | /* |
2041 | * SECONDARY EXEC CONTROLS | 2067 | * SECONDARY EXEC CONTROLS |
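
The pin_controls_set()/exec_controls_get()/exec_controls_set() accessors used above keep a software copy of each VMCS controls field, which is what lets the code preserve CPU_BASED_USE_MSR_BITMAPS without a VMREAD and skip redundant VMWRITEs. A rough user-space approximation of such a write-through shadow follows; vmcs_write32() here is only a stand-in for the privileged VMWRITE and the field encoding is arbitrary.

#include <stdint.h>
#include <stdio.h>

static unsigned int vmwrite_count;

/* Stand-in for the privileged VMWRITE instruction. */
static void vmcs_write32(uint32_t field, uint32_t val)
{
	vmwrite_count++;
	printf("VMWRITE 0x%x <- 0x%x\n", (unsigned)field, (unsigned)val);
}

/* One shadow per controls field: the last value written to hardware. */
struct controls_shadow {
	uint32_t field;
	uint32_t val;
};

static void controls_set(struct controls_shadow *s, uint32_t val)
{
	if (s->val != val) {		/* skip the VMWRITE when nothing changed */
		vmcs_write32(s->field, val);
		s->val = val;
	}
}

static uint32_t controls_get(const struct controls_shadow *s)
{
	return s->val;			/* no VMREAD needed */
}

static void controls_setbit(struct controls_shadow *s, uint32_t bit)
{
	controls_set(s, controls_get(s) | bit);
}

int main(void)
{
	struct controls_shadow exec = { .field = 0x4002, .val = 0 }; /* example encoding */

	controls_set(&exec, 0x0400);	/* first write goes to hardware */
	controls_setbit(&exec, 0x0400);	/* bit already set: no VMWRITE */
	printf("total VMWRITEs: %u\n", vmwrite_count);
	return 0;
}
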
@@ -2061,22 +2087,19 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2061 | /* VMCS shadowing for L2 is emulated for now */ | 2087 | /* VMCS shadowing for L2 is emulated for now */ |
2062 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | 2088 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; |
2063 | 2089 | ||
2064 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) | ||
2065 | vmcs_write16(GUEST_INTR_STATUS, | ||
2066 | vmcs12->guest_intr_status); | ||
2067 | |||
2068 | /* | 2090 | /* |
2069 | * Write an illegal value to APIC_ACCESS_ADDR. Later, | 2091 | * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4() |
2070 | * nested_get_vmcs12_pages will either fix it up or | 2092 | * will not have to rewrite the controls just for this bit. |
2071 | * remove the VM execution control. | ||
2072 | */ | 2093 | */ |
2073 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) | 2094 | if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() && |
2074 | vmcs_write64(APIC_ACCESS_ADDR, -1ull); | 2095 | (vmcs12->guest_cr4 & X86_CR4_UMIP)) |
2096 | exec_control |= SECONDARY_EXEC_DESC; | ||
2075 | 2097 | ||
2076 | if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) | 2098 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) |
2077 | vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); | 2099 | vmcs_write16(GUEST_INTR_STATUS, |
2100 | vmcs12->guest_intr_status); | ||
2078 | 2101 | ||
2079 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2102 | secondary_exec_controls_set(vmx, exec_control); |
2080 | } | 2103 | } |
2081 | 2104 | ||
2082 | /* | 2105 | /* |
@@ -2095,7 +2118,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2095 | if (guest_efer != host_efer) | 2118 | if (guest_efer != host_efer) |
2096 | exec_control |= VM_ENTRY_LOAD_IA32_EFER; | 2119 | exec_control |= VM_ENTRY_LOAD_IA32_EFER; |
2097 | } | 2120 | } |
2098 | vm_entry_controls_init(vmx, exec_control); | 2121 | vm_entry_controls_set(vmx, exec_control); |
2099 | 2122 | ||
2100 | /* | 2123 | /* |
2101 | * EXIT CONTROLS | 2124 | * EXIT CONTROLS |
@@ -2107,17 +2130,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2107 | exec_control = vmx_vmexit_ctrl(); | 2130 | exec_control = vmx_vmexit_ctrl(); |
2108 | if (cpu_has_load_ia32_efer() && guest_efer != host_efer) | 2131 | if (cpu_has_load_ia32_efer() && guest_efer != host_efer) |
2109 | exec_control |= VM_EXIT_LOAD_IA32_EFER; | 2132 | exec_control |= VM_EXIT_LOAD_IA32_EFER; |
2110 | vm_exit_controls_init(vmx, exec_control); | 2133 | vm_exit_controls_set(vmx, exec_control); |
2111 | |||
2112 | /* | ||
2113 | * Conceptually we want to copy the PML address and index from | ||
2114 | * vmcs01 here, and then back to vmcs01 on nested vmexit. But, | ||
2115 | * since we always flush the log on each vmexit and never change | ||
2116 | * the PML address (once set), this happens to be equivalent to | ||
2117 | * simply resetting the index in vmcs02. | ||
2118 | */ | ||
2119 | if (enable_pml) | ||
2120 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); | ||
2121 | 2134 | ||
2122 | /* | 2135 | /* |
2123 | * Interrupt/Exception Fields | 2136 | * Interrupt/Exception Fields |
@@ -2138,7 +2151,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2138 | } | 2151 | } |
2139 | } | 2152 | } |
2140 | 2153 | ||
2141 | static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | 2154 | static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) |
2142 | { | 2155 | { |
2143 | struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; | 2156 | struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; |
2144 | 2157 | ||
@@ -2162,6 +2175,8 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2162 | vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); | 2175 | vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); |
2163 | vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); | 2176 | vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); |
2164 | vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); | 2177 | vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); |
2178 | vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); | ||
2179 | vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); | ||
2165 | vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); | 2180 | vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); |
2166 | vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); | 2181 | vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); |
2167 | vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); | 2182 | vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); |
@@ -2198,6 +2213,10 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2198 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); | 2213 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); |
2199 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); | 2214 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); |
2200 | } | 2215 | } |
2216 | |||
2217 | if (kvm_mpx_supported() && vmx->nested.nested_run_pending && | ||
2218 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) | ||
2219 | vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); | ||
2201 | } | 2220 | } |
2202 | 2221 | ||
2203 | if (nested_cpu_has_xsaves(vmcs12)) | 2222 | if (nested_cpu_has_xsaves(vmcs12)) |
@@ -2233,14 +2252,6 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | |||
2233 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); | 2252 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); |
2234 | 2253 | ||
2235 | set_cr4_guest_host_mask(vmx); | 2254 | set_cr4_guest_host_mask(vmx); |
2236 | |||
2237 | if (kvm_mpx_supported()) { | ||
2238 | if (vmx->nested.nested_run_pending && | ||
2239 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) | ||
2240 | vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); | ||
2241 | else | ||
2242 | vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); | ||
2243 | } | ||
2244 | } | 2255 | } |
2245 | 2256 | ||
2246 | /* | 2257 | /* |
@@ -2259,20 +2270,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
2259 | { | 2270 | { |
2260 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2271 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2261 | struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; | 2272 | struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; |
2273 | bool load_guest_pdptrs_vmcs12 = false; | ||
2262 | 2274 | ||
2263 | if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) { | 2275 | if (vmx->nested.dirty_vmcs12 || hv_evmcs) { |
2264 | prepare_vmcs02_full(vmx, vmcs12); | 2276 | prepare_vmcs02_rare(vmx, vmcs12); |
2265 | vmx->nested.dirty_vmcs12 = false; | 2277 | vmx->nested.dirty_vmcs12 = false; |
2266 | } | ||
2267 | 2278 | ||
2268 | /* | 2279 | load_guest_pdptrs_vmcs12 = !hv_evmcs || |
2269 | * First, the fields that are shadowed. This must be kept in sync | 2280 | !(hv_evmcs->hv_clean_fields & |
2270 | * with vmcs_shadow_fields.h. | 2281 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1); |
2271 | */ | ||
2272 | if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & | ||
2273 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { | ||
2274 | vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); | ||
2275 | vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); | ||
2276 | } | 2282 | } |
2277 | 2283 | ||
2278 | if (vmx->nested.nested_run_pending && | 2284 | if (vmx->nested.nested_run_pending && |
@@ -2283,6 +2289,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
2283 | kvm_set_dr(vcpu, 7, vcpu->arch.dr7); | 2289 | kvm_set_dr(vcpu, 7, vcpu->arch.dr7); |
2284 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); | 2290 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); |
2285 | } | 2291 | } |
2292 | if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || | ||
2293 | !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) | ||
2294 | vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); | ||
2286 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); | 2295 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
2287 | 2296 | ||
2288 | /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the | 2297 | /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the |
@@ -2372,6 +2381,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
2372 | entry_failure_code)) | 2381 | entry_failure_code)) |
2373 | return -EINVAL; | 2382 | return -EINVAL; |
2374 | 2383 | ||
2384 | /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */ | ||
2385 | if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) && | ||
2386 | is_pae_paging(vcpu)) { | ||
2387 | vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); | ||
2388 | vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); | ||
2389 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); | ||
2390 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); | ||
2391 | } | ||
2392 | |||
2375 | if (!enable_ept) | 2393 | if (!enable_ept) |
2376 | vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; | 2394 | vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; |
2377 | 2395 | ||
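
The late PDPTR load above is gated on the eVMCS clean-fields check, nested EPT, and is_pae_paging(), the last of which can only be evaluated once EFER and the control registers have been established. A small sketch of that predicate, spelling out the architectural bits (CR0.PG, CR4.PAE, EFER.LMA) instead of using the kernel's helpers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CR0_PG   (1ull << 31)
#define CR4_PAE  (1ull << 5)
#define EFER_LMA (1ull << 10)

/*
 * PAE paging (the only mode with architecturally loaded PDPTEs) is in use
 * when paging is enabled, CR4.PAE is set and the CPU is not in long mode.
 */
static bool is_pae_paging(uint64_t cr0, uint64_t cr4, uint64_t efer)
{
	return (cr0 & CR0_PG) && (cr4 & CR4_PAE) && !(efer & EFER_LMA);
}

int main(void)
{
	printf("%d\n", is_pae_paging(CR0_PG, CR4_PAE, 0));		/* 1: 32-bit PAE */
	printf("%d\n", is_pae_paging(CR0_PG, CR4_PAE, EFER_LMA));	/* 0: long mode */
	printf("%d\n", is_pae_paging(CR0_PG, 0, 0));			/* 0: legacy 32-bit */
	return 0;
}
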
@@ -2609,6 +2627,30 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, | |||
2609 | !kvm_pat_valid(vmcs12->host_ia32_pat)) | 2627 | !kvm_pat_valid(vmcs12->host_ia32_pat)) |
2610 | return -EINVAL; | 2628 | return -EINVAL; |
2611 | 2629 | ||
2630 | ia32e = (vmcs12->vm_exit_controls & | ||
2631 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; | ||
2632 | |||
2633 | if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || | ||
2634 | vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || | ||
2635 | vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || | ||
2636 | vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || | ||
2637 | vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || | ||
2638 | vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || | ||
2639 | vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || | ||
2640 | vmcs12->host_cs_selector == 0 || | ||
2641 | vmcs12->host_tr_selector == 0 || | ||
2642 | (vmcs12->host_ss_selector == 0 && !ia32e)) | ||
2643 | return -EINVAL; | ||
2644 | |||
2645 | #ifdef CONFIG_X86_64 | ||
2646 | if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) || | ||
2647 | is_noncanonical_address(vmcs12->host_gs_base, vcpu) || | ||
2648 | is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) || | ||
2649 | is_noncanonical_address(vmcs12->host_idtr_base, vcpu) || | ||
2650 | is_noncanonical_address(vmcs12->host_tr_base, vcpu)) | ||
2651 | return -EINVAL; | ||
2652 | #endif | ||
2653 | |||
2612 | /* | 2654 | /* |
2613 | * If the load IA32_EFER VM-exit control is 1, bits reserved in the | 2655 | * If the load IA32_EFER VM-exit control is 1, bits reserved in the |
2614 | * IA32_EFER MSR must be 0 in the field for that register. In addition, | 2656 | * IA32_EFER MSR must be 0 in the field for that register. In addition, |
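
The new host-state checks above reject selectors with RPL or TI bits set, null CS/TR (and null SS outside 64-bit mode), and non-canonical base addresses. A self-contained sketch of the two core predicates, using the architectural definitions (RPL in bits 1:0, TI in bit 2, 48-bit canonical addresses) rather than the kernel's SEGMENT_* masks and is_noncanonical_address():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SEL_RPL_MASK 0x3	/* requested privilege level, bits 1:0 */
#define SEL_TI_MASK  0x4	/* table indicator (LDT vs GDT), bit 2 */

/* VM entry requires host selectors with RPL = 0 and TI = 0. */
static bool host_selector_ok(uint16_t sel)
{
	return (sel & (SEL_RPL_MASK | SEL_TI_MASK)) == 0;
}

/* A 48-bit virtual address is canonical when bits 63:47 are all equal. */
static bool is_canonical_48(uint64_t addr)
{
	uint64_t upper = addr >> 47;

	return upper == 0 || upper == 0x1ffff;
}

int main(void)
{
	printf("0x0010: %d\n", host_selector_ok(0x0010));	/* 1 */
	printf("0x0013: %d\n", host_selector_ok(0x0013));	/* 0: RPL = 3 */
	printf("0x0014: %d\n", host_selector_ok(0x0014));	/* 0: TI = 1 (LDT) */

	printf("0x00007fffffffffff: %d\n", is_canonical_48(0x00007fffffffffffULL)); /* 1 */
	printf("0x0000800000000000: %d\n", is_canonical_48(0x0000800000000000ULL)); /* 0 */
	printf("0xffff800000000000: %d\n", is_canonical_48(0xffff800000000000ULL)); /* 1 */
	return 0;
}
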
@@ -2616,8 +2658,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, | |||
2616 | * the host address-space size VM-exit control. | 2658 | * the host address-space size VM-exit control. |
2617 | */ | 2659 | */ |
2618 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { | 2660 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { |
2619 | ia32e = (vmcs12->vm_exit_controls & | ||
2620 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; | ||
2621 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || | 2661 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || |
2622 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || | 2662 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || |
2623 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) | 2663 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) |
@@ -2781,7 +2821,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) | |||
2781 | [launched]"i"(offsetof(struct loaded_vmcs, launched)), | 2821 | [launched]"i"(offsetof(struct loaded_vmcs, launched)), |
2782 | [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), | 2822 | [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), |
2783 | [wordsize]"i"(sizeof(ulong)) | 2823 | [wordsize]"i"(sizeof(ulong)) |
2784 | : "cc", "memory" | 2824 | : "memory" |
2785 | ); | 2825 | ); |
2786 | 2826 | ||
2787 | if (vmx->msr_autoload.host.nr) | 2827 | if (vmx->msr_autoload.host.nr) |
@@ -2851,18 +2891,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) | |||
2851 | hpa = page_to_phys(vmx->nested.apic_access_page); | 2891 | hpa = page_to_phys(vmx->nested.apic_access_page); |
2852 | vmcs_write64(APIC_ACCESS_ADDR, hpa); | 2892 | vmcs_write64(APIC_ACCESS_ADDR, hpa); |
2853 | } else { | 2893 | } else { |
2854 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, | 2894 | secondary_exec_controls_clearbit(vmx, |
2855 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 2895 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
2856 | } | 2896 | } |
2857 | } | 2897 | } |
2858 | 2898 | ||
2859 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | 2899 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
2860 | map = &vmx->nested.virtual_apic_map; | 2900 | map = &vmx->nested.virtual_apic_map; |
2861 | 2901 | ||
2862 | /* | ||
2863 | * If translation failed, VM entry will fail because | ||
2864 | * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. | ||
2865 | */ | ||
2866 | if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { | 2902 | if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { |
2867 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); | 2903 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); |
2868 | } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && | 2904 | } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && |
@@ -2876,11 +2912,13 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) | |||
2876 | * _not_ what the processor does but it's basically the | 2912 | * _not_ what the processor does but it's basically the |
2877 | * only possibility we have. | 2913 | * only possibility we have. |
2878 | */ | 2914 | */ |
2879 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, | 2915 | exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW); |
2880 | CPU_BASED_TPR_SHADOW); | ||
2881 | } else { | 2916 | } else { |
2882 | printk("bad virtual-APIC page address\n"); | 2917 | /* |
2883 | dump_vmcs(); | 2918 | * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to |
2919 | * force VM-Entry to fail. | ||
2920 | */ | ||
2921 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); | ||
2884 | } | 2922 | } |
2885 | } | 2923 | } |
2886 | 2924 | ||
@@ -2896,11 +2934,9 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) | |||
2896 | } | 2934 | } |
2897 | } | 2935 | } |
2898 | if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) | 2936 | if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) |
2899 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, | 2937 | exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); |
2900 | CPU_BASED_USE_MSR_BITMAPS); | ||
2901 | else | 2938 | else |
2902 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, | 2939 | exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); |
2903 | CPU_BASED_USE_MSR_BITMAPS); | ||
2904 | } | 2940 | } |
2905 | 2941 | ||
2906 | /* | 2942 | /* |
@@ -2953,7 +2989,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
2953 | u32 exit_reason = EXIT_REASON_INVALID_STATE; | 2989 | u32 exit_reason = EXIT_REASON_INVALID_STATE; |
2954 | u32 exit_qual; | 2990 | u32 exit_qual; |
2955 | 2991 | ||
2956 | evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & | 2992 | evaluate_pending_interrupts = exec_controls_get(vmx) & |
2957 | (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); | 2993 | (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); |
2958 | if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) | 2994 | if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) |
2959 | evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); | 2995 | evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); |
@@ -2964,6 +3000,25 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
2964 | !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) | 3000 | !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) |
2965 | vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | 3001 | vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); |
2966 | 3002 | ||
3003 | /* | ||
3004 | * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and* | ||
3005 | * nested early checks are disabled. In the event of a "late" VM-Fail, | ||
3006 | * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its | ||
3007 | * software model to the pre-VMEntry host state. When EPT is disabled, | ||
3008 | * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes | ||
3009 | * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing | ||
3010 | * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to | ||
3011 | * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested | ||
3012 | * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is | ||
3013 | * guaranteed to be overwritten with a shadow CR3 prior to re-entering | ||
3014 | * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as | ||
3015 | * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks | ||
3016 | * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail | ||
3017 | * path would need to manually save/restore vmcs01.GUEST_CR3. | ||
3018 | */ | ||
3019 | if (!enable_ept && !nested_early_check) | ||
3020 | vmcs_writel(GUEST_CR3, vcpu->arch.cr3); | ||
3021 | |||
2967 | vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); | 3022 | vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); |
2968 | 3023 | ||
2969 | prepare_vmcs02_early(vmx, vmcs12); | 3024 | prepare_vmcs02_early(vmx, vmcs12); |
@@ -3059,7 +3114,7 @@ vmentry_fail_vmexit: | |||
3059 | vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; | 3114 | vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; |
3060 | vmcs12->exit_qualification = exit_qual; | 3115 | vmcs12->exit_qualification = exit_qual; |
3061 | if (enable_shadow_vmcs || vmx->nested.hv_evmcs) | 3116 | if (enable_shadow_vmcs || vmx->nested.hv_evmcs) |
3062 | vmx->nested.need_vmcs12_sync = true; | 3117 | vmx->nested.need_vmcs12_to_shadow_sync = true; |
3063 | return 1; | 3118 | return 1; |
3064 | } | 3119 | } |
3065 | 3120 | ||
@@ -3077,7 +3132,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
3077 | if (!nested_vmx_check_permission(vcpu)) | 3132 | if (!nested_vmx_check_permission(vcpu)) |
3078 | return 1; | 3133 | return 1; |
3079 | 3134 | ||
3080 | if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true)) | 3135 | if (!nested_vmx_handle_enlightened_vmptrld(vcpu, launch)) |
3081 | return 1; | 3136 | return 1; |
3082 | 3137 | ||
3083 | if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull) | 3138 | if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull) |
@@ -3393,20 +3448,57 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) | |||
3393 | return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; | 3448 | return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; |
3394 | } | 3449 | } |
3395 | 3450 | ||
3396 | /* | 3451 | static bool is_vmcs12_ext_field(unsigned long field) |
3397 | * Update the guest state fields of vmcs12 to reflect changes that | 3452 | { |
3398 | * occurred while L2 was running. (The "IA-32e mode guest" bit of the | 3453 | switch (field) { |
3399 | * VM-entry controls is also updated, since this is really a guest | 3454 | case GUEST_ES_SELECTOR: |
3400 | * state bit.) | 3455 | case GUEST_CS_SELECTOR: |
3401 | */ | 3456 | case GUEST_SS_SELECTOR: |
3402 | static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 3457 | case GUEST_DS_SELECTOR: |
3403 | { | 3458 | case GUEST_FS_SELECTOR: |
3404 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 3459 | case GUEST_GS_SELECTOR: |
3405 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); | 3460 | case GUEST_LDTR_SELECTOR: |
3461 | case GUEST_TR_SELECTOR: | ||
3462 | case GUEST_ES_LIMIT: | ||
3463 | case GUEST_CS_LIMIT: | ||
3464 | case GUEST_SS_LIMIT: | ||
3465 | case GUEST_DS_LIMIT: | ||
3466 | case GUEST_FS_LIMIT: | ||
3467 | case GUEST_GS_LIMIT: | ||
3468 | case GUEST_LDTR_LIMIT: | ||
3469 | case GUEST_TR_LIMIT: | ||
3470 | case GUEST_GDTR_LIMIT: | ||
3471 | case GUEST_IDTR_LIMIT: | ||
3472 | case GUEST_ES_AR_BYTES: | ||
3473 | case GUEST_DS_AR_BYTES: | ||
3474 | case GUEST_FS_AR_BYTES: | ||
3475 | case GUEST_GS_AR_BYTES: | ||
3476 | case GUEST_LDTR_AR_BYTES: | ||
3477 | case GUEST_TR_AR_BYTES: | ||
3478 | case GUEST_ES_BASE: | ||
3479 | case GUEST_CS_BASE: | ||
3480 | case GUEST_SS_BASE: | ||
3481 | case GUEST_DS_BASE: | ||
3482 | case GUEST_FS_BASE: | ||
3483 | case GUEST_GS_BASE: | ||
3484 | case GUEST_LDTR_BASE: | ||
3485 | case GUEST_TR_BASE: | ||
3486 | case GUEST_GDTR_BASE: | ||
3487 | case GUEST_IDTR_BASE: | ||
3488 | case GUEST_PENDING_DBG_EXCEPTIONS: | ||
3489 | case GUEST_BNDCFGS: | ||
3490 | return true; | ||
3491 | default: | ||
3492 | break; | ||
3493 | } | ||
3406 | 3494 | ||
3407 | vmcs12->guest_rsp = kvm_rsp_read(vcpu); | 3495 | return false; |
3408 | vmcs12->guest_rip = kvm_rip_read(vcpu); | 3496 | } |
3409 | vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); | 3497 | |
3498 | static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, | ||
3499 | struct vmcs12 *vmcs12) | ||
3500 | { | ||
3501 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3410 | 3502 | ||
3411 | vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); | 3503 | vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); |
3412 | vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); | 3504 | vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); |
@@ -3427,8 +3519,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
3427 | vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); | 3519 | vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); |
3428 | vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); | 3520 | vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); |
3429 | vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); | 3521 | vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); |
3430 | vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); | ||
3431 | vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); | ||
3432 | vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); | 3522 | vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); |
3433 | vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); | 3523 | vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); |
3434 | vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); | 3524 | vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); |
@@ -3444,11 +3534,69 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
3444 | vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); | 3534 | vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); |
3445 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); | 3535 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); |
3446 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); | 3536 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); |
3537 | vmcs12->guest_pending_dbg_exceptions = | ||
3538 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | ||
3539 | if (kvm_mpx_supported()) | ||
3540 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | ||
3541 | |||
3542 | vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false; | ||
3543 | } | ||
3544 | |||
3545 | static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, | ||
3546 | struct vmcs12 *vmcs12) | ||
3547 | { | ||
3548 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3549 | int cpu; | ||
3550 | |||
3551 | if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare) | ||
3552 | return; | ||
3553 | |||
3554 | |||
3555 | WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01); | ||
3556 | |||
3557 | cpu = get_cpu(); | ||
3558 | vmx->loaded_vmcs = &vmx->nested.vmcs02; | ||
3559 | vmx_vcpu_load(&vmx->vcpu, cpu); | ||
3560 | |||
3561 | sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); | ||
3562 | |||
3563 | vmx->loaded_vmcs = &vmx->vmcs01; | ||
3564 | vmx_vcpu_load(&vmx->vcpu, cpu); | ||
3565 | put_cpu(); | ||
3566 | } | ||
3567 | |||
3568 | /* | ||
3569 | * Update the guest state fields of vmcs12 to reflect changes that | ||
3570 | * occurred while L2 was running. (The "IA-32e mode guest" bit of the | ||
3571 | * VM-entry controls is also updated, since this is really a guest | ||
3572 | * state bit.) | ||
3573 | */ | ||
3574 | static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | ||
3575 | { | ||
3576 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3577 | |||
3578 | if (vmx->nested.hv_evmcs) | ||
3579 | sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); | ||
3580 | |||
3581 | vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs; | ||
3582 | |||
3583 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | ||
3584 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); | ||
3585 | |||
3586 | vmcs12->guest_rsp = kvm_rsp_read(vcpu); | ||
3587 | vmcs12->guest_rip = kvm_rip_read(vcpu); | ||
3588 | vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); | ||
3589 | |||
3590 | vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); | ||
3591 | vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); | ||
3592 | |||
3593 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | ||
3594 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | ||
3595 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); | ||
3447 | 3596 | ||
3448 | vmcs12->guest_interruptibility_info = | 3597 | vmcs12->guest_interruptibility_info = |
3449 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 3598 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
3450 | vmcs12->guest_pending_dbg_exceptions = | 3599 | |
3451 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | ||
3452 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | 3600 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) |
3453 | vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; | 3601 | vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; |
3454 | else | 3602 | else |
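
Splitting sync_vmcs02_to_vmcs12() from sync_vmcs02_to_vmcs12_rare() is a lazy-sync optimization: the hot fields are copied on every nested VM-exit, while the many rarely consumed fields are merely flagged via need_sync_vmcs02_to_vmcs12_rare and copied on demand (VMREAD of such a field, VMCLEAR, or migration). A toy model of the pattern, with invented field names rather than the real vmcs12 layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy split of guest state into hot and rarely-consumed fields. */
struct hot_state  { uint64_t rip, rsp, rflags; };
struct rare_state { uint64_t gdtr_base, idtr_base, tr_base; };

struct vcpu_model {
	struct hot_state  hw_hot;	/* stands in for vmcs02 (hardware) state */
	struct rare_state hw_rare;
	struct hot_state  cache_hot;	/* stands in for vmcs12 (software) state */
	struct rare_state cache_rare;
	bool rare_dirty;		/* cache_rare is out of date */
};

/* Run on every nested VM-exit: copy only the hot fields, defer the rest. */
static void sync_on_exit(struct vcpu_model *v)
{
	v->cache_hot = v->hw_hot;
	v->rare_dirty = true;
}

/* Run only when someone actually consumes the rare fields. */
static void sync_rare(struct vcpu_model *v)
{
	if (!v->rare_dirty)
		return;
	v->cache_rare = v->hw_rare;
	v->rare_dirty = false;
}

int main(void)
{
	struct vcpu_model v = { .hw_hot  = { .rip = 0x1000 },
				.hw_rare = { .gdtr_base = 0xfffff000 } };

	sync_on_exit(&v);	/* cheap path, taken on every exit */
	sync_rare(&v);		/* expensive path, taken on demand */
	printf("rip=0x%llx gdtr=0x%llx\n",
	       (unsigned long long)v.cache_hot.rip,
	       (unsigned long long)v.cache_rare.gdtr_base);
	return 0;
}
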
@@ -3469,10 +3617,12 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
3469 | */ | 3617 | */ |
3470 | if (enable_ept) { | 3618 | if (enable_ept) { |
3471 | vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); | 3619 | vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); |
3472 | vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); | 3620 | if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) { |
3473 | vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); | 3621 | vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); |
3474 | vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); | 3622 | vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); |
3475 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); | 3623 | vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); |
3624 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); | ||
3625 | } | ||
3476 | } | 3626 | } |
3477 | 3627 | ||
3478 | vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); | 3628 | vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); |
@@ -3484,22 +3634,11 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
3484 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 3634 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
3485 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); | 3635 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); |
3486 | 3636 | ||
3487 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { | 3637 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) |
3488 | kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); | 3638 | kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); |
3489 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | ||
3490 | } | ||
3491 | 3639 | ||
3492 | /* TODO: These cannot have changed unless we have MSR bitmaps and | ||
3493 | * the relevant bit asks not to trap the change */ | ||
3494 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) | ||
3495 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); | ||
3496 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) | 3640 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) |
3497 | vmcs12->guest_ia32_efer = vcpu->arch.efer; | 3641 | vmcs12->guest_ia32_efer = vcpu->arch.efer; |
3498 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | ||
3499 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | ||
3500 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); | ||
3501 | if (kvm_mpx_supported()) | ||
3502 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | ||
3503 | } | 3642 | } |
3504 | 3643 | ||
3505 | /* | 3644 | /* |
@@ -3517,11 +3656,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
3517 | u32 exit_reason, u32 exit_intr_info, | 3656 | u32 exit_reason, u32 exit_intr_info, |
3518 | unsigned long exit_qualification) | 3657 | unsigned long exit_qualification) |
3519 | { | 3658 | { |
3520 | /* update guest state fields: */ | ||
3521 | sync_vmcs12(vcpu, vmcs12); | ||
3522 | |||
3523 | /* update exit information fields: */ | 3659 | /* update exit information fields: */ |
3524 | |||
3525 | vmcs12->vm_exit_reason = exit_reason; | 3660 | vmcs12->vm_exit_reason = exit_reason; |
3526 | vmcs12->exit_qualification = exit_qualification; | 3661 | vmcs12->exit_qualification = exit_qualification; |
3527 | vmcs12->vm_exit_intr_info = exit_intr_info; | 3662 | vmcs12->vm_exit_intr_info = exit_intr_info; |
@@ -3775,18 +3910,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) | |||
3775 | vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); | 3910 | vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); |
3776 | 3911 | ||
3777 | nested_ept_uninit_mmu_context(vcpu); | 3912 | nested_ept_uninit_mmu_context(vcpu); |
3778 | 3913 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | |
3779 | /* | 3914 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); |
3780 | * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3 | ||
3781 | * points to shadow pages! Fortunately we only get here after a WARN_ON | ||
3782 | * if EPT is disabled, so a VMabort is perfectly fine. | ||
3783 | */ | ||
3784 | if (enable_ept) { | ||
3785 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
3786 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
3787 | } else { | ||
3788 | nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED); | ||
3789 | } | ||
3790 | 3915 | ||
3791 | /* | 3916 | /* |
3792 | * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs | 3917 | * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs |
@@ -3794,7 +3919,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) | |||
3794 | * VMFail, like everything else we just need to ensure our | 3919 | * VMFail, like everything else we just need to ensure our |
3795 | * software model is up-to-date. | 3920 | * software model is up-to-date. |
3796 | */ | 3921 | */ |
3797 | ept_save_pdptrs(vcpu); | 3922 | if (enable_ept) |
3923 | ept_save_pdptrs(vcpu); | ||
3798 | 3924 | ||
3799 | kvm_mmu_reset_context(vcpu); | 3925 | kvm_mmu_reset_context(vcpu); |
3800 | 3926 | ||
@@ -3882,14 +4008,14 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
3882 | vcpu->arch.tsc_offset -= vmcs12->tsc_offset; | 4008 | vcpu->arch.tsc_offset -= vmcs12->tsc_offset; |
3883 | 4009 | ||
3884 | if (likely(!vmx->fail)) { | 4010 | if (likely(!vmx->fail)) { |
3885 | if (exit_reason == -1) | 4011 | sync_vmcs02_to_vmcs12(vcpu, vmcs12); |
3886 | sync_vmcs12(vcpu, vmcs12); | 4012 | |
3887 | else | 4013 | if (exit_reason != -1) |
3888 | prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, | 4014 | prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, |
3889 | exit_qualification); | 4015 | exit_qualification); |
3890 | 4016 | ||
3891 | /* | 4017 | /* |
3892 | * Must happen outside of sync_vmcs12() as it will | 4018 | * Must happen outside of sync_vmcs02_to_vmcs12() as it will |
3893 | * also be used to capture vmcs12 cache as part of | 4019 | * also be used to capture vmcs12 cache as part of |
3894 | * capturing nVMX state for snapshot (migration). | 4020 | * capturing nVMX state for snapshot (migration). |
3895 | * | 4021 | * |
@@ -3945,7 +4071,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
3945 | kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); | 4071 | kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); |
3946 | 4072 | ||
3947 | if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs)) | 4073 | if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs)) |
3948 | vmx->nested.need_vmcs12_sync = true; | 4074 | vmx->nested.need_vmcs12_to_shadow_sync = true; |
3949 | 4075 | ||
3950 | /* in case we halted in L2 */ | 4076 | /* in case we halted in L2 */ |
3951 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 4077 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
@@ -4008,7 +4134,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
4008 | * #UD or #GP. | 4134 | * #UD or #GP. |
4009 | */ | 4135 | */ |
4010 | int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | 4136 | int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, |
4011 | u32 vmx_instruction_info, bool wr, gva_t *ret) | 4137 | u32 vmx_instruction_info, bool wr, int len, gva_t *ret) |
4012 | { | 4138 | { |
4013 | gva_t off; | 4139 | gva_t off; |
4014 | bool exn; | 4140 | bool exn; |
@@ -4115,7 +4241,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | |||
4115 | */ | 4241 | */ |
4116 | if (!(s.base == 0 && s.limit == 0xffffffff && | 4242 | if (!(s.base == 0 && s.limit == 0xffffffff && |
4117 | ((s.type & 8) || !(s.type & 4)))) | 4243 | ((s.type & 8) || !(s.type & 4)))) |
4118 | exn = exn || (off + sizeof(u64) > s.limit); | 4244 | exn = exn || ((u64)off + len - 1 > s.limit); |
4119 | } | 4245 | } |
4120 | if (exn) { | 4246 | if (exn) { |
4121 | kvm_queue_exception_e(vcpu, | 4247 | kvm_queue_exception_e(vcpu, |
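
get_vmx_mem_address() now receives the operand length and checks off + len - 1 against the segment limit instead of hard-coding an 8-byte access. A small sketch of that bounds check, ignoring the expand-down and long-mode cases that the surrounding code handles separately:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * A memory operand of 'len' bytes at offset 'off' fits an expand-up segment
 * when its last byte does not exceed the limit.  Doing the sum in 64-bit
 * arithmetic guards against wrap-around for large 32-bit offsets.
 */
static bool operand_fits(uint32_t off, uint32_t len, uint32_t limit)
{
	return (uint64_t)off + len - 1 <= limit;
}

int main(void)
{
	printf("%d\n", operand_fits(0xfffc, 4, 0xffff));	/* 1: ends exactly at the limit */
	printf("%d\n", operand_fits(0xfffc, 8, 0xffff));	/* 0: last byte is past the limit */
	return 0;
}
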
@@ -4134,7 +4260,8 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) | |||
4134 | struct x86_exception e; | 4260 | struct x86_exception e; |
4135 | 4261 | ||
4136 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | 4262 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), |
4137 | vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) | 4263 | vmcs_read32(VMX_INSTRUCTION_INFO), false, |
4264 | sizeof(*vmpointer), &gva)) | ||
4138 | return 1; | 4265 | return 1; |
4139 | 4266 | ||
4140 | if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { | 4267 | if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { |
@@ -4300,11 +4427,13 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) | |||
4300 | if (vmx->nested.current_vmptr == -1ull) | 4427 | if (vmx->nested.current_vmptr == -1ull) |
4301 | return; | 4428 | return; |
4302 | 4429 | ||
4430 | copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); | ||
4431 | |||
4303 | if (enable_shadow_vmcs) { | 4432 | if (enable_shadow_vmcs) { |
4304 | /* copy to memory all shadowed fields in case | 4433 | /* copy to memory all shadowed fields in case |
4305 | they were modified */ | 4434 | they were modified */ |
4306 | copy_shadow_to_vmcs12(vmx); | 4435 | copy_shadow_to_vmcs12(vmx); |
4307 | vmx->nested.need_vmcs12_sync = false; | 4436 | vmx->nested.need_vmcs12_to_shadow_sync = false; |
4308 | vmx_disable_shadow_vmcs(vmx); | 4437 | vmx_disable_shadow_vmcs(vmx); |
4309 | } | 4438 | } |
4310 | vmx->nested.posted_intr_nv = -1; | 4439 | vmx->nested.posted_intr_nv = -1; |
@@ -4334,6 +4463,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
4334 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4463 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4335 | u32 zero = 0; | 4464 | u32 zero = 0; |
4336 | gpa_t vmptr; | 4465 | gpa_t vmptr; |
4466 | u64 evmcs_gpa; | ||
4337 | 4467 | ||
4338 | if (!nested_vmx_check_permission(vcpu)) | 4468 | if (!nested_vmx_check_permission(vcpu)) |
4339 | return 1; | 4469 | return 1; |
@@ -4349,10 +4479,18 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
4349 | return nested_vmx_failValid(vcpu, | 4479 | return nested_vmx_failValid(vcpu, |
4350 | VMXERR_VMCLEAR_VMXON_POINTER); | 4480 | VMXERR_VMCLEAR_VMXON_POINTER); |
4351 | 4481 | ||
4352 | if (vmx->nested.hv_evmcs_map.hva) { | 4482 | /* |
4353 | if (vmptr == vmx->nested.hv_evmcs_vmptr) | 4483 | * When Enlightened VMEntry is enabled on the calling CPU we treat |
4354 | nested_release_evmcs(vcpu); | 4484 | * memory area pointed to by vmptr as Enlightened VMCS (as there's no good |

4355 | } else { | 4485 | * way to distinguish it from VMCS12) and we must not corrupt it by |
4486 | * writing to the non-existent 'launch_state' field. The area doesn't | ||
4487 | * have to be the currently active EVMCS on the calling CPU and there's | ||
4488 | * nothing KVM has to do to transition it from 'active' to 'non-active' | ||
4489 | * state. It is possible that the area will stay mapped as | ||
4490 | * vmx->nested.hv_evmcs but this shouldn't be a problem. | ||
4491 | */ | ||
4492 | if (likely(!vmx->nested.enlightened_vmcs_enabled || | ||
4493 | !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) { | ||
4356 | if (vmptr == vmx->nested.current_vmptr) | 4494 | if (vmptr == vmx->nested.current_vmptr) |
4357 | nested_release_vmcs12(vcpu); | 4495 | nested_release_vmcs12(vcpu); |
4358 | 4496 | ||
@@ -4386,8 +4524,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) | |||
4386 | u64 field_value; | 4524 | u64 field_value; |
4387 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 4525 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
4388 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 4526 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
4527 | int len; | ||
4389 | gva_t gva = 0; | 4528 | gva_t gva = 0; |
4390 | struct vmcs12 *vmcs12; | 4529 | struct vmcs12 *vmcs12; |
4530 | short offset; | ||
4391 | 4531 | ||
4392 | if (!nested_vmx_check_permission(vcpu)) | 4532 | if (!nested_vmx_check_permission(vcpu)) |
4393 | return 1; | 4533 | return 1; |
@@ -4409,11 +4549,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu) | |||
4409 | 4549 | ||
4410 | /* Decode instruction info and find the field to read */ | 4550 | /* Decode instruction info and find the field to read */ |
4411 | field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); | 4551 | field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); |
4412 | /* Read the field, zero-extended to a u64 field_value */ | 4552 | |
4413 | if (vmcs12_read_any(vmcs12, field, &field_value) < 0) | 4553 | offset = vmcs_field_to_offset(field); |
4554 | if (offset < 0) | ||
4414 | return nested_vmx_failValid(vcpu, | 4555 | return nested_vmx_failValid(vcpu, |
4415 | VMXERR_UNSUPPORTED_VMCS_COMPONENT); | 4556 | VMXERR_UNSUPPORTED_VMCS_COMPONENT); |
4416 | 4557 | ||
4558 | if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) | ||
4559 | copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); | ||
4560 | |||
4561 | /* Read the field, zero-extended to a u64 field_value */ | ||
4562 | field_value = vmcs12_read_any(vmcs12, field, offset); | ||
4563 | |||
4417 | /* | 4564 | /* |
4418 | * Now copy part of this value to register or memory, as requested. | 4565 | * Now copy part of this value to register or memory, as requested. |
4419 | * Note that the number of bits actually copied is 32 or 64 depending | 4566 | * Note that the number of bits actually copied is 32 or 64 depending |
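
handle_vmread() now resolves the field encoding to a byte offset once, fails the instruction for unsupported components, and only then reads the value. A toy version of that lookup-then-read split follows; the struct layout is invented and the width handling is simplified (real VMCS encodings carry the field width in the encoding itself).

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy "vmcs12" with two fields; the layout is invented for illustration. */
struct toy_vmcs12 {
	uint64_t guest_rip;
	uint32_t guest_es_limit;
};

#define FIELD_GUEST_RIP      0x681e
#define FIELD_GUEST_ES_LIMIT 0x4800

/* Map a field encoding to a byte offset, or -1 for unsupported components. */
static short field_to_offset(unsigned long field)
{
	switch (field) {
	case FIELD_GUEST_RIP:      return offsetof(struct toy_vmcs12, guest_rip);
	case FIELD_GUEST_ES_LIMIT: return offsetof(struct toy_vmcs12, guest_es_limit);
	default:                   return -1;
	}
}

/* Read a field zero-extended to 64 bits (width handling simplified here). */
static uint64_t read_any(const struct toy_vmcs12 *v, unsigned long field, short offset)
{
	const char *p = (const char *)v + offset;

	if (field == FIELD_GUEST_ES_LIMIT)
		return *(const uint32_t *)p;
	return *(const uint64_t *)p;
}

int main(void)
{
	struct toy_vmcs12 v = { .guest_rip = 0xfff0, .guest_es_limit = 0xffff };
	unsigned long field = FIELD_GUEST_RIP;
	short offset = field_to_offset(field);

	if (offset < 0) {
		puts("VMfail(unsupported VMCS component)");
		return 1;
	}
	printf("field 0x%lx = 0x%llx\n", field,
	       (unsigned long long)read_any(&v, field, offset));
	return 0;
}
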
@@ -4423,21 +4570,45 @@ static int handle_vmread(struct kvm_vcpu *vcpu) | |||
4423 | kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), | 4570 | kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), |
4424 | field_value); | 4571 | field_value); |
4425 | } else { | 4572 | } else { |
4573 | len = is_64_bit_mode(vcpu) ? 8 : 4; | ||
4426 | if (get_vmx_mem_address(vcpu, exit_qualification, | 4574 | if (get_vmx_mem_address(vcpu, exit_qualification, |
4427 | vmx_instruction_info, true, &gva)) | 4575 | vmx_instruction_info, true, len, &gva)) |
4428 | return 1; | 4576 | return 1; |
4429 | /* _system ok, nested_vmx_check_permission has verified cpl=0 */ | 4577 | /* _system ok, nested_vmx_check_permission has verified cpl=0 */ |
4430 | kvm_write_guest_virt_system(vcpu, gva, &field_value, | 4578 | kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL); |
4431 | (is_long_mode(vcpu) ? 8 : 4), NULL); | ||
4432 | } | 4579 | } |
4433 | 4580 | ||
4434 | return nested_vmx_succeed(vcpu); | 4581 | return nested_vmx_succeed(vcpu); |
4435 | } | 4582 | } |
4436 | 4583 | ||
4584 | static bool is_shadow_field_rw(unsigned long field) | ||
4585 | { | ||
4586 | switch (field) { | ||
4587 | #define SHADOW_FIELD_RW(x, y) case x: | ||
4588 | #include "vmcs_shadow_fields.h" | ||
4589 | return true; | ||
4590 | default: | ||
4591 | break; | ||
4592 | } | ||
4593 | return false; | ||
4594 | } | ||
4595 | |||
4596 | static bool is_shadow_field_ro(unsigned long field) | ||
4597 | { | ||
4598 | switch (field) { | ||
4599 | #define SHADOW_FIELD_RO(x, y) case x: | ||
4600 | #include "vmcs_shadow_fields.h" | ||
4601 | return true; | ||
4602 | default: | ||
4603 | break; | ||
4604 | } | ||
4605 | return false; | ||
4606 | } | ||
4437 | 4607 | ||
4438 | static int handle_vmwrite(struct kvm_vcpu *vcpu) | 4608 | static int handle_vmwrite(struct kvm_vcpu *vcpu) |
4439 | { | 4609 | { |
4440 | unsigned long field; | 4610 | unsigned long field; |
4611 | int len; | ||
4441 | gva_t gva; | 4612 | gva_t gva; |
4442 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4613 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4443 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 4614 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
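
is_shadow_field_rw() and is_shadow_field_ro() above turn vmcs_shadow_fields.h into an X-macro: redefining SHADOW_FIELD_RW/SHADOW_FIELD_RO before the #include expands each entry into a case label. A self-contained illustration of the same trick using an inline field list (the entries below are examples, not the kernel's shadow-field list):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for vmcs_shadow_fields.h: one X-macro entry per shadowed field. */
#define TOY_SHADOW_FIELDS(RW, RO)  \
	RW(0x6820 /* GUEST_RFLAGS */)  \
	RW(0x681e /* GUEST_RIP */)     \
	RO(0x4402 /* VM_EXIT_REASON */)

#define IGNORE(x)

static bool is_shadow_field_rw(unsigned long field)
{
	switch (field) {
#define RW_CASE(x) case x:
	TOY_SHADOW_FIELDS(RW_CASE, IGNORE)
		return true;
#undef RW_CASE
	default:
		return false;
	}
}

static bool is_shadow_field_ro(unsigned long field)
{
	switch (field) {
#define RO_CASE(x) case x:
	TOY_SHADOW_FIELDS(IGNORE, RO_CASE)
		return true;
#undef RO_CASE
	default:
		return false;
	}
}

int main(void)
{
	printf("0x681e rw=%d ro=%d\n", is_shadow_field_rw(0x681e), is_shadow_field_ro(0x681e));
	printf("0x4402 rw=%d ro=%d\n", is_shadow_field_rw(0x4402), is_shadow_field_ro(0x4402));
	return 0;
}
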
@@ -4452,6 +4623,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
4452 | u64 field_value = 0; | 4623 | u64 field_value = 0; |
4453 | struct x86_exception e; | 4624 | struct x86_exception e; |
4454 | struct vmcs12 *vmcs12; | 4625 | struct vmcs12 *vmcs12; |
4626 | short offset; | ||
4455 | 4627 | ||
4456 | if (!nested_vmx_check_permission(vcpu)) | 4628 | if (!nested_vmx_check_permission(vcpu)) |
4457 | return 1; | 4629 | return 1; |
@@ -4463,11 +4635,11 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
4463 | field_value = kvm_register_readl(vcpu, | 4635 | field_value = kvm_register_readl(vcpu, |
4464 | (((vmx_instruction_info) >> 3) & 0xf)); | 4636 | (((vmx_instruction_info) >> 3) & 0xf)); |
4465 | else { | 4637 | else { |
4638 | len = is_64_bit_mode(vcpu) ? 8 : 4; | ||
4466 | if (get_vmx_mem_address(vcpu, exit_qualification, | 4639 | if (get_vmx_mem_address(vcpu, exit_qualification, |
4467 | vmx_instruction_info, false, &gva)) | 4640 | vmx_instruction_info, false, len, &gva)) |
4468 | return 1; | 4641 | return 1; |
4469 | if (kvm_read_guest_virt(vcpu, gva, &field_value, | 4642 | if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) { |
4470 | (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { | ||
4471 | kvm_inject_page_fault(vcpu, &e); | 4643 | kvm_inject_page_fault(vcpu, &e); |
4472 | return 1; | 4644 | return 1; |
4473 | } | 4645 | } |
@@ -4484,9 +4656,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
4484 | return nested_vmx_failValid(vcpu, | 4656 | return nested_vmx_failValid(vcpu, |
4485 | VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); | 4657 | VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); |
4486 | 4658 | ||
4487 | if (!is_guest_mode(vcpu)) | 4659 | if (!is_guest_mode(vcpu)) { |
4488 | vmcs12 = get_vmcs12(vcpu); | 4660 | vmcs12 = get_vmcs12(vcpu); |
4489 | else { | 4661 | |
4662 | /* | ||
4663 | * Ensure vmcs12 is up-to-date before any VMWRITE that dirties | ||
4664 | * vmcs12, else we may clobber a field or consume a stale value. | ||
4665 | */ | ||
4666 | if (!is_shadow_field_rw(field)) | ||
4667 | copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); | ||
4668 | } else { | ||
4490 | /* | 4669 | /* |
4491 | * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE | 4670 | * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE |
4492 | * to shadowed-field sets the ALU flags for VMfailInvalid. | 4671 | * to shadowed-field sets the ALU flags for VMfailInvalid. |
@@ -4496,28 +4675,46 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
4496 | vmcs12 = get_shadow_vmcs12(vcpu); | 4675 | vmcs12 = get_shadow_vmcs12(vcpu); |
4497 | } | 4676 | } |
4498 | 4677 | ||
4499 | if (vmcs12_write_any(vmcs12, field, field_value) < 0) | 4678 | offset = vmcs_field_to_offset(field); |
4679 | if (offset < 0) | ||
4500 | return nested_vmx_failValid(vcpu, | 4680 | return nested_vmx_failValid(vcpu, |
4501 | VMXERR_UNSUPPORTED_VMCS_COMPONENT); | 4681 | VMXERR_UNSUPPORTED_VMCS_COMPONENT); |
4502 | 4682 | ||
4503 | /* | 4683 | /* |
4504 | * Do not track vmcs12 dirty-state if in guest-mode | 4684 | * Some Intel CPUs intentionally drop the reserved bits of the AR byte |
4505 | * as we actually dirty shadow vmcs12 instead of vmcs12. | 4685 | * fields on VMWRITE. Emulate this behavior to ensure consistent KVM |
4686 | * behavior regardless of the underlying hardware, e.g. if an AR_BYTE | ||
4687 | * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD | ||
4688 | * from L1 will return a different value than VMREAD from L2 (L1 sees | ||
4689 | * the stripped down value, L2 sees the full value as stored by KVM). | ||
4506 | */ | 4690 | */ |
4507 | if (!is_guest_mode(vcpu)) { | 4691 | if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES) |
4508 | switch (field) { | 4692 | field_value &= 0x1f0ff; |
4509 | #define SHADOW_FIELD_RW(x) case x: | 4693 | |
4510 | #include "vmcs_shadow_fields.h" | 4694 | vmcs12_write_any(vmcs12, field, offset, field_value); |
4511 | /* | 4695 | |
4512 | * The fields that can be updated by L1 without a vmexit are | 4696 | /* |
4513 | * always updated in the vmcs02, the others go down the slow | 4697 | * Do not track vmcs12 dirty-state if in guest-mode as we actually |
4514 | * path of prepare_vmcs02. | 4698 | * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated |
4515 | */ | 4699 | * by L1 without a vmexit are always updated in the vmcs02, i.e. don't |
4516 | break; | 4700 | * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path. |
4517 | default: | 4701 | */ |
4518 | vmx->nested.dirty_vmcs12 = true; | 4702 | if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) { |
4519 | break; | 4703 | /* |
4704 | * L1 can read these fields without exiting, ensure the | ||
4705 | * shadow VMCS is up-to-date. | ||
4706 | */ | ||
4707 | if (enable_shadow_vmcs && is_shadow_field_ro(field)) { | ||
4708 | preempt_disable(); | ||
4709 | vmcs_load(vmx->vmcs01.shadow_vmcs); | ||
4710 | |||
4711 | __vmcs_writel(field, field_value); | ||
4712 | |||
4713 | vmcs_clear(vmx->vmcs01.shadow_vmcs); | ||
4714 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
4715 | preempt_enable(); | ||
4520 | } | 4716 | } |
4717 | vmx->nested.dirty_vmcs12 = true; | ||
4521 | } | 4718 | } |
4522 | 4719 | ||
4523 | return nested_vmx_succeed(vcpu); | 4720 | return nested_vmx_succeed(vcpu); |
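
The VMWRITE path above masks access-rights fields with 0x1f0ff so that only the architecturally defined bits (type/S/DPL/P in bits 7:0, AVL/L/D-B/G/unusable in bits 16:12) survive, matching CPUs that drop the reserved bits in hardware. A tiny sketch of that sanitization:

#include <stdint.h>
#include <stdio.h>

/*
 * Segment access-rights layout: bits 7:0 = type/S/DPL/P, bits 11:8 reserved,
 * bits 16:12 = AVL/L/D-B/G/unusable.  Masking with 0x1f0ff keeps only the
 * defined bits, mimicking CPUs that drop the reserved bits on VMWRITE.
 */
#define AR_DEFINED_BITS 0x1f0ffu

static uint32_t sanitize_ar_bytes(uint32_t ar)
{
	return ar & AR_DEFINED_BITS;
}

int main(void)
{
	/* Reserved bits 11:8 are discarded: 0xf9b becomes 0x9b. */
	printf("0x%x\n", (unsigned)sanitize_ar_bytes(0xf9b));
	return 0;
}
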
@@ -4527,11 +4724,10 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) | |||
4527 | { | 4724 | { |
4528 | vmx->nested.current_vmptr = vmptr; | 4725 | vmx->nested.current_vmptr = vmptr; |
4529 | if (enable_shadow_vmcs) { | 4726 | if (enable_shadow_vmcs) { |
4530 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | 4727 | secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); |
4531 | SECONDARY_EXEC_SHADOW_VMCS); | ||
4532 | vmcs_write64(VMCS_LINK_POINTER, | 4728 | vmcs_write64(VMCS_LINK_POINTER, |
4533 | __pa(vmx->vmcs01.shadow_vmcs)); | 4729 | __pa(vmx->vmcs01.shadow_vmcs)); |
4534 | vmx->nested.need_vmcs12_sync = true; | 4730 | vmx->nested.need_vmcs12_to_shadow_sync = true; |
4535 | } | 4731 | } |
4536 | vmx->nested.dirty_vmcs12 = true; | 4732 | vmx->nested.dirty_vmcs12 = true; |
4537 | } | 4733 | } |
@@ -4615,7 +4811,8 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) | |||
4615 | if (unlikely(to_vmx(vcpu)->nested.hv_evmcs)) | 4811 | if (unlikely(to_vmx(vcpu)->nested.hv_evmcs)) |
4616 | return 1; | 4812 | return 1; |
4617 | 4813 | ||
4618 | if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) | 4814 | if (get_vmx_mem_address(vcpu, exit_qual, instr_info, |
4815 | true, sizeof(gpa_t), &gva)) | ||
4619 | return 1; | 4816 | return 1; |
4620 | /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ | 4817 | /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ |
4621 | if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, | 4818 | if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, |
@@ -4661,7 +4858,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
4661 | * operand is read even if it isn't needed (e.g., for type==global) | 4858 | * operand is read even if it isn't needed (e.g., for type==global) |
4662 | */ | 4859 | */ |
4663 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | 4860 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), |
4664 | vmx_instruction_info, false, &gva)) | 4861 | vmx_instruction_info, false, sizeof(operand), &gva)) |
4665 | return 1; | 4862 | return 1; |
4666 | if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { | 4863 | if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { |
4667 | kvm_inject_page_fault(vcpu, &e); | 4864 | kvm_inject_page_fault(vcpu, &e); |
@@ -4670,13 +4867,11 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
4670 | 4867 | ||
4671 | switch (type) { | 4868 | switch (type) { |
4672 | case VMX_EPT_EXTENT_GLOBAL: | 4869 | case VMX_EPT_EXTENT_GLOBAL: |
4870 | case VMX_EPT_EXTENT_CONTEXT: | ||
4673 | /* | 4871 | /* |
4674 | * TODO: track mappings and invalidate | 4872 | * TODO: Sync the necessary shadow EPT roots here, rather than |
4675 | * single context requests appropriately | 4873 | * at the next emulated VM-entry. |
4676 | */ | 4874 | */ |
4677 | case VMX_EPT_EXTENT_CONTEXT: | ||
4678 | kvm_mmu_sync_roots(vcpu); | ||
4679 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
4680 | break; | 4875 | break; |
4681 | default: | 4876 | default: |
4682 | BUG_ON(1); | 4877 | BUG_ON(1); |
@@ -4723,7 +4918,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
4723 | * operand is read even if it isn't needed (e.g., for type==global) | 4918 | * operand is read even if it isn't needed (e.g., for type==global) |
4724 | */ | 4919 | */ |
4725 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | 4920 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), |
4726 | vmx_instruction_info, false, &gva)) | 4921 | vmx_instruction_info, false, sizeof(operand), &gva)) |
4727 | return 1; | 4922 | return 1; |
4728 | if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { | 4923 | if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { |
4729 | kvm_inject_page_fault(vcpu, &e); | 4924 | kvm_inject_page_fault(vcpu, &e); |
@@ -5284,12 +5479,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, | |||
5284 | * When running L2, the authoritative vmcs12 state is in the | 5479 | * When running L2, the authoritative vmcs12 state is in the |
5285 | * vmcs02. When running L1, the authoritative vmcs12 state is | 5480 | * vmcs02. When running L1, the authoritative vmcs12 state is |
5286 | * in the shadow or enlightened vmcs linked to vmcs01, unless | 5481 | * in the shadow or enlightened vmcs linked to vmcs01, unless |
5287 | * need_vmcs12_sync is set, in which case, the authoritative | 5482 | * need_vmcs12_to_shadow_sync is set, in which case, the authoritative |
5288 | * vmcs12 state is in the vmcs12 already. | 5483 | * vmcs12 state is in the vmcs12 already. |
5289 | */ | 5484 | */ |
5290 | if (is_guest_mode(vcpu)) { | 5485 | if (is_guest_mode(vcpu)) { |
5291 | sync_vmcs12(vcpu, vmcs12); | 5486 | sync_vmcs02_to_vmcs12(vcpu, vmcs12); |
5292 | } else if (!vmx->nested.need_vmcs12_sync) { | 5487 | sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); |
5488 | } else if (!vmx->nested.need_vmcs12_to_shadow_sync) { | ||
5293 | if (vmx->nested.hv_evmcs) | 5489 | if (vmx->nested.hv_evmcs) |
5294 | copy_enlightened_to_vmcs12(vmx); | 5490 | copy_enlightened_to_vmcs12(vmx); |
5295 | else if (enable_shadow_vmcs) | 5491 | else if (enable_shadow_vmcs) |
@@ -5421,7 +5617,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, | |||
5421 | * Sync eVMCS upon entry as we may not have | 5617 | * Sync eVMCS upon entry as we may not have |
5422 | * HV_X64_MSR_VP_ASSIST_PAGE set up yet. | 5618 | * HV_X64_MSR_VP_ASSIST_PAGE set up yet. |
5423 | */ | 5619 | */ |
5424 | vmx->nested.need_vmcs12_sync = true; | 5620 | vmx->nested.need_vmcs12_to_shadow_sync = true; |
5425 | } else { | 5621 | } else { |
5426 | return -EINVAL; | 5622 | return -EINVAL; |
5427 | } | 5623 | } |
@@ -5489,14 +5685,8 @@ error_guest_mode: | |||
5489 | void nested_vmx_vcpu_setup(void) | 5685 | void nested_vmx_vcpu_setup(void) |
5490 | { | 5686 | { |
5491 | if (enable_shadow_vmcs) { | 5687 | if (enable_shadow_vmcs) { |
5492 | /* | ||
5493 | * At vCPU creation, "VMWRITE to any supported field | ||
5494 | * in the VMCS" is supported, so use the more | ||
5495 | * permissive vmx_vmread_bitmap to specify both read | ||
5496 | * and write permissions for the shadow VMCS. | ||
5497 | */ | ||
5498 | vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); | 5688 | vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); |
5499 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap)); | 5689 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); |
5500 | } | 5690 | } |
5501 | } | 5691 | } |
5502 | 5692 | ||
@@ -5626,10 +5816,15 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, | |||
5626 | msrs->secondary_ctls_low = 0; | 5816 | msrs->secondary_ctls_low = 0; |
5627 | msrs->secondary_ctls_high &= | 5817 | msrs->secondary_ctls_high &= |
5628 | SECONDARY_EXEC_DESC | | 5818 | SECONDARY_EXEC_DESC | |
5819 | SECONDARY_EXEC_RDTSCP | | ||
5629 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 5820 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
5821 | SECONDARY_EXEC_WBINVD_EXITING | | ||
5630 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 5822 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
5631 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 5823 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
5632 | SECONDARY_EXEC_WBINVD_EXITING; | 5824 | SECONDARY_EXEC_RDRAND_EXITING | |
5825 | SECONDARY_EXEC_ENABLE_INVPCID | | ||
5826 | SECONDARY_EXEC_RDSEED_EXITING | | ||
5827 | SECONDARY_EXEC_XSAVES; | ||
5633 | 5828 | ||
5634 | /* | 5829 | /* |
5635 | * We can emulate "VMCS shadowing," even if the hardware | 5830 | * We can emulate "VMCS shadowing," even if the hardware |
@@ -5749,14 +5944,6 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) | |||
5749 | { | 5944 | { |
5750 | int i; | 5945 | int i; |
5751 | 5946 | ||
5752 | /* | ||
5753 | * Without EPT it is not possible to restore L1's CR3 and PDPTR on | ||
5754 | * VMfail, because they are not available in vmcs01. Just always | ||
5755 | * use hardware checks. | ||
5756 | */ | ||
5757 | if (!enable_ept) | ||
5758 | nested_early_check = 1; | ||
5759 | |||
5760 | if (!cpu_has_vmx_shadow_vmcs()) | 5947 | if (!cpu_has_vmx_shadow_vmcs()) |
5761 | enable_shadow_vmcs = 0; | 5948 | enable_shadow_vmcs = 0; |
5762 | if (enable_shadow_vmcs) { | 5949 | if (enable_shadow_vmcs) { |
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index e847ff1019a2..187d39bf0bf1 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h | |||
@@ -17,11 +17,11 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); | |||
17 | bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); | 17 | bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); |
18 | void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | 18 | void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
19 | u32 exit_intr_info, unsigned long exit_qualification); | 19 | u32 exit_intr_info, unsigned long exit_qualification); |
20 | void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu); | 20 | void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu); |
21 | int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 21 | int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
22 | int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); | 22 | int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); |
23 | int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | 23 | int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, |
24 | u32 vmx_instruction_info, bool wr, gva_t *ret); | 24 | u32 vmx_instruction_info, bool wr, int len, gva_t *ret); |
25 | 25 | ||
26 | static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | 26 | static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) |
27 | { | 27 | { |
diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index b8e50f76fefc..2200fb698dd0 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h | |||
@@ -146,7 +146,6 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value) | |||
146 | 146 | ||
147 | __vmcs_writel(field, value); | 147 | __vmcs_writel(field, value); |
148 | #ifndef CONFIG_X86_64 | 148 | #ifndef CONFIG_X86_64 |
149 | asm volatile (""); | ||
150 | __vmcs_writel(field+1, value >> 32); | 149 | __vmcs_writel(field+1, value >> 32); |
151 | #endif | 150 | #endif |
152 | } | 151 | } |
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index cb6079f8a227..481ad879197b 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h | |||
@@ -42,6 +42,14 @@ struct vmcs_host_state { | |||
42 | #endif | 42 | #endif |
43 | }; | 43 | }; |
44 | 44 | ||
45 | struct vmcs_controls_shadow { | ||
46 | u32 vm_entry; | ||
47 | u32 vm_exit; | ||
48 | u32 pin; | ||
49 | u32 exec; | ||
50 | u32 secondary_exec; | ||
51 | }; | ||
52 | |||
45 | /* | 53 | /* |
46 | * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also | 54 | * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also |
47 | * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs | 55 | * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs |
@@ -53,7 +61,7 @@ struct loaded_vmcs { | |||
53 | int cpu; | 61 | int cpu; |
54 | bool launched; | 62 | bool launched; |
55 | bool nmi_known_unmasked; | 63 | bool nmi_known_unmasked; |
56 | bool hv_timer_armed; | 64 | bool hv_timer_soft_disabled; |
57 | /* Support for vnmi-less CPUs */ | 65 | /* Support for vnmi-less CPUs */ |
58 | int soft_vnmi_blocked; | 66 | int soft_vnmi_blocked; |
59 | ktime_t entry_time; | 67 | ktime_t entry_time; |
@@ -61,6 +69,7 @@ struct loaded_vmcs { | |||
61 | unsigned long *msr_bitmap; | 69 | unsigned long *msr_bitmap; |
62 | struct list_head loaded_vmcss_on_cpu_link; | 70 | struct list_head loaded_vmcss_on_cpu_link; |
63 | struct vmcs_host_state host_state; | 71 | struct vmcs_host_state host_state; |
72 | struct vmcs_controls_shadow controls_shadow; | ||
64 | }; | 73 | }; |
65 | 74 | ||
66 | static inline bool is_exception_n(u32 intr_info, u8 vector) | 75 | static inline bool is_exception_n(u32 intr_info, u8 vector) |
@@ -115,6 +124,12 @@ static inline bool is_nmi(u32 intr_info) | |||
115 | == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); | 124 | == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); |
116 | } | 125 | } |
117 | 126 | ||
127 | static inline bool is_external_intr(u32 intr_info) | ||
128 | { | ||
129 | return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) | ||
130 | == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR); | ||
131 | } | ||
132 | |||
118 | enum vmcs_field_width { | 133 | enum vmcs_field_width { |
119 | VMCS_FIELD_WIDTH_U16 = 0, | 134 | VMCS_FIELD_WIDTH_U16 = 0, |
120 | VMCS_FIELD_WIDTH_U64 = 1, | 135 | VMCS_FIELD_WIDTH_U64 = 1, |
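For context on the struct vmcs_controls_shadow added above: the vmx.c hunks later in this diff replace open-coded vmcs_read32()/vmcs_write32() sequences with accessors such as pin_controls_set(), exec_controls_setbit() and secondary_exec_controls_get(), which keep a cached copy of each control field in this shadow so the current value can be read and updated without extra VMREADs. A minimal sketch of that caching pattern, assuming the accessor names seen in the hunks below (the real helpers are macro-generated outside this diff, so treat the exact bodies here as illustrative):

	/* Illustrative sketch only; shows the shadow-caching idea, not the exact kernel macros. */
	static inline void exec_controls_set(struct vcpu_vmx *vmx, u32 val)
	{
		if (vmx->loaded_vmcs->controls_shadow.exec != val) {
			vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, val);
			vmx->loaded_vmcs->controls_shadow.exec = val;
		}
	}

	static inline u32 exec_controls_get(struct vcpu_vmx *vmx)
	{
		return vmx->loaded_vmcs->controls_shadow.exec;	/* no VMREAD needed */
	}

	static inline void exec_controls_setbit(struct vcpu_vmx *vmx, u32 val)
	{
		exec_controls_set(vmx, exec_controls_get(vmx) | val);
	}

	static inline void exec_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
	{
		exec_controls_set(vmx, exec_controls_get(vmx) & ~val);
	}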
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 337718fc8a36..d0c6df373f67 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h | |||
@@ -395,69 +395,48 @@ static inline short vmcs_field_to_offset(unsigned long field) | |||
395 | 395 | ||
396 | #undef ROL16 | 396 | #undef ROL16 |
397 | 397 | ||
398 | /* | 398 | static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field, |
399 | * Read a vmcs12 field. Since these can have varying lengths and we return | 399 | u16 offset) |
400 | * one type, we chose the biggest type (u64) and zero-extend the return value | ||
401 | * to that size. Note that the caller, handle_vmread, might need to use only | ||
402 | * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of | ||
403 | * 64-bit fields are to be returned). | ||
404 | */ | ||
405 | static inline int vmcs12_read_any(struct vmcs12 *vmcs12, | ||
406 | unsigned long field, u64 *ret) | ||
407 | { | 400 | { |
408 | short offset = vmcs_field_to_offset(field); | 401 | char *p = (char *)vmcs12 + offset; |
409 | char *p; | ||
410 | |||
411 | if (offset < 0) | ||
412 | return offset; | ||
413 | |||
414 | p = (char *)vmcs12 + offset; | ||
415 | 402 | ||
416 | switch (vmcs_field_width(field)) { | 403 | switch (vmcs_field_width(field)) { |
417 | case VMCS_FIELD_WIDTH_NATURAL_WIDTH: | 404 | case VMCS_FIELD_WIDTH_NATURAL_WIDTH: |
418 | *ret = *((natural_width *)p); | 405 | return *((natural_width *)p); |
419 | return 0; | ||
420 | case VMCS_FIELD_WIDTH_U16: | 406 | case VMCS_FIELD_WIDTH_U16: |
421 | *ret = *((u16 *)p); | 407 | return *((u16 *)p); |
422 | return 0; | ||
423 | case VMCS_FIELD_WIDTH_U32: | 408 | case VMCS_FIELD_WIDTH_U32: |
424 | *ret = *((u32 *)p); | 409 | return *((u32 *)p); |
425 | return 0; | ||
426 | case VMCS_FIELD_WIDTH_U64: | 410 | case VMCS_FIELD_WIDTH_U64: |
427 | *ret = *((u64 *)p); | 411 | return *((u64 *)p); |
428 | return 0; | ||
429 | default: | 412 | default: |
430 | WARN_ON(1); | 413 | WARN_ON_ONCE(1); |
431 | return -ENOENT; | 414 | return -1; |
432 | } | 415 | } |
433 | } | 416 | } |
434 | 417 | ||
435 | static inline int vmcs12_write_any(struct vmcs12 *vmcs12, | 418 | static inline void vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field, |
436 | unsigned long field, u64 field_value){ | 419 | u16 offset, u64 field_value) |
437 | short offset = vmcs_field_to_offset(field); | 420 | { |
438 | char *p = (char *)vmcs12 + offset; | 421 | char *p = (char *)vmcs12 + offset; |
439 | 422 | ||
440 | if (offset < 0) | ||
441 | return offset; | ||
442 | |||
443 | switch (vmcs_field_width(field)) { | 423 | switch (vmcs_field_width(field)) { |
444 | case VMCS_FIELD_WIDTH_U16: | 424 | case VMCS_FIELD_WIDTH_U16: |
445 | *(u16 *)p = field_value; | 425 | *(u16 *)p = field_value; |
446 | return 0; | 426 | break; |
447 | case VMCS_FIELD_WIDTH_U32: | 427 | case VMCS_FIELD_WIDTH_U32: |
448 | *(u32 *)p = field_value; | 428 | *(u32 *)p = field_value; |
449 | return 0; | 429 | break; |
450 | case VMCS_FIELD_WIDTH_U64: | 430 | case VMCS_FIELD_WIDTH_U64: |
451 | *(u64 *)p = field_value; | 431 | *(u64 *)p = field_value; |
452 | return 0; | 432 | break; |
453 | case VMCS_FIELD_WIDTH_NATURAL_WIDTH: | 433 | case VMCS_FIELD_WIDTH_NATURAL_WIDTH: |
454 | *(natural_width *)p = field_value; | 434 | *(natural_width *)p = field_value; |
455 | return 0; | 435 | break; |
456 | default: | 436 | default: |
457 | WARN_ON(1); | 437 | WARN_ON_ONCE(1); |
458 | return -ENOENT; | 438 | break; |
459 | } | 439 | } |
460 | |||
461 | } | 440 | } |
462 | 441 | ||
463 | #endif /* __KVM_X86_VMX_VMCS12_H */ | 442 | #endif /* __KVM_X86_VMX_VMCS12_H */ |
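The vmcs12_read_any()/vmcs12_write_any() rework above moves the field-to-offset lookup and its validity check out of the accessors and into the caller, so the read variant can return the value directly and the write variant becomes void. A hedged sketch of what a caller now looks like, loosely modeled on the VMREAD/VMWRITE handlers in nested.c (treat the error-handling call as illustrative):

	short offset = vmcs_field_to_offset(field);
	u64 value;

	if (offset < 0)		/* the caller validates the field now */
		return nested_vmx_failValid(vcpu,
				VMXERR_UNSUPPORTED_VMCS_COMPONENT);

	value = vmcs12_read_any(vmcs12, field, offset);	/* cannot fail */
	vmcs12_write_any(vmcs12, field, offset, value);	/* void, no error path */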
diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h index 132432f375c2..eb1ecd16fd22 100644 --- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h +++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h | |||
@@ -1,8 +1,12 @@ | |||
1 | #if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW) | ||
2 | BUILD_BUG_ON(1) | ||
3 | #endif | ||
4 | |||
1 | #ifndef SHADOW_FIELD_RO | 5 | #ifndef SHADOW_FIELD_RO |
2 | #define SHADOW_FIELD_RO(x) | 6 | #define SHADOW_FIELD_RO(x, y) |
3 | #endif | 7 | #endif |
4 | #ifndef SHADOW_FIELD_RW | 8 | #ifndef SHADOW_FIELD_RW |
5 | #define SHADOW_FIELD_RW(x) | 9 | #define SHADOW_FIELD_RW(x, y) |
6 | #endif | 10 | #endif |
7 | 11 | ||
8 | /* | 12 | /* |
@@ -28,47 +32,48 @@ | |||
28 | */ | 32 | */ |
29 | 33 | ||
30 | /* 16-bits */ | 34 | /* 16-bits */ |
31 | SHADOW_FIELD_RW(GUEST_INTR_STATUS) | 35 | SHADOW_FIELD_RW(GUEST_INTR_STATUS, guest_intr_status) |
32 | SHADOW_FIELD_RW(GUEST_PML_INDEX) | 36 | SHADOW_FIELD_RW(GUEST_PML_INDEX, guest_pml_index) |
33 | SHADOW_FIELD_RW(HOST_FS_SELECTOR) | 37 | SHADOW_FIELD_RW(HOST_FS_SELECTOR, host_fs_selector) |
34 | SHADOW_FIELD_RW(HOST_GS_SELECTOR) | 38 | SHADOW_FIELD_RW(HOST_GS_SELECTOR, host_gs_selector) |
35 | 39 | ||
36 | /* 32-bits */ | 40 | /* 32-bits */ |
37 | SHADOW_FIELD_RO(VM_EXIT_REASON) | 41 | SHADOW_FIELD_RO(VM_EXIT_REASON, vm_exit_reason) |
38 | SHADOW_FIELD_RO(VM_EXIT_INTR_INFO) | 42 | SHADOW_FIELD_RO(VM_EXIT_INTR_INFO, vm_exit_intr_info) |
39 | SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN) | 43 | SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len) |
40 | SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD) | 44 | SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field) |
41 | SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE) | 45 | SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code) |
42 | SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE) | 46 | SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code) |
43 | SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL) | 47 | SHADOW_FIELD_RO(GUEST_CS_AR_BYTES, guest_cs_ar_bytes) |
44 | SHADOW_FIELD_RW(EXCEPTION_BITMAP) | 48 | SHADOW_FIELD_RO(GUEST_SS_AR_BYTES, guest_ss_ar_bytes) |
45 | SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE) | 49 | SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control) |
46 | SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD) | 50 | SHADOW_FIELD_RW(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control) |
47 | SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN) | 51 | SHADOW_FIELD_RW(EXCEPTION_BITMAP, exception_bitmap) |
48 | SHADOW_FIELD_RW(TPR_THRESHOLD) | 52 | SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code) |
49 | SHADOW_FIELD_RW(GUEST_CS_AR_BYTES) | 53 | SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field) |
50 | SHADOW_FIELD_RW(GUEST_SS_AR_BYTES) | 54 | SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len) |
51 | SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO) | 55 | SHADOW_FIELD_RW(TPR_THRESHOLD, tpr_threshold) |
52 | SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE) | 56 | SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info) |
57 | SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value) | ||
53 | 58 | ||
54 | /* Natural width */ | 59 | /* Natural width */ |
55 | SHADOW_FIELD_RO(EXIT_QUALIFICATION) | 60 | SHADOW_FIELD_RO(EXIT_QUALIFICATION, exit_qualification) |
56 | SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS) | 61 | SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS, guest_linear_address) |
57 | SHADOW_FIELD_RW(GUEST_RIP) | 62 | SHADOW_FIELD_RW(GUEST_RIP, guest_rip) |
58 | SHADOW_FIELD_RW(GUEST_RSP) | 63 | SHADOW_FIELD_RW(GUEST_RSP, guest_rsp) |
59 | SHADOW_FIELD_RW(GUEST_CR0) | 64 | SHADOW_FIELD_RW(GUEST_CR0, guest_cr0) |
60 | SHADOW_FIELD_RW(GUEST_CR3) | 65 | SHADOW_FIELD_RW(GUEST_CR3, guest_cr3) |
61 | SHADOW_FIELD_RW(GUEST_CR4) | 66 | SHADOW_FIELD_RW(GUEST_CR4, guest_cr4) |
62 | SHADOW_FIELD_RW(GUEST_RFLAGS) | 67 | SHADOW_FIELD_RW(GUEST_RFLAGS, guest_rflags) |
63 | SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK) | 68 | SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK, cr0_guest_host_mask) |
64 | SHADOW_FIELD_RW(CR0_READ_SHADOW) | 69 | SHADOW_FIELD_RW(CR0_READ_SHADOW, cr0_read_shadow) |
65 | SHADOW_FIELD_RW(CR4_READ_SHADOW) | 70 | SHADOW_FIELD_RW(CR4_READ_SHADOW, cr4_read_shadow) |
66 | SHADOW_FIELD_RW(HOST_FS_BASE) | 71 | SHADOW_FIELD_RW(HOST_FS_BASE, host_fs_base) |
67 | SHADOW_FIELD_RW(HOST_GS_BASE) | 72 | SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base) |
68 | 73 | ||
69 | /* 64-bit */ | 74 | /* 64-bit */ |
70 | SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS) | 75 | SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address) |
71 | SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH) | 76 | SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address) |
72 | 77 | ||
73 | #undef SHADOW_FIELD_RO | 78 | #undef SHADOW_FIELD_RO |
74 | #undef SHADOW_FIELD_RW | 79 | #undef SHADOW_FIELD_RW |
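The second parameter added to SHADOW_FIELD_RO()/SHADOW_FIELD_RW() above names the matching struct vmcs12 member, which lets an includer build an {encoding, offset} table instead of only a list of encodings. A sketch of that usage pattern, loosely based on how nested.c consumes this header elsewhere in the series (names here are illustrative):

	struct shadow_vmcs_field {
		u16	encoding;
		u16	offset;
	};

	/* RO entries expand to nothing because only SHADOW_FIELD_RW is defined. */
	static struct shadow_vmcs_field shadow_read_write_fields[] = {
	#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
	#include "vmcs_shadow_fields.h"
	};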
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d98eac371c0a..69536553446d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c | |||
@@ -389,6 +389,7 @@ static const struct kvm_vmx_segment_field { | |||
389 | }; | 389 | }; |
390 | 390 | ||
391 | u64 host_efer; | 391 | u64 host_efer; |
392 | static unsigned long host_idt_base; | ||
392 | 393 | ||
393 | /* | 394 | /* |
394 | * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm | 395 | * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm |
@@ -1035,6 +1036,33 @@ static void pt_guest_exit(struct vcpu_vmx *vmx) | |||
1035 | wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); | 1036 | wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); |
1036 | } | 1037 | } |
1037 | 1038 | ||
1039 | void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, | ||
1040 | unsigned long fs_base, unsigned long gs_base) | ||
1041 | { | ||
1042 | if (unlikely(fs_sel != host->fs_sel)) { | ||
1043 | if (!(fs_sel & 7)) | ||
1044 | vmcs_write16(HOST_FS_SELECTOR, fs_sel); | ||
1045 | else | ||
1046 | vmcs_write16(HOST_FS_SELECTOR, 0); | ||
1047 | host->fs_sel = fs_sel; | ||
1048 | } | ||
1049 | if (unlikely(gs_sel != host->gs_sel)) { | ||
1050 | if (!(gs_sel & 7)) | ||
1051 | vmcs_write16(HOST_GS_SELECTOR, gs_sel); | ||
1052 | else | ||
1053 | vmcs_write16(HOST_GS_SELECTOR, 0); | ||
1054 | host->gs_sel = gs_sel; | ||
1055 | } | ||
1056 | if (unlikely(fs_base != host->fs_base)) { | ||
1057 | vmcs_writel(HOST_FS_BASE, fs_base); | ||
1058 | host->fs_base = fs_base; | ||
1059 | } | ||
1060 | if (unlikely(gs_base != host->gs_base)) { | ||
1061 | vmcs_writel(HOST_GS_BASE, gs_base); | ||
1062 | host->gs_base = gs_base; | ||
1063 | } | ||
1064 | } | ||
1065 | |||
1038 | void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) | 1066 | void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) |
1039 | { | 1067 | { |
1040 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1068 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -1053,20 +1081,18 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) | |||
1053 | * when guest state is loaded. This happens when guest transitions | 1081 | * when guest state is loaded. This happens when guest transitions |
1054 | * to/from long-mode by setting MSR_EFER.LMA. | 1082 | * to/from long-mode by setting MSR_EFER.LMA. |
1055 | */ | 1083 | */ |
1056 | if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) { | 1084 | if (!vmx->guest_msrs_ready) { |
1057 | vmx->guest_msrs_dirty = false; | 1085 | vmx->guest_msrs_ready = true; |
1058 | for (i = 0; i < vmx->save_nmsrs; ++i) | 1086 | for (i = 0; i < vmx->save_nmsrs; ++i) |
1059 | kvm_set_shared_msr(vmx->guest_msrs[i].index, | 1087 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
1060 | vmx->guest_msrs[i].data, | 1088 | vmx->guest_msrs[i].data, |
1061 | vmx->guest_msrs[i].mask); | 1089 | vmx->guest_msrs[i].mask); |
1062 | 1090 | ||
1063 | } | 1091 | } |
1064 | 1092 | if (vmx->guest_state_loaded) | |
1065 | if (vmx->loaded_cpu_state) | ||
1066 | return; | 1093 | return; |
1067 | 1094 | ||
1068 | vmx->loaded_cpu_state = vmx->loaded_vmcs; | 1095 | host_state = &vmx->loaded_vmcs->host_state; |
1069 | host_state = &vmx->loaded_cpu_state->host_state; | ||
1070 | 1096 | ||
1071 | /* | 1097 | /* |
1072 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not | 1098 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not |
@@ -1100,42 +1126,20 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) | |||
1100 | gs_base = segment_base(gs_sel); | 1126 | gs_base = segment_base(gs_sel); |
1101 | #endif | 1127 | #endif |
1102 | 1128 | ||
1103 | if (unlikely(fs_sel != host_state->fs_sel)) { | 1129 | vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); |
1104 | if (!(fs_sel & 7)) | 1130 | vmx->guest_state_loaded = true; |
1105 | vmcs_write16(HOST_FS_SELECTOR, fs_sel); | ||
1106 | else | ||
1107 | vmcs_write16(HOST_FS_SELECTOR, 0); | ||
1108 | host_state->fs_sel = fs_sel; | ||
1109 | } | ||
1110 | if (unlikely(gs_sel != host_state->gs_sel)) { | ||
1111 | if (!(gs_sel & 7)) | ||
1112 | vmcs_write16(HOST_GS_SELECTOR, gs_sel); | ||
1113 | else | ||
1114 | vmcs_write16(HOST_GS_SELECTOR, 0); | ||
1115 | host_state->gs_sel = gs_sel; | ||
1116 | } | ||
1117 | if (unlikely(fs_base != host_state->fs_base)) { | ||
1118 | vmcs_writel(HOST_FS_BASE, fs_base); | ||
1119 | host_state->fs_base = fs_base; | ||
1120 | } | ||
1121 | if (unlikely(gs_base != host_state->gs_base)) { | ||
1122 | vmcs_writel(HOST_GS_BASE, gs_base); | ||
1123 | host_state->gs_base = gs_base; | ||
1124 | } | ||
1125 | } | 1131 | } |
1126 | 1132 | ||
1127 | static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) | 1133 | static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) |
1128 | { | 1134 | { |
1129 | struct vmcs_host_state *host_state; | 1135 | struct vmcs_host_state *host_state; |
1130 | 1136 | ||
1131 | if (!vmx->loaded_cpu_state) | 1137 | if (!vmx->guest_state_loaded) |
1132 | return; | 1138 | return; |
1133 | 1139 | ||
1134 | WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs); | 1140 | host_state = &vmx->loaded_vmcs->host_state; |
1135 | host_state = &vmx->loaded_cpu_state->host_state; | ||
1136 | 1141 | ||
1137 | ++vmx->vcpu.stat.host_state_reload; | 1142 | ++vmx->vcpu.stat.host_state_reload; |
1138 | vmx->loaded_cpu_state = NULL; | ||
1139 | 1143 | ||
1140 | #ifdef CONFIG_X86_64 | 1144 | #ifdef CONFIG_X86_64 |
1141 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | 1145 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
@@ -1161,13 +1165,15 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) | |||
1161 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 1165 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
1162 | #endif | 1166 | #endif |
1163 | load_fixmap_gdt(raw_smp_processor_id()); | 1167 | load_fixmap_gdt(raw_smp_processor_id()); |
1168 | vmx->guest_state_loaded = false; | ||
1169 | vmx->guest_msrs_ready = false; | ||
1164 | } | 1170 | } |
1165 | 1171 | ||
1166 | #ifdef CONFIG_X86_64 | 1172 | #ifdef CONFIG_X86_64 |
1167 | static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) | 1173 | static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) |
1168 | { | 1174 | { |
1169 | preempt_disable(); | 1175 | preempt_disable(); |
1170 | if (vmx->loaded_cpu_state) | 1176 | if (vmx->guest_state_loaded) |
1171 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | 1177 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
1172 | preempt_enable(); | 1178 | preempt_enable(); |
1173 | return vmx->msr_guest_kernel_gs_base; | 1179 | return vmx->msr_guest_kernel_gs_base; |
@@ -1176,7 +1182,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) | |||
1176 | static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) | 1182 | static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) |
1177 | { | 1183 | { |
1178 | preempt_disable(); | 1184 | preempt_disable(); |
1179 | if (vmx->loaded_cpu_state) | 1185 | if (vmx->guest_state_loaded) |
1180 | wrmsrl(MSR_KERNEL_GS_BASE, data); | 1186 | wrmsrl(MSR_KERNEL_GS_BASE, data); |
1181 | preempt_enable(); | 1187 | preempt_enable(); |
1182 | vmx->msr_guest_kernel_gs_base = data; | 1188 | vmx->msr_guest_kernel_gs_base = data; |
@@ -1225,11 +1231,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | |||
1225 | pi_set_on(pi_desc); | 1231 | pi_set_on(pi_desc); |
1226 | } | 1232 | } |
1227 | 1233 | ||
1228 | /* | 1234 | void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) |
1229 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | ||
1230 | * vcpu mutex is already taken. | ||
1231 | */ | ||
1232 | void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
1233 | { | 1235 | { |
1234 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1236 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1235 | bool already_loaded = vmx->loaded_vmcs->cpu == cpu; | 1237 | bool already_loaded = vmx->loaded_vmcs->cpu == cpu; |
@@ -1290,8 +1292,20 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1290 | if (kvm_has_tsc_control && | 1292 | if (kvm_has_tsc_control && |
1291 | vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) | 1293 | vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) |
1292 | decache_tsc_multiplier(vmx); | 1294 | decache_tsc_multiplier(vmx); |
1295 | } | ||
1296 | |||
1297 | /* | ||
1298 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | ||
1299 | * vcpu mutex is already taken. | ||
1300 | */ | ||
1301 | void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
1302 | { | ||
1303 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
1304 | |||
1305 | vmx_vcpu_load_vmcs(vcpu, cpu); | ||
1293 | 1306 | ||
1294 | vmx_vcpu_pi_load(vcpu, cpu); | 1307 | vmx_vcpu_pi_load(vcpu, cpu); |
1308 | |||
1295 | vmx->host_pkru = read_pkru(); | 1309 | vmx->host_pkru = read_pkru(); |
1296 | vmx->host_debugctlmsr = get_debugctlmsr(); | 1310 | vmx->host_debugctlmsr = get_debugctlmsr(); |
1297 | } | 1311 | } |
@@ -1310,7 +1324,7 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) | |||
1310 | pi_set_sn(pi_desc); | 1324 | pi_set_sn(pi_desc); |
1311 | } | 1325 | } |
1312 | 1326 | ||
1313 | void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 1327 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
1314 | { | 1328 | { |
1315 | vmx_vcpu_pi_put(vcpu); | 1329 | vmx_vcpu_pi_put(vcpu); |
1316 | 1330 | ||
@@ -1579,7 +1593,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
1579 | move_msr_up(vmx, index, save_nmsrs++); | 1593 | move_msr_up(vmx, index, save_nmsrs++); |
1580 | 1594 | ||
1581 | vmx->save_nmsrs = save_nmsrs; | 1595 | vmx->save_nmsrs = save_nmsrs; |
1582 | vmx->guest_msrs_dirty = true; | 1596 | vmx->guest_msrs_ready = false; |
1583 | 1597 | ||
1584 | if (cpu_has_vmx_msr_bitmap()) | 1598 | if (cpu_has_vmx_msr_bitmap()) |
1585 | vmx_update_msr_bitmap(&vmx->vcpu); | 1599 | vmx_update_msr_bitmap(&vmx->vcpu); |
@@ -1692,9 +1706,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1692 | case MSR_IA32_SYSENTER_ESP: | 1706 | case MSR_IA32_SYSENTER_ESP: |
1693 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); | 1707 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); |
1694 | break; | 1708 | break; |
1695 | case MSR_IA32_POWER_CTL: | ||
1696 | msr_info->data = vmx->msr_ia32_power_ctl; | ||
1697 | break; | ||
1698 | case MSR_IA32_BNDCFGS: | 1709 | case MSR_IA32_BNDCFGS: |
1699 | if (!kvm_mpx_supported() || | 1710 | if (!kvm_mpx_supported() || |
1700 | (!msr_info->host_initiated && | 1711 | (!msr_info->host_initiated && |
@@ -1718,7 +1729,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1718 | return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, | 1729 | return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, |
1719 | &msr_info->data); | 1730 | &msr_info->data); |
1720 | case MSR_IA32_XSS: | 1731 | case MSR_IA32_XSS: |
1721 | if (!vmx_xsaves_supported()) | 1732 | if (!vmx_xsaves_supported() || |
1733 | (!msr_info->host_initiated && | ||
1734 | !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||
1735 | guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) | ||
1722 | return 1; | 1736 | return 1; |
1723 | msr_info->data = vcpu->arch.ia32_xss; | 1737 | msr_info->data = vcpu->arch.ia32_xss; |
1724 | break; | 1738 | break; |
@@ -1817,17 +1831,28 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1817 | break; | 1831 | break; |
1818 | #endif | 1832 | #endif |
1819 | case MSR_IA32_SYSENTER_CS: | 1833 | case MSR_IA32_SYSENTER_CS: |
1834 | if (is_guest_mode(vcpu)) | ||
1835 | get_vmcs12(vcpu)->guest_sysenter_cs = data; | ||
1820 | vmcs_write32(GUEST_SYSENTER_CS, data); | 1836 | vmcs_write32(GUEST_SYSENTER_CS, data); |
1821 | break; | 1837 | break; |
1822 | case MSR_IA32_SYSENTER_EIP: | 1838 | case MSR_IA32_SYSENTER_EIP: |
1839 | if (is_guest_mode(vcpu)) | ||
1840 | get_vmcs12(vcpu)->guest_sysenter_eip = data; | ||
1823 | vmcs_writel(GUEST_SYSENTER_EIP, data); | 1841 | vmcs_writel(GUEST_SYSENTER_EIP, data); |
1824 | break; | 1842 | break; |
1825 | case MSR_IA32_SYSENTER_ESP: | 1843 | case MSR_IA32_SYSENTER_ESP: |
1844 | if (is_guest_mode(vcpu)) | ||
1845 | get_vmcs12(vcpu)->guest_sysenter_esp = data; | ||
1826 | vmcs_writel(GUEST_SYSENTER_ESP, data); | 1846 | vmcs_writel(GUEST_SYSENTER_ESP, data); |
1827 | break; | 1847 | break; |
1828 | case MSR_IA32_POWER_CTL: | 1848 | case MSR_IA32_DEBUGCTLMSR: |
1829 | vmx->msr_ia32_power_ctl = data; | 1849 | if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & |
1850 | VM_EXIT_SAVE_DEBUG_CONTROLS) | ||
1851 | get_vmcs12(vcpu)->guest_ia32_debugctl = data; | ||
1852 | |||
1853 | ret = kvm_set_msr_common(vcpu, msr_info); | ||
1830 | break; | 1854 | break; |
1855 | |||
1831 | case MSR_IA32_BNDCFGS: | 1856 | case MSR_IA32_BNDCFGS: |
1832 | if (!kvm_mpx_supported() || | 1857 | if (!kvm_mpx_supported() || |
1833 | (!msr_info->host_initiated && | 1858 | (!msr_info->host_initiated && |
@@ -1896,9 +1921,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1896 | MSR_TYPE_W); | 1921 | MSR_TYPE_W); |
1897 | break; | 1922 | break; |
1898 | case MSR_IA32_CR_PAT: | 1923 | case MSR_IA32_CR_PAT: |
1924 | if (!kvm_pat_valid(data)) | ||
1925 | return 1; | ||
1926 | |||
1927 | if (is_guest_mode(vcpu) && | ||
1928 | get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) | ||
1929 | get_vmcs12(vcpu)->guest_ia32_pat = data; | ||
1930 | |||
1899 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | 1931 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
1900 | if (!kvm_pat_valid(data)) | ||
1901 | return 1; | ||
1902 | vmcs_write64(GUEST_IA32_PAT, data); | 1932 | vmcs_write64(GUEST_IA32_PAT, data); |
1903 | vcpu->arch.pat = data; | 1933 | vcpu->arch.pat = data; |
1904 | break; | 1934 | break; |
@@ -1932,7 +1962,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1932 | return 1; | 1962 | return 1; |
1933 | return vmx_set_vmx_msr(vcpu, msr_index, data); | 1963 | return vmx_set_vmx_msr(vcpu, msr_index, data); |
1934 | case MSR_IA32_XSS: | 1964 | case MSR_IA32_XSS: |
1935 | if (!vmx_xsaves_supported()) | 1965 | if (!vmx_xsaves_supported() || |
1966 | (!msr_info->host_initiated && | ||
1967 | !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||
1968 | guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) | ||
1936 | return 1; | 1969 | return 1; |
1937 | /* | 1970 | /* |
1938 | * The only supported bit as of Skylake is bit 8, but | 1971 | * The only supported bit as of Skylake is bit 8, but |
@@ -2435,6 +2468,7 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | |||
2435 | return -ENOMEM; | 2468 | return -ENOMEM; |
2436 | 2469 | ||
2437 | loaded_vmcs->shadow_vmcs = NULL; | 2470 | loaded_vmcs->shadow_vmcs = NULL; |
2471 | loaded_vmcs->hv_timer_soft_disabled = false; | ||
2438 | loaded_vmcs_init(loaded_vmcs); | 2472 | loaded_vmcs_init(loaded_vmcs); |
2439 | 2473 | ||
2440 | if (cpu_has_vmx_msr_bitmap()) { | 2474 | if (cpu_has_vmx_msr_bitmap()) { |
@@ -2455,6 +2489,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | |||
2455 | } | 2489 | } |
2456 | 2490 | ||
2457 | memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); | 2491 | memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); |
2492 | memset(&loaded_vmcs->controls_shadow, 0, | ||
2493 | sizeof(struct vmcs_controls_shadow)); | ||
2458 | 2494 | ||
2459 | return 0; | 2495 | return 0; |
2460 | 2496 | ||
@@ -2737,7 +2773,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | |||
2737 | (unsigned long *)&vcpu->arch.regs_dirty)) | 2773 | (unsigned long *)&vcpu->arch.regs_dirty)) |
2738 | return; | 2774 | return; |
2739 | 2775 | ||
2740 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | 2776 | if (is_pae_paging(vcpu)) { |
2741 | vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); | 2777 | vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); |
2742 | vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); | 2778 | vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); |
2743 | vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); | 2779 | vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); |
@@ -2749,7 +2785,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) | |||
2749 | { | 2785 | { |
2750 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; | 2786 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; |
2751 | 2787 | ||
2752 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | 2788 | if (is_pae_paging(vcpu)) { |
2753 | mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); | 2789 | mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); |
2754 | mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); | 2790 | mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); |
2755 | mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); | 2791 | mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); |
@@ -2766,22 +2802,20 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
2766 | unsigned long cr0, | 2802 | unsigned long cr0, |
2767 | struct kvm_vcpu *vcpu) | 2803 | struct kvm_vcpu *vcpu) |
2768 | { | 2804 | { |
2805 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2806 | |||
2769 | if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) | 2807 | if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) |
2770 | vmx_decache_cr3(vcpu); | 2808 | vmx_decache_cr3(vcpu); |
2771 | if (!(cr0 & X86_CR0_PG)) { | 2809 | if (!(cr0 & X86_CR0_PG)) { |
2772 | /* From paging/starting to nonpaging */ | 2810 | /* From paging/starting to nonpaging */ |
2773 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 2811 | exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | |
2774 | vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) | | 2812 | CPU_BASED_CR3_STORE_EXITING); |
2775 | (CPU_BASED_CR3_LOAD_EXITING | | ||
2776 | CPU_BASED_CR3_STORE_EXITING)); | ||
2777 | vcpu->arch.cr0 = cr0; | 2813 | vcpu->arch.cr0 = cr0; |
2778 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); | 2814 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
2779 | } else if (!is_paging(vcpu)) { | 2815 | } else if (!is_paging(vcpu)) { |
2780 | /* From nonpaging to paging */ | 2816 | /* From nonpaging to paging */ |
2781 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 2817 | exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | |
2782 | vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & | 2818 | CPU_BASED_CR3_STORE_EXITING); |
2783 | ~(CPU_BASED_CR3_LOAD_EXITING | | ||
2784 | CPU_BASED_CR3_STORE_EXITING)); | ||
2785 | vcpu->arch.cr0 = cr0; | 2819 | vcpu->arch.cr0 = cr0; |
2786 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); | 2820 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
2787 | } | 2821 | } |
@@ -2881,6 +2915,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
2881 | 2915 | ||
2882 | int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 2916 | int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
2883 | { | 2917 | { |
2918 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2884 | /* | 2919 | /* |
2885 | * Pass through host's Machine Check Enable value to hw_cr4, which | 2920 | * Pass through host's Machine Check Enable value to hw_cr4, which |
2886 | * is in force while we are in guest mode. Do not let guests control | 2921 | * is in force while we are in guest mode. Do not let guests control |
@@ -2891,20 +2926,19 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
2891 | hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); | 2926 | hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); |
2892 | if (enable_unrestricted_guest) | 2927 | if (enable_unrestricted_guest) |
2893 | hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; | 2928 | hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; |
2894 | else if (to_vmx(vcpu)->rmode.vm86_active) | 2929 | else if (vmx->rmode.vm86_active) |
2895 | hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; | 2930 | hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; |
2896 | else | 2931 | else |
2897 | hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; | 2932 | hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; |
2898 | 2933 | ||
2899 | if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { | 2934 | if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { |
2900 | if (cr4 & X86_CR4_UMIP) { | 2935 | if (cr4 & X86_CR4_UMIP) { |
2901 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | 2936 | secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); |
2902 | SECONDARY_EXEC_DESC); | ||
2903 | hw_cr4 &= ~X86_CR4_UMIP; | 2937 | hw_cr4 &= ~X86_CR4_UMIP; |
2904 | } else if (!is_guest_mode(vcpu) || | 2938 | } else if (!is_guest_mode(vcpu) || |
2905 | !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) | 2939 | !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) { |
2906 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, | 2940 | secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC); |
2907 | SECONDARY_EXEC_DESC); | 2941 | } |
2908 | } | 2942 | } |
2909 | 2943 | ||
2910 | if (cr4 & X86_CR4_VMXE) { | 2944 | if (cr4 & X86_CR4_VMXE) { |
@@ -2919,7 +2953,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
2919 | return 1; | 2953 | return 1; |
2920 | } | 2954 | } |
2921 | 2955 | ||
2922 | if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) | 2956 | if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) |
2923 | return 1; | 2957 | return 1; |
2924 | 2958 | ||
2925 | vcpu->arch.cr4 = cr4; | 2959 | vcpu->arch.cr4 = cr4; |
@@ -3537,7 +3571,7 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) | |||
3537 | u8 mode = 0; | 3571 | u8 mode = 0; |
3538 | 3572 | ||
3539 | if (cpu_has_secondary_exec_ctrls() && | 3573 | if (cpu_has_secondary_exec_ctrls() && |
3540 | (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & | 3574 | (secondary_exec_controls_get(to_vmx(vcpu)) & |
3541 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { | 3575 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { |
3542 | mode |= MSR_BITMAP_MODE_X2APIC; | 3576 | mode |= MSR_BITMAP_MODE_X2APIC; |
3543 | if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) | 3577 | if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) |
@@ -3731,7 +3765,6 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) | |||
3731 | { | 3765 | { |
3732 | u32 low32, high32; | 3766 | u32 low32, high32; |
3733 | unsigned long tmpl; | 3767 | unsigned long tmpl; |
3734 | struct desc_ptr dt; | ||
3735 | unsigned long cr0, cr3, cr4; | 3768 | unsigned long cr0, cr3, cr4; |
3736 | 3769 | ||
3737 | cr0 = read_cr0(); | 3770 | cr0 = read_cr0(); |
@@ -3767,9 +3800,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) | |||
3767 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 3800 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
3768 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 3801 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
3769 | 3802 | ||
3770 | store_idt(&dt); | 3803 | vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */ |
3771 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | ||
3772 | vmx->host_idt_base = dt.address; | ||
3773 | 3804 | ||
3774 | vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */ | 3805 | vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */ |
3775 | 3806 | ||
@@ -3798,7 +3829,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) | |||
3798 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | 3829 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); |
3799 | } | 3830 | } |
3800 | 3831 | ||
3801 | static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | 3832 | u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) |
3802 | { | 3833 | { |
3803 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; | 3834 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; |
3804 | 3835 | ||
@@ -3808,8 +3839,9 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | |||
3808 | if (!enable_vnmi) | 3839 | if (!enable_vnmi) |
3809 | pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; | 3840 | pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; |
3810 | 3841 | ||
3811 | /* Enable the preemption timer dynamically */ | 3842 | if (!enable_preemption_timer) |
3812 | pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; | 3843 | pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; |
3844 | |||
3813 | return pin_based_exec_ctrl; | 3845 | return pin_based_exec_ctrl; |
3814 | } | 3846 | } |
3815 | 3847 | ||
@@ -3817,14 +3849,14 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) | |||
3817 | { | 3849 | { |
3818 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3850 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3819 | 3851 | ||
3820 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); | 3852 | pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); |
3821 | if (cpu_has_secondary_exec_ctrls()) { | 3853 | if (cpu_has_secondary_exec_ctrls()) { |
3822 | if (kvm_vcpu_apicv_active(vcpu)) | 3854 | if (kvm_vcpu_apicv_active(vcpu)) |
3823 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | 3855 | secondary_exec_controls_setbit(vmx, |
3824 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 3856 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
3825 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 3857 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
3826 | else | 3858 | else |
3827 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, | 3859 | secondary_exec_controls_clearbit(vmx, |
3828 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 3860 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
3829 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 3861 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
3830 | } | 3862 | } |
@@ -4015,15 +4047,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4015 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | 4047 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
4016 | 4048 | ||
4017 | /* Control */ | 4049 | /* Control */ |
4018 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); | 4050 | pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); |
4019 | vmx->hv_deadline_tsc = -1; | 4051 | vmx->hv_deadline_tsc = -1; |
4020 | 4052 | ||
4021 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 4053 | exec_controls_set(vmx, vmx_exec_control(vmx)); |
4022 | 4054 | ||
4023 | if (cpu_has_secondary_exec_ctrls()) { | 4055 | if (cpu_has_secondary_exec_ctrls()) { |
4024 | vmx_compute_secondary_exec_control(vmx); | 4056 | vmx_compute_secondary_exec_control(vmx); |
4025 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 4057 | secondary_exec_controls_set(vmx, vmx->secondary_exec_control); |
4026 | vmx->secondary_exec_control); | ||
4027 | } | 4058 | } |
4028 | 4059 | ||
4029 | if (kvm_vcpu_apicv_active(&vmx->vcpu)) { | 4060 | if (kvm_vcpu_apicv_active(&vmx->vcpu)) { |
@@ -4081,10 +4112,10 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4081 | ++vmx->nmsrs; | 4112 | ++vmx->nmsrs; |
4082 | } | 4113 | } |
4083 | 4114 | ||
4084 | vm_exit_controls_init(vmx, vmx_vmexit_ctrl()); | 4115 | vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); |
4085 | 4116 | ||
4086 | /* 22.2.1, 20.8.1 */ | 4117 | /* 22.2.1, 20.8.1 */ |
4087 | vm_entry_controls_init(vmx, vmx_vmentry_ctrl()); | 4118 | vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); |
4088 | 4119 | ||
4089 | vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; | 4120 | vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; |
4090 | vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); | 4121 | vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); |
@@ -4208,8 +4239,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
4208 | 4239 | ||
4209 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 4240 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
4210 | { | 4241 | { |
4211 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, | 4242 | exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); |
4212 | CPU_BASED_VIRTUAL_INTR_PENDING); | ||
4213 | } | 4243 | } |
4214 | 4244 | ||
4215 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 4245 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
@@ -4220,8 +4250,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
4220 | return; | 4250 | return; |
4221 | } | 4251 | } |
4222 | 4252 | ||
4223 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, | 4253 | exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); |
4224 | CPU_BASED_VIRTUAL_NMI_PENDING); | ||
4225 | } | 4254 | } |
4226 | 4255 | ||
4227 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) | 4256 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
@@ -4442,11 +4471,11 @@ static void kvm_machine_check(void) | |||
4442 | 4471 | ||
4443 | static int handle_machine_check(struct kvm_vcpu *vcpu) | 4472 | static int handle_machine_check(struct kvm_vcpu *vcpu) |
4444 | { | 4473 | { |
4445 | /* already handled by vcpu_run */ | 4474 | /* handled by vmx_vcpu_run() */ |
4446 | return 1; | 4475 | return 1; |
4447 | } | 4476 | } |
4448 | 4477 | ||
4449 | static int handle_exception(struct kvm_vcpu *vcpu) | 4478 | static int handle_exception_nmi(struct kvm_vcpu *vcpu) |
4450 | { | 4479 | { |
4451 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4480 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4452 | struct kvm_run *kvm_run = vcpu->run; | 4481 | struct kvm_run *kvm_run = vcpu->run; |
@@ -4458,11 +4487,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4458 | vect_info = vmx->idt_vectoring_info; | 4487 | vect_info = vmx->idt_vectoring_info; |
4459 | intr_info = vmx->exit_intr_info; | 4488 | intr_info = vmx->exit_intr_info; |
4460 | 4489 | ||
4461 | if (is_machine_check(intr_info)) | 4490 | if (is_machine_check(intr_info) || is_nmi(intr_info)) |
4462 | return handle_machine_check(vcpu); | 4491 | return 1; /* handled by handle_exception_nmi_irqoff() */ |
4463 | |||
4464 | if (is_nmi(intr_info)) | ||
4465 | return 1; /* already handled by vmx_vcpu_run() */ | ||
4466 | 4492 | ||
4467 | if (is_invalid_opcode(intr_info)) | 4493 | if (is_invalid_opcode(intr_info)) |
4468 | return handle_ud(vcpu); | 4494 | return handle_ud(vcpu); |
@@ -4518,7 +4544,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4518 | dr6 = vmcs_readl(EXIT_QUALIFICATION); | 4544 | dr6 = vmcs_readl(EXIT_QUALIFICATION); |
4519 | if (!(vcpu->guest_debug & | 4545 | if (!(vcpu->guest_debug & |
4520 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { | 4546 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { |
4521 | vcpu->arch.dr6 &= ~15; | 4547 | vcpu->arch.dr6 &= ~DR_TRAP_BITS; |
4522 | vcpu->arch.dr6 |= dr6 | DR6_RTM; | 4548 | vcpu->arch.dr6 |= dr6 | DR6_RTM; |
4523 | if (is_icebp(intr_info)) | 4549 | if (is_icebp(intr_info)) |
4524 | skip_emulated_instruction(vcpu); | 4550 | skip_emulated_instruction(vcpu); |
@@ -4763,7 +4789,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
4763 | vcpu->run->exit_reason = KVM_EXIT_DEBUG; | 4789 | vcpu->run->exit_reason = KVM_EXIT_DEBUG; |
4764 | return 0; | 4790 | return 0; |
4765 | } else { | 4791 | } else { |
4766 | vcpu->arch.dr6 &= ~15; | 4792 | vcpu->arch.dr6 &= ~DR_TRAP_BITS; |
4767 | vcpu->arch.dr6 |= DR6_BD | DR6_RTM; | 4793 | vcpu->arch.dr6 |= DR6_BD | DR6_RTM; |
4768 | kvm_queue_exception(vcpu, DB_VECTOR); | 4794 | kvm_queue_exception(vcpu, DB_VECTOR); |
4769 | return 1; | 4795 | return 1; |
@@ -4771,8 +4797,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
4771 | } | 4797 | } |
4772 | 4798 | ||
4773 | if (vcpu->guest_debug == 0) { | 4799 | if (vcpu->guest_debug == 0) { |
4774 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, | 4800 | exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); |
4775 | CPU_BASED_MOV_DR_EXITING); | ||
4776 | 4801 | ||
4777 | /* | 4802 | /* |
4778 | * No more DR vmexits; force a reload of the debug registers | 4803 | * No more DR vmexits; force a reload of the debug registers |
@@ -4816,7 +4841,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) | |||
4816 | vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); | 4841 | vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); |
4817 | 4842 | ||
4818 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; | 4843 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; |
4819 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING); | 4844 | exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); |
4820 | } | 4845 | } |
4821 | 4846 | ||
4822 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | 4847 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) |
@@ -4876,8 +4901,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) | |||
4876 | 4901 | ||
4877 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) | 4902 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
4878 | { | 4903 | { |
4879 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, | 4904 | exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); |
4880 | CPU_BASED_VIRTUAL_INTR_PENDING); | ||
4881 | 4905 | ||
4882 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 4906 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
4883 | 4907 | ||
@@ -5131,8 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
5131 | static int handle_nmi_window(struct kvm_vcpu *vcpu) | 5155 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
5132 | { | 5156 | { |
5133 | WARN_ON_ONCE(!enable_vnmi); | 5157 | WARN_ON_ONCE(!enable_vnmi); |
5134 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, | 5158 | exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); |
5135 | CPU_BASED_VIRTUAL_NMI_PENDING); | ||
5136 | ++vcpu->stat.nmi_window_exits; | 5159 | ++vcpu->stat.nmi_window_exits; |
5137 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5160 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5138 | 5161 | ||
@@ -5144,7 +5167,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5144 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5167 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5145 | enum emulation_result err = EMULATE_DONE; | 5168 | enum emulation_result err = EMULATE_DONE; |
5146 | int ret = 1; | 5169 | int ret = 1; |
5147 | u32 cpu_exec_ctrl; | ||
5148 | bool intr_window_requested; | 5170 | bool intr_window_requested; |
5149 | unsigned count = 130; | 5171 | unsigned count = 130; |
5150 | 5172 | ||
@@ -5155,8 +5177,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5155 | */ | 5177 | */ |
5156 | WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); | 5178 | WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); |
5157 | 5179 | ||
5158 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 5180 | intr_window_requested = exec_controls_get(vmx) & |
5159 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; | 5181 | CPU_BASED_VIRTUAL_INTR_PENDING; |
5160 | 5182 | ||
5161 | while (vmx->emulation_required && count-- != 0) { | 5183 | while (vmx->emulation_required && count-- != 0) { |
5162 | if (intr_window_requested && vmx_interrupt_allowed(vcpu)) | 5184 | if (intr_window_requested && vmx_interrupt_allowed(vcpu)) |
@@ -5342,7 +5364,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) | |||
5342 | * is read even if it isn't needed (e.g., for type==all) | 5364 | * is read even if it isn't needed (e.g., for type==all) |
5343 | */ | 5365 | */ |
5344 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | 5366 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), |
5345 | vmx_instruction_info, false, &gva)) | 5367 | vmx_instruction_info, false, |
5368 | sizeof(operand), &gva)) | ||
5346 | return 1; | 5369 | return 1; |
5347 | 5370 | ||
5348 | if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { | 5371 | if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { |
@@ -5437,8 +5460,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) | |||
5437 | 5460 | ||
5438 | static int handle_preemption_timer(struct kvm_vcpu *vcpu) | 5461 | static int handle_preemption_timer(struct kvm_vcpu *vcpu) |
5439 | { | 5462 | { |
5440 | if (!to_vmx(vcpu)->req_immediate_exit) | 5463 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5464 | |||
5465 | if (!vmx->req_immediate_exit && | ||
5466 | !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) | ||
5441 | kvm_lapic_expired_hv_timer(vcpu); | 5467 | kvm_lapic_expired_hv_timer(vcpu); |
5468 | |||
5442 | return 1; | 5469 | return 1; |
5443 | } | 5470 | } |
5444 | 5471 | ||
@@ -5469,7 +5496,7 @@ static int handle_encls(struct kvm_vcpu *vcpu) | |||
5469 | * to be done to userspace and return 0. | 5496 | * to be done to userspace and return 0. |
5470 | */ | 5497 | */ |
5471 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | 5498 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { |
5472 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 5499 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, |
5473 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 5500 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
5474 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 5501 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
5475 | [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, | 5502 | [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, |
@@ -5952,6 +5979,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
5952 | 5979 | ||
5953 | void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) | 5980 | void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) |
5954 | { | 5981 | { |
5982 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5955 | u32 sec_exec_control; | 5983 | u32 sec_exec_control; |
5956 | 5984 | ||
5957 | if (!lapic_in_kernel(vcpu)) | 5985 | if (!lapic_in_kernel(vcpu)) |
@@ -5963,11 +5991,11 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) | |||
5963 | 5991 | ||
5964 | /* Postpone execution until vmcs01 is the current VMCS. */ | 5992 | /* Postpone execution until vmcs01 is the current VMCS. */ |
5965 | if (is_guest_mode(vcpu)) { | 5993 | if (is_guest_mode(vcpu)) { |
5966 | to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; | 5994 | vmx->nested.change_vmcs01_virtual_apic_mode = true; |
5967 | return; | 5995 | return; |
5968 | } | 5996 | } |
5969 | 5997 | ||
5970 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 5998 | sec_exec_control = secondary_exec_controls_get(vmx); |
5971 | sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 5999 | sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
5972 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); | 6000 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); |
5973 | 6001 | ||
@@ -5989,7 +6017,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) | |||
5989 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 6017 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
5990 | break; | 6018 | break; |
5991 | } | 6019 | } |
5992 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); | 6020 | secondary_exec_controls_set(vmx, sec_exec_control); |
5993 | 6021 | ||
5994 | vmx_update_msr_bitmap(vcpu); | 6022 | vmx_update_msr_bitmap(vcpu); |
5995 | } | 6023 | } |
@@ -6107,76 +6135,81 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) | |||
6107 | memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); | 6135 | memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); |
6108 | } | 6136 | } |
6109 | 6137 | ||
6110 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 6138 | static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) |
6111 | { | 6139 | { |
6112 | u32 exit_intr_info = 0; | 6140 | vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
6113 | u16 basic_exit_reason = (u16)vmx->exit_reason; | ||
6114 | |||
6115 | if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY | ||
6116 | || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI)) | ||
6117 | return; | ||
6118 | |||
6119 | if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) | ||
6120 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
6121 | vmx->exit_intr_info = exit_intr_info; | ||
6122 | 6141 | ||
6123 | /* if exit due to PF check for async PF */ | 6142 | /* if exit due to PF check for async PF */ |
6124 | if (is_page_fault(exit_intr_info)) | 6143 | if (is_page_fault(vmx->exit_intr_info)) |
6125 | vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); | 6144 | vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); |
6126 | 6145 | ||
6127 | /* Handle machine checks before interrupts are enabled */ | 6146 | /* Handle machine checks before interrupts are enabled */ |
6128 | if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY || | 6147 | if (is_machine_check(vmx->exit_intr_info)) |
6129 | is_machine_check(exit_intr_info)) | ||
6130 | kvm_machine_check(); | 6148 | kvm_machine_check(); |
6131 | 6149 | ||
6132 | /* We need to handle NMIs before interrupts are enabled */ | 6150 | /* We need to handle NMIs before interrupts are enabled */ |
6133 | if (is_nmi(exit_intr_info)) { | 6151 | if (is_nmi(vmx->exit_intr_info)) { |
6134 | kvm_before_interrupt(&vmx->vcpu); | 6152 | kvm_before_interrupt(&vmx->vcpu); |
6135 | asm("int $2"); | 6153 | asm("int $2"); |
6136 | kvm_after_interrupt(&vmx->vcpu); | 6154 | kvm_after_interrupt(&vmx->vcpu); |
6137 | } | 6155 | } |
6138 | } | 6156 | } |
6139 | 6157 | ||
6140 | static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | 6158 | static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) |
6141 | { | 6159 | { |
6142 | u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 6160 | unsigned int vector; |
6143 | 6161 | unsigned long entry; | |
6144 | if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) | ||
6145 | == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { | ||
6146 | unsigned int vector; | ||
6147 | unsigned long entry; | ||
6148 | gate_desc *desc; | ||
6149 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6150 | #ifdef CONFIG_X86_64 | 6162 | #ifdef CONFIG_X86_64 |
6151 | unsigned long tmp; | 6163 | unsigned long tmp; |
6152 | #endif | 6164 | #endif |
6165 | gate_desc *desc; | ||
6166 | u32 intr_info; | ||
6167 | |||
6168 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
6169 | if (WARN_ONCE(!is_external_intr(intr_info), | ||
6170 | "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) | ||
6171 | return; | ||
6153 | 6172 | ||
6154 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | 6173 | vector = intr_info & INTR_INFO_VECTOR_MASK; |
6155 | desc = (gate_desc *)vmx->host_idt_base + vector; | 6174 | desc = (gate_desc *)host_idt_base + vector; |
6156 | entry = gate_offset(desc); | 6175 | entry = gate_offset(desc); |
6157 | asm volatile( | 6176 | |
6177 | kvm_before_interrupt(vcpu); | ||
6178 | |||
6179 | asm volatile( | ||
6158 | #ifdef CONFIG_X86_64 | 6180 | #ifdef CONFIG_X86_64 |
6159 | "mov %%" _ASM_SP ", %[sp]\n\t" | 6181 | "mov %%" _ASM_SP ", %[sp]\n\t" |
6160 | "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" | 6182 | "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" |
6161 | "push $%c[ss]\n\t" | 6183 | "push $%c[ss]\n\t" |
6162 | "push %[sp]\n\t" | 6184 | "push %[sp]\n\t" |
6163 | #endif | 6185 | #endif |
6164 | "pushf\n\t" | 6186 | "pushf\n\t" |
6165 | __ASM_SIZE(push) " $%c[cs]\n\t" | 6187 | __ASM_SIZE(push) " $%c[cs]\n\t" |
6166 | CALL_NOSPEC | 6188 | CALL_NOSPEC |
6167 | : | 6189 | : |
6168 | #ifdef CONFIG_X86_64 | 6190 | #ifdef CONFIG_X86_64 |
6169 | [sp]"=&r"(tmp), | 6191 | [sp]"=&r"(tmp), |
6170 | #endif | 6192 | #endif |
6171 | ASM_CALL_CONSTRAINT | 6193 | ASM_CALL_CONSTRAINT |
6172 | : | 6194 | : |
6173 | THUNK_TARGET(entry), | 6195 | THUNK_TARGET(entry), |
6174 | [ss]"i"(__KERNEL_DS), | 6196 | [ss]"i"(__KERNEL_DS), |
6175 | [cs]"i"(__KERNEL_CS) | 6197 | [cs]"i"(__KERNEL_CS) |
6176 | ); | 6198 | ); |
6177 | } | 6199 | |
6200 | kvm_after_interrupt(vcpu); | ||
6201 | } | ||
6202 | STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); | ||
6203 | |||
6204 | static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) | ||
6205 | { | ||
6206 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6207 | |||
6208 | if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) | ||
6209 | handle_external_interrupt_irqoff(vcpu); | ||
6210 | else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) | ||
6211 | handle_exception_nmi_irqoff(vmx); | ||
6178 | } | 6212 | } |
6179 | STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); | ||
6180 | 6213 | ||
6181 | static bool vmx_has_emulated_msr(int index) | 6214 | static bool vmx_has_emulated_msr(int index) |
6182 | { | 6215 | { |
@@ -6187,6 +6220,8 @@ static bool vmx_has_emulated_msr(int index) | |||
6187 | * real mode. | 6220 | * real mode. |
6188 | */ | 6221 | */ |
6189 | return enable_unrestricted_guest || emulate_invalid_guest_state; | 6222 | return enable_unrestricted_guest || emulate_invalid_guest_state; |
6223 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
6224 | return nested; | ||
6190 | case MSR_AMD64_VIRT_SPEC_CTRL: | 6225 | case MSR_AMD64_VIRT_SPEC_CTRL: |
6191 | /* This is AMD only. */ | 6226 | /* This is AMD only. */ |
6192 | return false; | 6227 | return false; |
@@ -6332,15 +6367,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) | |||
6332 | msrs[i].host, false); | 6367 | msrs[i].host, false); |
6333 | } | 6368 | } |
6334 | 6369 | ||
6335 | static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) | ||
6336 | { | ||
6337 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); | ||
6338 | if (!vmx->loaded_vmcs->hv_timer_armed) | ||
6339 | vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, | ||
6340 | PIN_BASED_VMX_PREEMPTION_TIMER); | ||
6341 | vmx->loaded_vmcs->hv_timer_armed = true; | ||
6342 | } | ||
6343 | |||
6344 | static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) | 6370 | static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) |
6345 | { | 6371 | { |
6346 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6372 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -6348,11 +6374,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) | |||
6348 | u32 delta_tsc; | 6374 | u32 delta_tsc; |
6349 | 6375 | ||
6350 | if (vmx->req_immediate_exit) { | 6376 | if (vmx->req_immediate_exit) { |
6351 | vmx_arm_hv_timer(vmx, 0); | 6377 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); |
6352 | return; | 6378 | vmx->loaded_vmcs->hv_timer_soft_disabled = false; |
6353 | } | 6379 | } else if (vmx->hv_deadline_tsc != -1) { |
6354 | |||
6355 | if (vmx->hv_deadline_tsc != -1) { | ||
6356 | tscl = rdtsc(); | 6380 | tscl = rdtsc(); |
6357 | if (vmx->hv_deadline_tsc > tscl) | 6381 | if (vmx->hv_deadline_tsc > tscl) |
6358 | /* set_hv_timer ensures the delta fits in 32-bits */ | 6382 | /* set_hv_timer ensures the delta fits in 32-bits */ |
@@ -6361,14 +6385,12 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) | |||
6361 | else | 6385 | else |
6362 | delta_tsc = 0; | 6386 | delta_tsc = 0; |
6363 | 6387 | ||
6364 | vmx_arm_hv_timer(vmx, delta_tsc); | 6388 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); |
6365 | return; | 6389 | vmx->loaded_vmcs->hv_timer_soft_disabled = false; |
6390 | } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) { | ||
6391 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1); | ||
6392 | vmx->loaded_vmcs->hv_timer_soft_disabled = true; | ||
6366 | } | 6393 | } |
6367 | |||
6368 | if (vmx->loaded_vmcs->hv_timer_armed) | ||
6369 | vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, | ||
6370 | PIN_BASED_VMX_PREEMPTION_TIMER); | ||
6371 | vmx->loaded_vmcs->hv_timer_armed = false; | ||
6372 | } | 6394 | } |
6373 | 6395 | ||
6374 | void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) | 6396 | void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) |
@@ -6401,8 +6423,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6401 | vmcs_write32(PLE_WINDOW, vmx->ple_window); | 6423 | vmcs_write32(PLE_WINDOW, vmx->ple_window); |
6402 | } | 6424 | } |
6403 | 6425 | ||
6404 | if (vmx->nested.need_vmcs12_sync) | 6426 | if (vmx->nested.need_vmcs12_to_shadow_sync) |
6405 | nested_sync_from_vmcs12(vcpu); | 6427 | nested_sync_vmcs12_to_shadow(vcpu); |
6406 | 6428 | ||
6407 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6429 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
6408 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 6430 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
@@ -6440,7 +6462,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6440 | 6462 | ||
6441 | atomic_switch_perf_msrs(vmx); | 6463 | atomic_switch_perf_msrs(vmx); |
6442 | 6464 | ||
6443 | vmx_update_hv_timer(vcpu); | 6465 | if (enable_preemption_timer) |
6466 | vmx_update_hv_timer(vcpu); | ||
6467 | |||
6468 | if (lapic_in_kernel(vcpu) && | ||
6469 | vcpu->arch.apic->lapic_timer.timer_advance_ns) | ||
6470 | kvm_wait_lapic_expire(vcpu); | ||
6444 | 6471 | ||
6445 | /* | 6472 | /* |
6446 | * If this vCPU has touched SPEC_CTRL, restore the guest's value if | 6473 | * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
@@ -6533,13 +6560,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6533 | vmx->idt_vectoring_info = 0; | 6560 | vmx->idt_vectoring_info = 0; |
6534 | 6561 | ||
6535 | vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); | 6562 | vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); |
6563 | if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) | ||
6564 | kvm_machine_check(); | ||
6565 | |||
6536 | if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) | 6566 | if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) |
6537 | return; | 6567 | return; |
6538 | 6568 | ||
6539 | vmx->loaded_vmcs->launched = 1; | 6569 | vmx->loaded_vmcs->launched = 1; |
6540 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 6570 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
6541 | 6571 | ||
6542 | vmx_complete_atomic_exit(vmx); | ||
6543 | vmx_recover_nmi_blocking(vmx); | 6572 | vmx_recover_nmi_blocking(vmx); |
6544 | vmx_complete_interrupts(vmx); | 6573 | vmx_complete_interrupts(vmx); |
6545 | } | 6574 | } |
@@ -6630,6 +6659,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
6630 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); | 6659 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); |
6631 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); | 6660 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); |
6632 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); | 6661 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); |
6662 | if (kvm_cstate_in_guest(kvm)) { | ||
6663 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); | ||
6664 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); | ||
6665 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); | ||
6666 | vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); | ||
6667 | } | ||
6633 | vmx->msr_bitmap_mode = 0; | 6668 | vmx->msr_bitmap_mode = 0; |
6634 | 6669 | ||
6635 | vmx->loaded_vmcs = &vmx->vmcs01; | 6670 | vmx->loaded_vmcs = &vmx->vmcs01; |
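The new MSR bitmap entries above only take effect when userspace has opted in to c-state pass-through, i.e. when kvm_cstate_in_guest() is true for the VM. A minimal VMM-side sketch of that opt-in, assuming an already-created VM file descriptor (vm_fd), ideally before any vCPUs exist, and omitting error handling and the usual KVM_CHECK_EXTENSION probe:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: ask KVM to stop intercepting the core C-state residency MSRs so
 * the guest reads MSR_CORE_C1_RES and MSR_CORE_C{3,6,7}_RESIDENCY directly.
 * vm_fd is an assumption made for illustration only. */
static int disable_cstate_exits(int vm_fd)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_X86_DISABLE_EXITS,
                .args[0] = KVM_X86_DISABLE_EXITS_CSTATE,
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}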
@@ -6726,22 +6761,22 @@ static int vmx_vm_init(struct kvm *kvm) | |||
6726 | return 0; | 6761 | return 0; |
6727 | } | 6762 | } |
6728 | 6763 | ||
6729 | static void __init vmx_check_processor_compat(void *rtn) | 6764 | static int __init vmx_check_processor_compat(void) |
6730 | { | 6765 | { |
6731 | struct vmcs_config vmcs_conf; | 6766 | struct vmcs_config vmcs_conf; |
6732 | struct vmx_capability vmx_cap; | 6767 | struct vmx_capability vmx_cap; |
6733 | 6768 | ||
6734 | *(int *)rtn = 0; | ||
6735 | if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) | 6769 | if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) |
6736 | *(int *)rtn = -EIO; | 6770 | return -EIO; |
6737 | if (nested) | 6771 | if (nested) |
6738 | nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, | 6772 | nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, |
6739 | enable_apicv); | 6773 | enable_apicv); |
6740 | if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { | 6774 | if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { |
6741 | printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", | 6775 | printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", |
6742 | smp_processor_id()); | 6776 | smp_processor_id()); |
6743 | *(int *)rtn = -EIO; | 6777 | return -EIO; |
6744 | } | 6778 | } |
6779 | return 0; | ||
6745 | } | 6780 | } |
6746 | 6781 | ||
6747 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | 6782 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
@@ -6795,7 +6830,7 @@ static int vmx_get_lpage_level(void) | |||
6795 | return PT_PDPE_LEVEL; | 6830 | return PT_PDPE_LEVEL; |
6796 | } | 6831 | } |
6797 | 6832 | ||
6798 | static void vmcs_set_secondary_exec_control(u32 new_ctl) | 6833 | static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) |
6799 | { | 6834 | { |
6800 | /* | 6835 | /* |
6801 | * These bits in the secondary execution controls field | 6836 | * These bits in the secondary execution controls field |
@@ -6809,10 +6844,10 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl) | |||
6809 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 6844 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
6810 | SECONDARY_EXEC_DESC; | 6845 | SECONDARY_EXEC_DESC; |
6811 | 6846 | ||
6812 | u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 6847 | u32 new_ctl = vmx->secondary_exec_control; |
6848 | u32 cur_ctl = secondary_exec_controls_get(vmx); | ||
6813 | 6849 | ||
6814 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 6850 | secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); |
6815 | (new_ctl & ~mask) | (cur_ctl & mask)); | ||
6816 | } | 6851 | } |
6817 | 6852 | ||
6818 | /* | 6853 | /* |
@@ -6950,7 +6985,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
6950 | 6985 | ||
6951 | if (cpu_has_secondary_exec_ctrls()) { | 6986 | if (cpu_has_secondary_exec_ctrls()) { |
6952 | vmx_compute_secondary_exec_control(vmx); | 6987 | vmx_compute_secondary_exec_control(vmx); |
6953 | vmcs_set_secondary_exec_control(vmx->secondary_exec_control); | 6988 | vmcs_set_secondary_exec_control(vmx); |
6954 | } | 6989 | } |
6955 | 6990 | ||
6956 | if (nested_vmx_allowed(vcpu)) | 6991 | if (nested_vmx_allowed(vcpu)) |
@@ -7424,10 +7459,14 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) | |||
7424 | static __init int hardware_setup(void) | 7459 | static __init int hardware_setup(void) |
7425 | { | 7460 | { |
7426 | unsigned long host_bndcfgs; | 7461 | unsigned long host_bndcfgs; |
7462 | struct desc_ptr dt; | ||
7427 | int r, i; | 7463 | int r, i; |
7428 | 7464 | ||
7429 | rdmsrl_safe(MSR_EFER, &host_efer); | 7465 | rdmsrl_safe(MSR_EFER, &host_efer); |
7430 | 7466 | ||
7467 | store_idt(&dt); | ||
7468 | host_idt_base = dt.address; | ||
7469 | |||
7431 | for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) | 7470 | for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) |
7432 | kvm_define_shared_msr(i, vmx_msr_index[i]); | 7471 | kvm_define_shared_msr(i, vmx_msr_index[i]); |
7433 | 7472 | ||
@@ -7531,17 +7570,33 @@ static __init int hardware_setup(void) | |||
7531 | } | 7570 | } |
7532 | 7571 | ||
7533 | if (!cpu_has_vmx_preemption_timer()) | 7572 | if (!cpu_has_vmx_preemption_timer()) |
7534 | kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; | 7573 | enable_preemption_timer = false; |
7535 | 7574 | ||
7536 | if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { | 7575 | if (enable_preemption_timer) { |
7576 | u64 use_timer_freq = 5000ULL * 1000 * 1000; | ||
7537 | u64 vmx_msr; | 7577 | u64 vmx_msr; |
7538 | 7578 | ||
7539 | rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); | 7579 | rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); |
7540 | cpu_preemption_timer_multi = | 7580 | cpu_preemption_timer_multi = |
7541 | vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; | 7581 | vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; |
7542 | } else { | 7582 | |
7583 | if (tsc_khz) | ||
7584 | use_timer_freq = (u64)tsc_khz * 1000; | ||
7585 | use_timer_freq >>= cpu_preemption_timer_multi; | ||
7586 | |||
7587 | /* | ||
7588 | * KVM "disables" the preemption timer by setting it to its max | ||
7589 | * value. Don't use the timer if it might cause spurious exits | ||
7590 | * at a rate faster than 0.1 Hz (of uninterrupted guest time). | ||
7591 | */ | ||
7592 | if (use_timer_freq > 0xffffffffu / 10) | ||
7593 | enable_preemption_timer = false; | ||
7594 | } | ||
7595 | |||
7596 | if (!enable_preemption_timer) { | ||
7543 | kvm_x86_ops->set_hv_timer = NULL; | 7597 | kvm_x86_ops->set_hv_timer = NULL; |
7544 | kvm_x86_ops->cancel_hv_timer = NULL; | 7598 | kvm_x86_ops->cancel_hv_timer = NULL; |
7599 | kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; | ||
7545 | } | 7600 | } |
7546 | 7601 | ||
7547 | kvm_set_posted_intr_wakeup_handler(wakeup_handler); | 7602 | kvm_set_posted_intr_wakeup_handler(wakeup_handler); |
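To make the new guard concrete: KVM now "disables" the preemption timer by programming its maximum value rather than toggling the pin-based control, so even a disabled timer still fires once 0xffffffff ticks elapse. A worked example; the TSC frequency and rate shift below are made-up numbers for illustration:

/* Illustrative arithmetic only; the inputs are assumptions.
 *
 * tsc_khz = 2600000 (2.6 GHz TSC), preemption timer rate shift = 5:
 *     use_timer_freq = 2600000000ULL >> 5  ==  81250000 Hz  (~81 MHz)
 *     a "disabled" timer wraps every 0xffffffff / 81250000  ~=  52 s
 * which is above the 10 s floor, so enable_preemption_timer stays set.
 *
 * The same TSC with a rate shift of 0 gives use_timer_freq = 2.6 GHz,
 * i.e. more than 0xffffffffu / 10 (~429 MHz); a "disabled" timer would
 * then fire roughly every 1.65 s, so the timer is turned off instead.
 */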
@@ -7683,7 +7738,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
7683 | .set_tdp_cr3 = vmx_set_cr3, | 7738 | .set_tdp_cr3 = vmx_set_cr3, |
7684 | 7739 | ||
7685 | .check_intercept = vmx_check_intercept, | 7740 | .check_intercept = vmx_check_intercept, |
7686 | .handle_external_intr = vmx_handle_external_intr, | 7741 | .handle_exit_irqoff = vmx_handle_exit_irqoff, |
7687 | .mpx_supported = vmx_mpx_supported, | 7742 | .mpx_supported = vmx_mpx_supported, |
7688 | .xsaves_supported = vmx_xsaves_supported, | 7743 | .xsaves_supported = vmx_xsaves_supported, |
7689 | .umip_emulated = vmx_umip_emulated, | 7744 | .umip_emulated = vmx_umip_emulated, |
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 61128b48c503..82d0bc3a4d52 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h | |||
@@ -109,14 +109,21 @@ struct nested_vmx { | |||
109 | * to guest memory during VM exit. | 109 | * to guest memory during VM exit. |
110 | */ | 110 | */ |
111 | struct vmcs12 *cached_shadow_vmcs12; | 111 | struct vmcs12 *cached_shadow_vmcs12; |
112 | |||
112 | /* | 113 | /* |
113 | * Indicates if the shadow vmcs or enlightened vmcs must be updated | 114 | * Indicates if the shadow vmcs or enlightened vmcs must be updated |
114 | * with the data held by struct vmcs12. | 115 | * with the data held by struct vmcs12. |
115 | */ | 116 | */ |
116 | bool need_vmcs12_sync; | 117 | bool need_vmcs12_to_shadow_sync; |
117 | bool dirty_vmcs12; | 118 | bool dirty_vmcs12; |
118 | 119 | ||
119 | /* | 120 | /* |
121 | * Indicates lazily loaded guest state has not yet been decached from | ||
122 | * vmcs02. | ||
123 | */ | ||
124 | bool need_sync_vmcs02_to_vmcs12_rare; | ||
125 | |||
126 | /* | ||
120 | * vmcs02 has been initialized, i.e. state that is constant for | 127 | * vmcs02 has been initialized, i.e. state that is constant for |
121 | * vmcs02 has been written to the backing VMCS. Initialization | 128 | * vmcs02 has been written to the backing VMCS. Initialization |
122 | * is delayed until L1 actually attempts to run a nested VM. | 129 | * is delayed until L1 actually attempts to run a nested VM. |
@@ -180,14 +187,24 @@ struct vcpu_vmx { | |||
180 | struct kvm_vcpu vcpu; | 187 | struct kvm_vcpu vcpu; |
181 | u8 fail; | 188 | u8 fail; |
182 | u8 msr_bitmap_mode; | 189 | u8 msr_bitmap_mode; |
190 | |||
191 | /* | ||
192 | * If true, host state has been stored in vmx->loaded_vmcs for | ||
193 | * the CPU registers that only need to be switched when transitioning | ||
194 | * to/from the kernel, and the registers have been loaded with guest | ||
195 | * values. If false, host state is loaded in the CPU registers | ||
196 | * and vmx->loaded_vmcs->host_state is invalid. | ||
197 | */ | ||
198 | bool guest_state_loaded; | ||
199 | |||
183 | u32 exit_intr_info; | 200 | u32 exit_intr_info; |
184 | u32 idt_vectoring_info; | 201 | u32 idt_vectoring_info; |
185 | ulong rflags; | 202 | ulong rflags; |
203 | |||
186 | struct shared_msr_entry *guest_msrs; | 204 | struct shared_msr_entry *guest_msrs; |
187 | int nmsrs; | 205 | int nmsrs; |
188 | int save_nmsrs; | 206 | int save_nmsrs; |
189 | bool guest_msrs_dirty; | 207 | bool guest_msrs_ready; |
190 | unsigned long host_idt_base; | ||
191 | #ifdef CONFIG_X86_64 | 208 | #ifdef CONFIG_X86_64 |
192 | u64 msr_host_kernel_gs_base; | 209 | u64 msr_host_kernel_gs_base; |
193 | u64 msr_guest_kernel_gs_base; | 210 | u64 msr_guest_kernel_gs_base; |
@@ -195,21 +212,15 @@ struct vcpu_vmx { | |||
195 | 212 | ||
196 | u64 spec_ctrl; | 213 | u64 spec_ctrl; |
197 | 214 | ||
198 | u32 vm_entry_controls_shadow; | ||
199 | u32 vm_exit_controls_shadow; | ||
200 | u32 secondary_exec_control; | 215 | u32 secondary_exec_control; |
201 | 216 | ||
202 | /* | 217 | /* |
203 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a | 218 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a |
204 | * non-nested (L1) guest, it always points to vmcs01. For a nested | 219 | * non-nested (L1) guest, it always points to vmcs01. For a nested |
205 | * guest (L2), it points to a different VMCS. loaded_cpu_state points | 220 | * guest (L2), it points to a different VMCS. |
206 | * to the VMCS whose state is loaded into the CPU registers that only | ||
207 | * need to be switched when transitioning to/from the kernel; a NULL | ||
208 | * value indicates that host state is loaded. | ||
209 | */ | 221 | */ |
210 | struct loaded_vmcs vmcs01; | 222 | struct loaded_vmcs vmcs01; |
211 | struct loaded_vmcs *loaded_vmcs; | 223 | struct loaded_vmcs *loaded_vmcs; |
212 | struct loaded_vmcs *loaded_cpu_state; | ||
213 | 224 | ||
214 | struct msr_autoload { | 225 | struct msr_autoload { |
215 | struct vmx_msrs guest; | 226 | struct vmx_msrs guest; |
@@ -260,8 +271,6 @@ struct vcpu_vmx { | |||
260 | 271 | ||
261 | unsigned long host_debugctlmsr; | 272 | unsigned long host_debugctlmsr; |
262 | 273 | ||
263 | u64 msr_ia32_power_ctl; | ||
264 | |||
265 | /* | 274 | /* |
266 | * Only bits masked by msr_ia32_feature_control_valid_bits can be set in | 275 | * Only bits masked by msr_ia32_feature_control_valid_bits can be set in |
267 | * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included | 276 | * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included |
@@ -292,12 +301,14 @@ struct kvm_vmx { | |||
292 | }; | 301 | }; |
293 | 302 | ||
294 | bool nested_vmx_allowed(struct kvm_vcpu *vcpu); | 303 | bool nested_vmx_allowed(struct kvm_vcpu *vcpu); |
304 | void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu); | ||
295 | void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); | 305 | void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); |
296 | void vmx_vcpu_put(struct kvm_vcpu *vcpu); | ||
297 | int allocate_vpid(void); | 306 | int allocate_vpid(void); |
298 | void free_vpid(int vpid); | 307 | void free_vpid(int vpid); |
299 | void vmx_set_constant_host_state(struct vcpu_vmx *vmx); | 308 | void vmx_set_constant_host_state(struct vcpu_vmx *vmx); |
300 | void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); | 309 | void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); |
310 | void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, | ||
311 | unsigned long fs_base, unsigned long gs_base); | ||
301 | int vmx_get_cpl(struct kvm_vcpu *vcpu); | 312 | int vmx_get_cpl(struct kvm_vcpu *vcpu); |
302 | unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); | 313 | unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); |
303 | void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | 314 | void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); |
@@ -376,69 +387,31 @@ static inline u8 vmx_get_rvi(void) | |||
376 | return vmcs_read16(GUEST_INTR_STATUS) & 0xff; | 387 | return vmcs_read16(GUEST_INTR_STATUS) & 0xff; |
377 | } | 388 | } |
378 | 389 | ||
379 | static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx) | 390 | #define BUILD_CONTROLS_SHADOW(lname, uname) \ |
380 | { | 391 | static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ |
381 | vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS); | 392 | { \ |
382 | } | 393 | if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ |
383 | 394 | vmcs_write32(uname, val); \ | |
384 | static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) | 395 | vmx->loaded_vmcs->controls_shadow.lname = val; \ |
385 | { | 396 | } \ |
386 | vmcs_write32(VM_ENTRY_CONTROLS, val); | 397 | } \ |
387 | vmx->vm_entry_controls_shadow = val; | 398 | static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ |
388 | } | 399 | { \ |
389 | 400 | return vmx->loaded_vmcs->controls_shadow.lname; \ | |
390 | static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) | 401 | } \ |
391 | { | 402 | static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ |
392 | if (vmx->vm_entry_controls_shadow != val) | 403 | { \ |
393 | vm_entry_controls_init(vmx, val); | 404 | lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ |
394 | } | 405 | } \ |
395 | 406 | static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \ | |
396 | static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) | 407 | { \ |
397 | { | 408 | lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ |
398 | return vmx->vm_entry_controls_shadow; | ||
399 | } | ||
400 | |||
401 | static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
402 | { | ||
403 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); | ||
404 | } | ||
405 | |||
406 | static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
407 | { | ||
408 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); | ||
409 | } | ||
410 | |||
411 | static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx) | ||
412 | { | ||
413 | vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS); | ||
414 | } | ||
415 | |||
416 | static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) | ||
417 | { | ||
418 | vmcs_write32(VM_EXIT_CONTROLS, val); | ||
419 | vmx->vm_exit_controls_shadow = val; | ||
420 | } | ||
421 | |||
422 | static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) | ||
423 | { | ||
424 | if (vmx->vm_exit_controls_shadow != val) | ||
425 | vm_exit_controls_init(vmx, val); | ||
426 | } | ||
427 | |||
428 | static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) | ||
429 | { | ||
430 | return vmx->vm_exit_controls_shadow; | ||
431 | } | ||
432 | |||
433 | static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
434 | { | ||
435 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); | ||
436 | } | ||
437 | |||
438 | static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
439 | { | ||
440 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); | ||
441 | } | 409 | } |
410 | BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS) | ||
411 | BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS) | ||
412 | BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL) | ||
413 | BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL) | ||
414 | BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL) | ||
442 | 415 | ||
443 | static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) | 416 | static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) |
444 | { | 417 | { |
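For readers following the secondary_exec_controls_get()/_set() calls earlier in vmx.c, this is what one instantiation of the new BUILD_CONTROLS_SHADOW() macro expands to, written out by hand. The cache now lives in the loaded VMCS rather than in struct vcpu_vmx, which is why the per-vcpu *_controls_shadow fields and the reset/init helpers above can be removed:

/* Hand expansion of BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL):
 * reads are served from the per-loaded_vmcs cache and the VMCS field is
 * only written when the cached value actually changes. */
static inline void pin_controls_set(struct vcpu_vmx *vmx, u32 val)
{
        if (vmx->loaded_vmcs->controls_shadow.pin != val) {
                vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, val);
                vmx->loaded_vmcs->controls_shadow.pin = val;
        }
}

static inline u32 pin_controls_get(struct vcpu_vmx *vmx)
{
        return vmx->loaded_vmcs->controls_shadow.pin;
}

static inline void pin_controls_setbit(struct vcpu_vmx *vmx, u32 val)
{
        pin_controls_set(vmx, pin_controls_get(vmx) | val);
}

static inline void pin_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
{
        pin_controls_set(vmx, pin_controls_get(vmx) & ~val);
}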
@@ -468,6 +441,7 @@ static inline u32 vmx_vmexit_ctrl(void) | |||
468 | } | 441 | } |
469 | 442 | ||
470 | u32 vmx_exec_control(struct vcpu_vmx *vmx); | 443 | u32 vmx_exec_control(struct vcpu_vmx *vmx); |
444 | u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx); | ||
471 | 445 | ||
472 | static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) | 446 | static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) |
473 | { | 447 | { |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63bb1ee8258e..4a0b74ecd1de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -717,7 +717,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) | |||
717 | gfn_t gfn; | 717 | gfn_t gfn; |
718 | int r; | 718 | int r; |
719 | 719 | ||
720 | if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu)) | 720 | if (!is_pae_paging(vcpu)) |
721 | return false; | 721 | return false; |
722 | 722 | ||
723 | if (!test_bit(VCPU_EXREG_PDPTR, | 723 | if (!test_bit(VCPU_EXREG_PDPTR, |
@@ -960,8 +960,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
960 | if (is_long_mode(vcpu) && | 960 | if (is_long_mode(vcpu) && |
961 | (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) | 961 | (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) |
962 | return 1; | 962 | return 1; |
963 | else if (is_pae(vcpu) && is_paging(vcpu) && | 963 | else if (is_pae_paging(vcpu) && |
964 | !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) | 964 | !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) |
965 | return 1; | 965 | return 1; |
966 | 966 | ||
967 | kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); | 967 | kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); |
@@ -1174,7 +1174,28 @@ static u32 emulated_msrs[] = { | |||
1174 | MSR_AMD64_VIRT_SPEC_CTRL, | 1174 | MSR_AMD64_VIRT_SPEC_CTRL, |
1175 | MSR_IA32_POWER_CTL, | 1175 | MSR_IA32_POWER_CTL, |
1176 | 1176 | ||
1177 | /* | ||
1178 | * The following list leaves out MSRs whose values are determined | ||
1179 | * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs. | ||
1180 | * We always support the "true" VMX control MSRs, even if the host | ||
1181 | * processor does not, so I am putting these registers here rather | ||
1182 | * than in msrs_to_save. | ||
1183 | */ | ||
1184 | MSR_IA32_VMX_BASIC, | ||
1185 | MSR_IA32_VMX_TRUE_PINBASED_CTLS, | ||
1186 | MSR_IA32_VMX_TRUE_PROCBASED_CTLS, | ||
1187 | MSR_IA32_VMX_TRUE_EXIT_CTLS, | ||
1188 | MSR_IA32_VMX_TRUE_ENTRY_CTLS, | ||
1189 | MSR_IA32_VMX_MISC, | ||
1190 | MSR_IA32_VMX_CR0_FIXED0, | ||
1191 | MSR_IA32_VMX_CR4_FIXED0, | ||
1192 | MSR_IA32_VMX_VMCS_ENUM, | ||
1193 | MSR_IA32_VMX_PROCBASED_CTLS2, | ||
1194 | MSR_IA32_VMX_EPT_VPID_CAP, | ||
1195 | MSR_IA32_VMX_VMFUNC, | ||
1196 | |||
1177 | MSR_K7_HWCR, | 1197 | MSR_K7_HWCR, |
1198 | MSR_KVM_POLL_CONTROL, | ||
1178 | }; | 1199 | }; |
1179 | 1200 | ||
1180 | static unsigned num_emulated_msrs; | 1201 | static unsigned num_emulated_msrs; |
@@ -1210,11 +1231,12 @@ static u32 msr_based_features[] = { | |||
1210 | 1231 | ||
1211 | static unsigned int num_msr_based_features; | 1232 | static unsigned int num_msr_based_features; |
1212 | 1233 | ||
1213 | u64 kvm_get_arch_capabilities(void) | 1234 | static u64 kvm_get_arch_capabilities(void) |
1214 | { | 1235 | { |
1215 | u64 data; | 1236 | u64 data = 0; |
1216 | 1237 | ||
1217 | rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); | 1238 | if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) |
1239 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); | ||
1218 | 1240 | ||
1219 | /* | 1241 | /* |
1220 | * If we're doing cache flushes (either "always" or "cond") | 1242 | * If we're doing cache flushes (either "always" or "cond") |
@@ -1230,7 +1252,6 @@ u64 kvm_get_arch_capabilities(void) | |||
1230 | 1252 | ||
1231 | return data; | 1253 | return data; |
1232 | } | 1254 | } |
1233 | EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); | ||
1234 | 1255 | ||
1235 | static int kvm_get_msr_feature(struct kvm_msr_entry *msr) | 1256 | static int kvm_get_msr_feature(struct kvm_msr_entry *msr) |
1236 | { | 1257 | { |
@@ -2545,13 +2566,24 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2545 | } | 2566 | } |
2546 | break; | 2567 | break; |
2547 | case MSR_IA32_MISC_ENABLE: | 2568 | case MSR_IA32_MISC_ENABLE: |
2548 | vcpu->arch.ia32_misc_enable_msr = data; | 2569 | if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) && |
2570 | ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) { | ||
2571 | if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3)) | ||
2572 | return 1; | ||
2573 | vcpu->arch.ia32_misc_enable_msr = data; | ||
2574 | kvm_update_cpuid(vcpu); | ||
2575 | } else { | ||
2576 | vcpu->arch.ia32_misc_enable_msr = data; | ||
2577 | } | ||
2549 | break; | 2578 | break; |
2550 | case MSR_IA32_SMBASE: | 2579 | case MSR_IA32_SMBASE: |
2551 | if (!msr_info->host_initiated) | 2580 | if (!msr_info->host_initiated) |
2552 | return 1; | 2581 | return 1; |
2553 | vcpu->arch.smbase = data; | 2582 | vcpu->arch.smbase = data; |
2554 | break; | 2583 | break; |
2584 | case MSR_IA32_POWER_CTL: | ||
2585 | vcpu->arch.msr_ia32_power_ctl = data; | ||
2586 | break; | ||
2555 | case MSR_IA32_TSC: | 2587 | case MSR_IA32_TSC: |
2556 | kvm_write_tsc(vcpu, msr_info); | 2588 | kvm_write_tsc(vcpu, msr_info); |
2557 | break; | 2589 | break; |
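The MSR_IA32_MISC_ENABLE change above only propagates the MWAIT bit into CPUID when userspace has opted out of the KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT quirk. A hedged VMM-side sketch of that opt-out, assuming KVM_CAP_DISABLE_QUIRKS accepts the quirk bitmask in args[0] and that vm_fd is an existing VM file descriptor:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: disable the quirk so that a guest toggling
 * MSR_IA32_MISC_ENABLE_MWAIT sees CPUID.01H:ECX[MONITOR] follow suit via
 * kvm_update_cpuid().  Treat this as an approximation of the uapi. */
static int disable_no_mwait_quirk(int vm_fd)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_DISABLE_QUIRKS,
                .args[0] = KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT,
        };

        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}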
@@ -2626,6 +2658,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2626 | return 1; | 2658 | return 1; |
2627 | break; | 2659 | break; |
2628 | 2660 | ||
2661 | case MSR_KVM_POLL_CONTROL: | ||
2662 | /* only enable bit supported */ | ||
2663 | if (data & (-1ULL << 1)) | ||
2664 | return 1; | ||
2665 | |||
2666 | vcpu->arch.msr_kvm_poll_control = data; | ||
2667 | break; | ||
2668 | |||
2629 | case MSR_IA32_MCG_CTL: | 2669 | case MSR_IA32_MCG_CTL: |
2630 | case MSR_IA32_MCG_STATUS: | 2670 | case MSR_IA32_MCG_STATUS: |
2631 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: | 2671 | case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: |
@@ -2803,6 +2843,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2803 | return 1; | 2843 | return 1; |
2804 | msr_info->data = vcpu->arch.arch_capabilities; | 2844 | msr_info->data = vcpu->arch.arch_capabilities; |
2805 | break; | 2845 | break; |
2846 | case MSR_IA32_POWER_CTL: | ||
2847 | msr_info->data = vcpu->arch.msr_ia32_power_ctl; | ||
2848 | break; | ||
2806 | case MSR_IA32_TSC: | 2849 | case MSR_IA32_TSC: |
2807 | msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; | 2850 | msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; |
2808 | break; | 2851 | break; |
@@ -2875,6 +2918,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2875 | case MSR_KVM_PV_EOI_EN: | 2918 | case MSR_KVM_PV_EOI_EN: |
2876 | msr_info->data = vcpu->arch.pv_eoi.msr_val; | 2919 | msr_info->data = vcpu->arch.pv_eoi.msr_val; |
2877 | break; | 2920 | break; |
2921 | case MSR_KVM_POLL_CONTROL: | ||
2922 | msr_info->data = vcpu->arch.msr_kvm_poll_control; | ||
2923 | break; | ||
2878 | case MSR_IA32_P5_MC_ADDR: | 2924 | case MSR_IA32_P5_MC_ADDR: |
2879 | case MSR_IA32_P5_MC_TYPE: | 2925 | case MSR_IA32_P5_MC_TYPE: |
2880 | case MSR_IA32_MCG_CAP: | 2926 | case MSR_IA32_MCG_CAP: |
@@ -3084,6 +3130,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
3084 | case KVM_CAP_SET_BOOT_CPU_ID: | 3130 | case KVM_CAP_SET_BOOT_CPU_ID: |
3085 | case KVM_CAP_SPLIT_IRQCHIP: | 3131 | case KVM_CAP_SPLIT_IRQCHIP: |
3086 | case KVM_CAP_IMMEDIATE_EXIT: | 3132 | case KVM_CAP_IMMEDIATE_EXIT: |
3133 | case KVM_CAP_PMU_EVENT_FILTER: | ||
3087 | case KVM_CAP_GET_MSR_FEATURES: | 3134 | case KVM_CAP_GET_MSR_FEATURES: |
3088 | case KVM_CAP_MSR_PLATFORM_INFO: | 3135 | case KVM_CAP_MSR_PLATFORM_INFO: |
3089 | case KVM_CAP_EXCEPTION_PAYLOAD: | 3136 | case KVM_CAP_EXCEPTION_PAYLOAD: |
@@ -3096,7 +3143,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
3096 | r = KVM_CLOCK_TSC_STABLE; | 3143 | r = KVM_CLOCK_TSC_STABLE; |
3097 | break; | 3144 | break; |
3098 | case KVM_CAP_X86_DISABLE_EXITS: | 3145 | case KVM_CAP_X86_DISABLE_EXITS: |
3099 | r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE; | 3146 | r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | |
3147 | KVM_X86_DISABLE_EXITS_CSTATE; | ||
3100 | if(kvm_can_mwait_in_guest()) | 3148 | if(kvm_can_mwait_in_guest()) |
3101 | r |= KVM_X86_DISABLE_EXITS_MWAIT; | 3149 | r |= KVM_X86_DISABLE_EXITS_MWAIT; |
3102 | break; | 3150 | break; |
@@ -4613,6 +4661,8 @@ split_irqchip_unlock: | |||
4613 | kvm->arch.hlt_in_guest = true; | 4661 | kvm->arch.hlt_in_guest = true; |
4614 | if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) | 4662 | if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) |
4615 | kvm->arch.pause_in_guest = true; | 4663 | kvm->arch.pause_in_guest = true; |
4664 | if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) | ||
4665 | kvm->arch.cstate_in_guest = true; | ||
4616 | r = 0; | 4666 | r = 0; |
4617 | break; | 4667 | break; |
4618 | case KVM_CAP_MSR_PLATFORM_INFO: | 4668 | case KVM_CAP_MSR_PLATFORM_INFO: |
@@ -4927,6 +4977,9 @@ set_identity_unlock: | |||
4927 | r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); | 4977 | r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); |
4928 | break; | 4978 | break; |
4929 | } | 4979 | } |
4980 | case KVM_SET_PMU_EVENT_FILTER: | ||
4981 | r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); | ||
4982 | break; | ||
4930 | default: | 4983 | default: |
4931 | r = -ENOTTY; | 4984 | r = -ENOTTY; |
4932 | } | 4985 | } |
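KVM_SET_PMU_EVENT_FILTER is the new VM ioctl behind KVM_CAP_PMU_EVENT_FILTER advertised earlier in this file. A rough userspace sketch follows; the struct kvm_pmu_event_filter fields used here (action, nevents, a trailing events[] array) and the KVM_PMU_EVENT_ALLOW action are assumptions based on this series, so the uapi header should be treated as authoritative:

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: install an allow-list of raw event selects; events outside the
 * list are refused when the guest programs a PMU counter.  vm_fd and the
 * caller-supplied events[] are illustrative assumptions. */
static int set_pmu_allow_list(int vm_fd, const __u64 *events, __u32 nevents)
{
        struct kvm_pmu_event_filter *filter;
        size_t sz = sizeof(*filter) + nevents * sizeof(__u64);
        int ret;

        filter = calloc(1, sz);
        if (!filter)
                return -1;

        filter->action = KVM_PMU_EVENT_ALLOW;
        filter->nevents = nevents;
        memcpy(filter->events, events, nevents * sizeof(__u64));

        ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, filter);
        free(filter);
        return ret;
}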
@@ -6379,7 +6432,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) | |||
6379 | vcpu->arch.db); | 6432 | vcpu->arch.db); |
6380 | 6433 | ||
6381 | if (dr6 != 0) { | 6434 | if (dr6 != 0) { |
6382 | vcpu->arch.dr6 &= ~15; | 6435 | vcpu->arch.dr6 &= ~DR_TRAP_BITS; |
6383 | vcpu->arch.dr6 |= dr6 | DR6_RTM; | 6436 | vcpu->arch.dr6 |= dr6 | DR6_RTM; |
6384 | kvm_queue_exception(vcpu, DB_VECTOR); | 6437 | kvm_queue_exception(vcpu, DB_VECTOR); |
6385 | *r = EMULATE_DONE; | 6438 | *r = EMULATE_DONE; |
@@ -6706,7 +6759,7 @@ static void kvm_hyperv_tsc_notifier(void) | |||
6706 | struct kvm_vcpu *vcpu; | 6759 | struct kvm_vcpu *vcpu; |
6707 | int cpu; | 6760 | int cpu; |
6708 | 6761 | ||
6709 | spin_lock(&kvm_lock); | 6762 | mutex_lock(&kvm_lock); |
6710 | list_for_each_entry(kvm, &vm_list, vm_list) | 6763 | list_for_each_entry(kvm, &vm_list, vm_list) |
6711 | kvm_make_mclock_inprogress_request(kvm); | 6764 | kvm_make_mclock_inprogress_request(kvm); |
6712 | 6765 | ||
@@ -6732,7 +6785,7 @@ static void kvm_hyperv_tsc_notifier(void) | |||
6732 | 6785 | ||
6733 | spin_unlock(&ka->pvclock_gtod_sync_lock); | 6786 | spin_unlock(&ka->pvclock_gtod_sync_lock); |
6734 | } | 6787 | } |
6735 | spin_unlock(&kvm_lock); | 6788 | mutex_unlock(&kvm_lock); |
6736 | } | 6789 | } |
6737 | #endif | 6790 | #endif |
6738 | 6791 | ||
@@ -6783,17 +6836,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) | |||
6783 | 6836 | ||
6784 | smp_call_function_single(cpu, tsc_khz_changed, freq, 1); | 6837 | smp_call_function_single(cpu, tsc_khz_changed, freq, 1); |
6785 | 6838 | ||
6786 | spin_lock(&kvm_lock); | 6839 | mutex_lock(&kvm_lock); |
6787 | list_for_each_entry(kvm, &vm_list, vm_list) { | 6840 | list_for_each_entry(kvm, &vm_list, vm_list) { |
6788 | kvm_for_each_vcpu(i, vcpu, kvm) { | 6841 | kvm_for_each_vcpu(i, vcpu, kvm) { |
6789 | if (vcpu->cpu != cpu) | 6842 | if (vcpu->cpu != cpu) |
6790 | continue; | 6843 | continue; |
6791 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 6844 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
6792 | if (vcpu->cpu != smp_processor_id()) | 6845 | if (vcpu->cpu != raw_smp_processor_id()) |
6793 | send_ipi = 1; | 6846 | send_ipi = 1; |
6794 | } | 6847 | } |
6795 | } | 6848 | } |
6796 | spin_unlock(&kvm_lock); | 6849 | mutex_unlock(&kvm_lock); |
6797 | 6850 | ||
6798 | if (freq->old < freq->new && send_ipi) { | 6851 | if (freq->old < freq->new && send_ipi) { |
6799 | /* | 6852 | /* |
@@ -6908,35 +6961,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { | |||
6908 | .handle_intel_pt_intr = kvm_handle_intel_pt_intr, | 6961 | .handle_intel_pt_intr = kvm_handle_intel_pt_intr, |
6909 | }; | 6962 | }; |
6910 | 6963 | ||
6911 | static void kvm_set_mmio_spte_mask(void) | ||
6912 | { | ||
6913 | u64 mask; | ||
6914 | int maxphyaddr = boot_cpu_data.x86_phys_bits; | ||
6915 | |||
6916 | /* | ||
6917 | * Set the reserved bits and the present bit of an paging-structure | ||
6918 | * entry to generate page fault with PFER.RSV = 1. | ||
6919 | */ | ||
6920 | |||
6921 | /* | ||
6922 | * Mask the uppermost physical address bit, which would be reserved as | ||
6923 | * long as the supported physical address width is less than 52. | ||
6924 | */ | ||
6925 | mask = 1ull << 51; | ||
6926 | |||
6927 | /* Set the present bit. */ | ||
6928 | mask |= 1ull; | ||
6929 | |||
6930 | /* | ||
6931 | * If reserved bit is not supported, clear the present bit to disable | ||
6932 | * mmio page fault. | ||
6933 | */ | ||
6934 | if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) | ||
6935 | mask &= ~1ull; | ||
6936 | |||
6937 | kvm_mmu_set_mmio_spte_mask(mask, mask); | ||
6938 | } | ||
6939 | |||
6940 | #ifdef CONFIG_X86_64 | 6964 | #ifdef CONFIG_X86_64 |
6941 | static void pvclock_gtod_update_fn(struct work_struct *work) | 6965 | static void pvclock_gtod_update_fn(struct work_struct *work) |
6942 | { | 6966 | { |
@@ -6945,12 +6969,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) | |||
6945 | struct kvm_vcpu *vcpu; | 6969 | struct kvm_vcpu *vcpu; |
6946 | int i; | 6970 | int i; |
6947 | 6971 | ||
6948 | spin_lock(&kvm_lock); | 6972 | mutex_lock(&kvm_lock); |
6949 | list_for_each_entry(kvm, &vm_list, vm_list) | 6973 | list_for_each_entry(kvm, &vm_list, vm_list) |
6950 | kvm_for_each_vcpu(i, vcpu, kvm) | 6974 | kvm_for_each_vcpu(i, vcpu, kvm) |
6951 | kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); | 6975 | kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); |
6952 | atomic_set(&kvm_guest_has_master_clock, 0); | 6976 | atomic_set(&kvm_guest_has_master_clock, 0); |
6953 | spin_unlock(&kvm_lock); | 6977 | mutex_unlock(&kvm_lock); |
6954 | } | 6978 | } |
6955 | 6979 | ||
6956 | static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); | 6980 | static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); |
@@ -7033,8 +7057,6 @@ int kvm_arch_init(void *opaque) | |||
7033 | if (r) | 7057 | if (r) |
7034 | goto out_free_percpu; | 7058 | goto out_free_percpu; |
7035 | 7059 | ||
7036 | kvm_set_mmio_spte_mask(); | ||
7037 | |||
7038 | kvm_x86_ops = ops; | 7060 | kvm_x86_ops = ops; |
7039 | 7061 | ||
7040 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 7062 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
@@ -7173,6 +7195,23 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) | |||
7173 | kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); | 7195 | kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); |
7174 | } | 7196 | } |
7175 | 7197 | ||
7198 | static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id) | ||
7199 | { | ||
7200 | struct kvm_vcpu *target = NULL; | ||
7201 | struct kvm_apic_map *map; | ||
7202 | |||
7203 | rcu_read_lock(); | ||
7204 | map = rcu_dereference(kvm->arch.apic_map); | ||
7205 | |||
7206 | if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id]) | ||
7207 | target = map->phys_map[dest_id]->vcpu; | ||
7208 | |||
7209 | rcu_read_unlock(); | ||
7210 | |||
7211 | if (target) | ||
7212 | kvm_vcpu_yield_to(target); | ||
7213 | } | ||
7214 | |||
7176 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 7215 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
7177 | { | 7216 | { |
7178 | unsigned long nr, a0, a1, a2, a3, ret; | 7217 | unsigned long nr, a0, a1, a2, a3, ret; |
@@ -7219,6 +7258,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
7219 | case KVM_HC_SEND_IPI: | 7258 | case KVM_HC_SEND_IPI: |
7220 | ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); | 7259 | ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); |
7221 | break; | 7260 | break; |
7261 | case KVM_HC_SCHED_YIELD: | ||
7262 | kvm_sched_yield(vcpu->kvm, a0); | ||
7263 | ret = 0; | ||
7264 | break; | ||
7222 | default: | 7265 | default: |
7223 | ret = -KVM_ENOSYS; | 7266 | ret = -KVM_ENOSYS; |
7224 | break; | 7267 | break; |
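On the guest side, KVM_HC_SCHED_YIELD is intended as a cheap hint issued before waiting on a vCPU the host may have preempted, for example the destination of an IPI. A minimal sketch, assuming the standard kvm_hypercall1() helper from asm/kvm_para.h; note the handler above always sets ret to 0, whether or not a yield actually happened:

#include <linux/kvm_para.h>     /* kvm_hypercall1(), KVM_HC_SCHED_YIELD */

/* Guest-side sketch: donate the current timeslice to the vCPU identified by
 * dest_apicid (passed as a0 to the handler above).  Purely advisory; there
 * is no error to check. */
static void kvm_guest_yield_to(u32 dest_apicid)
{
        kvm_hypercall1(KVM_HC_SCHED_YIELD, dest_apicid);
}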
@@ -7951,9 +7994,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
7951 | } | 7994 | } |
7952 | 7995 | ||
7953 | trace_kvm_entry(vcpu->vcpu_id); | 7996 | trace_kvm_entry(vcpu->vcpu_id); |
7954 | if (lapic_in_kernel(vcpu) && | ||
7955 | vcpu->arch.apic->lapic_timer.timer_advance_ns) | ||
7956 | wait_lapic_expire(vcpu); | ||
7957 | guest_enter_irqoff(); | 7997 | guest_enter_irqoff(); |
7958 | 7998 | ||
7959 | fpregs_assert_state_consistent(); | 7999 | fpregs_assert_state_consistent(); |
@@ -8002,13 +8042,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
8002 | vcpu->mode = OUTSIDE_GUEST_MODE; | 8042 | vcpu->mode = OUTSIDE_GUEST_MODE; |
8003 | smp_wmb(); | 8043 | smp_wmb(); |
8004 | 8044 | ||
8005 | kvm_before_interrupt(vcpu); | 8045 | kvm_x86_ops->handle_exit_irqoff(vcpu); |
8006 | kvm_x86_ops->handle_external_intr(vcpu); | ||
8007 | kvm_after_interrupt(vcpu); | ||
8008 | 8046 | ||
8047 | /* | ||
8048 | * Consume any pending interrupts, including the possible source of | ||
8049 | * VM-Exit on SVM and any ticks that occur between VM-Exit and now. | ||
8050 | * An instruction is required after local_irq_enable() to fully unblock | ||
8051 | * interrupts on processors that implement an interrupt shadow, the | ||
8052 | * stat.exits increment will do nicely. | ||
8053 | */ | ||
8054 | kvm_before_interrupt(vcpu); | ||
8055 | local_irq_enable(); | ||
8009 | ++vcpu->stat.exits; | 8056 | ++vcpu->stat.exits; |
8057 | local_irq_disable(); | ||
8058 | kvm_after_interrupt(vcpu); | ||
8010 | 8059 | ||
8011 | guest_exit_irqoff(); | 8060 | guest_exit_irqoff(); |
8061 | if (lapic_in_kernel(vcpu)) { | ||
8062 | s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta; | ||
8063 | if (delta != S64_MIN) { | ||
8064 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta); | ||
8065 | vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN; | ||
8066 | } | ||
8067 | } | ||
8012 | 8068 | ||
8013 | local_irq_enable(); | 8069 | local_irq_enable(); |
8014 | preempt_enable(); | 8070 | preempt_enable(); |
@@ -8594,7 +8650,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
8594 | kvm_update_cpuid(vcpu); | 8650 | kvm_update_cpuid(vcpu); |
8595 | 8651 | ||
8596 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 8652 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
8597 | if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) { | 8653 | if (is_pae_paging(vcpu)) { |
8598 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); | 8654 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
8599 | mmu_reset_needed = 1; | 8655 | mmu_reset_needed = 1; |
8600 | } | 8656 | } |
@@ -8875,6 +8931,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
8875 | msr.host_initiated = true; | 8931 | msr.host_initiated = true; |
8876 | kvm_write_tsc(vcpu, &msr); | 8932 | kvm_write_tsc(vcpu, &msr); |
8877 | vcpu_put(vcpu); | 8933 | vcpu_put(vcpu); |
8934 | |||
8935 | /* poll control enabled by default */ | ||
8936 | vcpu->arch.msr_kvm_poll_control = 1; | ||
8937 | |||
8878 | mutex_unlock(&vcpu->mutex); | 8938 | mutex_unlock(&vcpu->mutex); |
8879 | 8939 | ||
8880 | if (!kvmclock_periodic_sync) | 8940 | if (!kvmclock_periodic_sync) |
@@ -9107,9 +9167,9 @@ void kvm_arch_hardware_unsetup(void) | |||
9107 | kvm_x86_ops->hardware_unsetup(); | 9167 | kvm_x86_ops->hardware_unsetup(); |
9108 | } | 9168 | } |
9109 | 9169 | ||
9110 | void kvm_arch_check_processor_compat(void *rtn) | 9170 | int kvm_arch_check_processor_compat(void) |
9111 | { | 9171 | { |
9112 | kvm_x86_ops->check_processor_compatibility(rtn); | 9172 | return kvm_x86_ops->check_processor_compatibility(); |
9113 | } | 9173 | } |
9114 | 9174 | ||
9115 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) | 9175 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) |
@@ -9381,6 +9441,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
9381 | kvm_ioapic_destroy(kvm); | 9441 | kvm_ioapic_destroy(kvm); |
9382 | kvm_free_vcpus(kvm); | 9442 | kvm_free_vcpus(kvm); |
9383 | kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); | 9443 | kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); |
9444 | kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); | ||
9384 | kvm_mmu_uninit_vm(kvm); | 9445 | kvm_mmu_uninit_vm(kvm); |
9385 | kvm_page_track_cleanup(kvm); | 9446 | kvm_page_track_cleanup(kvm); |
9386 | kvm_hv_destroy_vm(kvm); | 9447 | kvm_hv_destroy_vm(kvm); |
@@ -9789,6 +9850,36 @@ static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val) | |||
9789 | sizeof(u32)); | 9850 | sizeof(u32)); |
9790 | } | 9851 | } |
9791 | 9852 | ||
9853 | static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu) | ||
9854 | { | ||
9855 | if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) | ||
9856 | return false; | ||
9857 | |||
9858 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || | ||
9859 | (vcpu->arch.apf.send_user_only && | ||
9860 | kvm_x86_ops->get_cpl(vcpu) == 0)) | ||
9861 | return false; | ||
9862 | |||
9863 | return true; | ||
9864 | } | ||
9865 | |||
9866 | bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) | ||
9867 | { | ||
9868 | if (unlikely(!lapic_in_kernel(vcpu) || | ||
9869 | kvm_event_needs_reinjection(vcpu) || | ||
9870 | vcpu->arch.exception.pending)) | ||
9871 | return false; | ||
9872 | |||
9873 | if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu)) | ||
9874 | return false; | ||
9875 | |||
9876 | /* | ||
9877 | * If interrupts are off we cannot even use an artificial | ||
9878 | * halt state. | ||
9879 | */ | ||
9880 | return kvm_x86_ops->interrupt_allowed(vcpu); | ||
9881 | } | ||
9882 | |||
9792 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | 9883 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, |
9793 | struct kvm_async_pf *work) | 9884 | struct kvm_async_pf *work) |
9794 | { | 9885 | { |
@@ -9797,11 +9888,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | |||
9797 | trace_kvm_async_pf_not_present(work->arch.token, work->gva); | 9888 | trace_kvm_async_pf_not_present(work->arch.token, work->gva); |
9798 | kvm_add_async_pf_gfn(vcpu, work->arch.gfn); | 9889 | kvm_add_async_pf_gfn(vcpu, work->arch.gfn); |
9799 | 9890 | ||
9800 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || | 9891 | if (kvm_can_deliver_async_pf(vcpu) && |
9801 | (vcpu->arch.apf.send_user_only && | 9892 | !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { |
9802 | kvm_x86_ops->get_cpl(vcpu) == 0)) | ||
9803 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
9804 | else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { | ||
9805 | fault.vector = PF_VECTOR; | 9893 | fault.vector = PF_VECTOR; |
9806 | fault.error_code_valid = true; | 9894 | fault.error_code_valid = true; |
9807 | fault.error_code = 0; | 9895 | fault.error_code = 0; |
@@ -9809,6 +9897,16 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | |||
9809 | fault.address = work->arch.token; | 9897 | fault.address = work->arch.token; |
9810 | fault.async_page_fault = true; | 9898 | fault.async_page_fault = true; |
9811 | kvm_inject_page_fault(vcpu, &fault); | 9899 | kvm_inject_page_fault(vcpu, &fault); |
9900 | } else { | ||
9901 | /* | ||
9902 | * It is not possible to deliver a paravirtualized asynchronous | ||
9903 | * page fault, but putting the guest in an artificial halt state | ||
9904 | * can be beneficial nevertheless: if an interrupt arrives, we | ||
9905 | * can deliver it timely and perhaps the guest will schedule | ||
9906 | * another process. When the instruction that triggered a page | ||
9907 | * fault is retried, hopefully the page will be ready in the host. | ||
9908 | */ | ||
9909 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
9812 | } | 9910 | } |
9813 | } | 9911 | } |
9814 | 9912 | ||
@@ -9949,6 +10047,13 @@ bool kvm_vector_hashing_enabled(void) | |||
9949 | } | 10047 | } |
9950 | EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); | 10048 | EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); |
9951 | 10049 | ||
10050 | bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) | ||
10051 | { | ||
10052 | return (vcpu->arch.msr_kvm_poll_control & 1) == 0; | ||
10053 | } | ||
10054 | EXPORT_SYMBOL_GPL(kvm_arch_no_poll); | ||
10055 | |||
10056 | |||
9952 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 10057 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
9953 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); | 10058 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); |
9954 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 10059 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
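kvm_arch_no_poll() above is the x86 backend that lets a guest switch off host-side halt polling by clearing bit 0 of MSR_KVM_POLL_CONTROL. Its consumer lives in virt/kvm/kvm_main.c, outside this arch/x86 diffstat; the following is a simplified stand-in for how the halt path is expected to gate its busy-wait poll on the new hook, not the actual generic code:

#include <linux/kvm_host.h>

/* Illustrative only: skip the wakeup poll entirely when the guest asked us
 * not to poll (kvm_arch_no_poll() returns true), so the vCPU schedules out
 * immediately instead of burning host CPU time in the poll loop. */
static bool want_halt_poll(struct kvm_vcpu *vcpu)
{
        return vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu);
}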
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a470ff0868c5..e08a12892e8b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -139,6 +139,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
139 | return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG)); | 139 | return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG)); |
140 | } | 140 | } |
141 | 141 | ||
142 | static inline bool is_pae_paging(struct kvm_vcpu *vcpu) | ||
143 | { | ||
144 | return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu); | ||
145 | } | ||
146 | |||
142 | static inline u32 bit(int bitno) | 147 | static inline u32 bit(int bitno) |
143 | { | 148 | { |
144 | return 1 << (bitno & 31); | 149 | return 1 << (bitno & 31); |
@@ -333,6 +338,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm) | |||
333 | return kvm->arch.pause_in_guest; | 338 | return kvm->arch.pause_in_guest; |
334 | } | 339 | } |
335 | 340 | ||
341 | static inline bool kvm_cstate_in_guest(struct kvm *kvm) | ||
342 | { | ||
343 | return kvm->arch.cstate_in_guest; | ||
344 | } | ||
345 | |||
336 | DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); | 346 | DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); |
337 | 347 | ||
338 | static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) | 348 | static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) |