author	Sean Christopherson <sean.j.christopherson@intel.com>	2018-08-27 18:21:12 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2018-09-19 18:51:42 -0400
commit	d264ee0c2ed20c6a426663590d4fc7a36cb6abd7
tree	7435ea3691a720a98b3bfcb08395787e52f48a8e
parent	f459a707ed313f110e4939d634317edcf9e96774
KVM: VMX: use preemption timer to force immediate VMExit
A VMX preemption timer value of '0' is guaranteed to cause a VMExit prior to the CPU executing any instructions in the guest. Use the preemption timer (if it's supported) to trigger immediate VMExit in place of the current method of sending a self-IPI. This ensures that pending VMExit injection to L1 occurs prior to executing any instructions in the guest (regardless of nesting level).

When deferring VMExit injection, KVM generates an immediate VMExit from the (possibly nested) guest by sending itself an IPI. Because hardware interrupts are blocked prior to VMEnter and are unblocked (in hardware) after VMEnter, this results in taking a VMExit(INTR) before any guest instruction is executed. But, as this approach relies on the IPI being received before VMEnter executes, it only works as intended when KVM is running as L0. Because there are no architectural guarantees regarding when IPIs are delivered, when running nested the INTR may "arrive" long after L2 is running, e.g. L0 KVM doesn't force an immediate switch to L1 to deliver an INTR.

For the most part, this unintended delay is not an issue since the events being injected to L1 also do not have architectural guarantees regarding their timing. The notable exception is the VMX preemption timer[1], which is architecturally guaranteed to cause a VMExit prior to executing any instructions in the guest if the timer value is '0' at VMEnter. Specifically, the delay in injecting the VMExit causes the preemption timer KVM unit test to fail when run in a nested guest.

Note: this approach is viable even on CPUs with a broken preemption timer, as broken in this context only means the timer counts at the wrong rate. There are no known errata affecting timer value of '0'.

[1] I/O SMIs also have guarantees on when they arrive, but I have no idea if/how those are emulated in KVM.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
[Use a hook for SVM instead of leaving the default in x86.c - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
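The dispatch this patch sets up can be sketched outside the kernel as follows. This is a minimal user-space model, not an excerpt of the commit: the hook and function names mirror the patch (request_immediate_exit, vmx_request_immediate_exit, __kvm_request_immediate_exit, vmx_update_hv_timer), while the struct layouts, stub bodies, printf calls, and main() harness are illustrative only.

/*
 * Minimal user-space model of the control flow introduced by this patch.
 * The names mirror the kernel code; the bodies are illustrative stubs.
 */
#include <stdbool.h>
#include <stdio.h>

struct kvm_vcpu { int cpu; };

/* Per-vendor hook table, modeled on struct kvm_x86_ops. */
struct kvm_x86_ops {
	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
};

/* Fallback used by SVM (and by VMX when no usable preemption timer is
 * present): the pre-patch behaviour of sending a self-IPI. */
static void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
{
	printf("cpu %d: self-IPI (smp_send_reschedule)\n", vcpu->cpu);
}

/* VMX path: latch a flag; the timer-programming step then arms the
 * preemption timer with '0', which guarantees a VMExit before the guest
 * executes a single instruction. */
static bool req_immediate_exit;

static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
{
	(void)vcpu;
	req_immediate_exit = true;
}

static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
	(void)vcpu;
	if (req_immediate_exit) {
		printf("arming VMX preemption timer with 0\n");
		return;
	}
	printf("programming VMX preemption timer normally\n");
}

static const struct kvm_x86_ops vmx_ops = {
	.request_immediate_exit = vmx_request_immediate_exit,
};

static const struct kvm_x86_ops svm_ops = {
	.request_immediate_exit = __kvm_request_immediate_exit,
};

int main(void)
{
	struct kvm_vcpu vcpu = { .cpu = 3 };

	/* vcpu_enter_guest() no longer calls smp_send_reschedule() directly;
	 * it routes the request through the vendor hook. */
	vmx_ops.request_immediate_exit(&vcpu);
	vmx_update_hv_timer(&vcpu);

	svm_ops.request_immediate_exit(&vcpu);
	return 0;
}

As the hardware_setup() hunk below shows, VMX without a working preemption timer falls back to the same self-IPI helper, so only the timer-capable path ever arms the timer with '0'.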
-rw-r--r--  arch/x86/include/asm/kvm_host.h   2
-rw-r--r--  arch/x86/kvm/svm.c                2
-rw-r--r--  arch/x86/kvm/vmx.c               21
-rw-r--r--  arch/x86/kvm/x86.c                8
4 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e90488c3d56..bffb25b50425 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1055,6 +1055,7 @@ struct kvm_x86_ops {
 	bool (*umip_emulated)(void);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
 
 	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
 
@@ -1482,6 +1483,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
 int kvm_is_in_guest(void);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c7f1c3fd782d..d96092b35936 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7148,6 +7148,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.check_intercept = svm_check_intercept,
 	.handle_external_intr = svm_handle_external_intr,
 
+	.request_immediate_exit = __kvm_request_immediate_exit,
+
 	.sched_in = svm_sched_in,
 
 	.pmu_ops = &amd_pmu_ops,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 62670b2f6d48..a4a1585f47f1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1020,6 +1020,8 @@ struct vcpu_vmx {
 	int ple_window;
 	bool ple_window_dirty;
 
+	bool req_immediate_exit;
+
 	/* Support for PML */
 #define PML_ENTITY_NUM 512
 	struct page *pml_pg;
@@ -2865,6 +2867,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	u16 fs_sel, gs_sel;
 	int i;
 
+	vmx->req_immediate_exit = false;
+
 	if (vmx->loaded_cpu_state)
 		return;
 
@@ -7967,6 +7971,9 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
 	}
 
+	if (!cpu_has_vmx_preemption_timer())
+		kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
+
 	if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
 		u64 vmx_msr;
 
@@ -9209,7 +9216,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 
 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 {
-	kvm_lapic_expired_hv_timer(vcpu);
+	if (!to_vmx(vcpu)->req_immediate_exit)
+		kvm_lapic_expired_hv_timer(vcpu);
 	return 1;
 }
 
@@ -10611,6 +10619,11 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
 	u64 tscl;
 	u32 delta_tsc;
 
+	if (vmx->req_immediate_exit) {
+		vmx_arm_hv_timer(vmx, 0);
+		return;
+	}
+
 	if (vmx->hv_deadline_tsc != -1) {
 		tscl = rdtsc();
 		if (vmx->hv_deadline_tsc > tscl)
@@ -12879,6 +12892,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 	return 0;
 }
 
+static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+	to_vmx(vcpu)->req_immediate_exit = true;
+}
+
 static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
 {
 	ktime_t remaining =
@@ -14135,6 +14153,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.umip_emulated = vmx_umip_emulated,
 
 	.check_nested_events = vmx_check_nested_events,
+	.request_immediate_exit = vmx_request_immediate_exit,
 
 	.sched_in = vmx_sched_in,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5c870203737f..9d0fda9056de 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7361,6 +7361,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
 
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+	smp_send_reschedule(vcpu->cpu);
+}
+EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
+
 /*
  * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace. Otherwise, the value will be returned to the
@@ -7565,7 +7571,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	if (req_immediate_exit) {
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		smp_send_reschedule(vcpu->cpu);
+		kvm_x86_ops->request_immediate_exit(vcpu);
 	}
 
 	trace_kvm_entry(vcpu->vcpu_id);