diff options
author | Gleb Natapov <gleb@redhat.com> | 2010-10-14 05:22:46 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2011-01-12 04:21:39 -0500 |
commit | af585b921e5d1e919947c4b1164b59507fe7cd7b (patch) | |
tree | d0d4cc753d4d58934c5986733d7340fe69e523de /arch/x86/kvm/x86.c | |
parent | 010c520e20413dfd567d568aba2b7238acd37e33 (diff) |
KVM: Halt vcpu if page it tries to access is swapped out
If a guest accesses swapped-out memory, do not swap it in from vcpu thread
context. Instead, schedule work to do the swapping and put the vcpu into a
halted state.
Interrupts will still be delivered to the guest, and if an interrupt
causes a reschedule, the guest will continue by running another task.
[avi: remove call to get_user_pages_noio(), nacked by Linus; this
makes everything synchronous again]
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 112 |
1 files changed, 109 insertions, 3 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c05d47701292..3cd4d091c2f3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/perf_event.h> | 44 | #include <linux/perf_event.h> |
45 | #include <linux/uaccess.h> | 45 | #include <linux/uaccess.h> |
46 | #include <linux/hash.h> | ||
46 | #include <trace/events/kvm.h> | 47 | #include <trace/events/kvm.h> |
47 | 48 | ||
48 | #define CREATE_TRACE_POINTS | 49 | #define CREATE_TRACE_POINTS |
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
155 | 156 | ||
156 | u64 __read_mostly host_xcr0; | 157 | u64 __read_mostly host_xcr0; |
157 | 158 | ||
159 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | ||
160 | { | ||
161 | int i; | ||
162 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) | ||
163 | vcpu->arch.apf.gfns[i] = ~0; | ||
164 | } | ||
165 | |||
158 | static void kvm_on_user_return(struct user_return_notifier *urn) | 166 | static void kvm_on_user_return(struct user_return_notifier *urn) |
159 | { | 167 | { |
160 | unsigned slot; | 168 | unsigned slot; |
@@ -5115,6 +5123,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5115 | vcpu->fpu_active = 0; | 5123 | vcpu->fpu_active = 0; |
5116 | kvm_x86_ops->fpu_deactivate(vcpu); | 5124 | kvm_x86_ops->fpu_deactivate(vcpu); |
5117 | } | 5125 | } |
5126 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { | ||
5127 | /* Page is swapped out. Do synthetic halt */ | ||
5128 | vcpu->arch.apf.halted = true; | ||
5129 | r = 1; | ||
5130 | goto out; | ||
5131 | } | ||
5118 | } | 5132 | } |
5119 | 5133 | ||
5120 | r = kvm_mmu_reload(vcpu); | 5134 | r = kvm_mmu_reload(vcpu); |
@@ -5243,7 +5257,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5243 | 5257 | ||
5244 | r = 1; | 5258 | r = 1; |
5245 | while (r > 0) { | 5259 | while (r > 0) { |
5246 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 5260 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
5261 | !vcpu->arch.apf.halted) | ||
5247 | r = vcpu_enter_guest(vcpu); | 5262 | r = vcpu_enter_guest(vcpu); |
5248 | else { | 5263 | else { |
5249 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 5264 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
@@ -5256,6 +5271,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5256 | vcpu->arch.mp_state = | 5271 | vcpu->arch.mp_state = |
5257 | KVM_MP_STATE_RUNNABLE; | 5272 | KVM_MP_STATE_RUNNABLE; |
5258 | case KVM_MP_STATE_RUNNABLE: | 5273 | case KVM_MP_STATE_RUNNABLE: |
5274 | vcpu->arch.apf.halted = false; | ||
5259 | break; | 5275 | break; |
5260 | case KVM_MP_STATE_SIPI_RECEIVED: | 5276 | case KVM_MP_STATE_SIPI_RECEIVED: |
5261 | default: | 5277 | default: |
@@ -5277,6 +5293,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5277 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5293 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
5278 | ++vcpu->stat.request_irq_exits; | 5294 | ++vcpu->stat.request_irq_exits; |
5279 | } | 5295 | } |
5296 | |||
5297 | kvm_check_async_pf_completion(vcpu); | ||
5298 | |||
5280 | if (signal_pending(current)) { | 5299 | if (signal_pending(current)) { |
5281 | r = -EINTR; | 5300 | r = -EINTR; |
5282 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5301 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
@@ -5792,6 +5811,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5792 | 5811 | ||
5793 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5812 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5794 | 5813 | ||
5814 | kvm_clear_async_pf_completion_queue(vcpu); | ||
5815 | kvm_async_pf_hash_reset(vcpu); | ||
5816 | vcpu->arch.apf.halted = false; | ||
5817 | |||
5795 | return kvm_x86_ops->vcpu_reset(vcpu); | 5818 | return kvm_x86_ops->vcpu_reset(vcpu); |
5796 | } | 5819 | } |
5797 | 5820 | ||
@@ -5880,6 +5903,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5880 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | 5903 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) |
5881 | goto fail_free_mce_banks; | 5904 | goto fail_free_mce_banks; |
5882 | 5905 | ||
5906 | kvm_async_pf_hash_reset(vcpu); | ||
5907 | |||
5883 | return 0; | 5908 | return 0; |
5884 | fail_free_mce_banks: | 5909 | fail_free_mce_banks: |
5885 | kfree(vcpu->arch.mce_banks); | 5910 | kfree(vcpu->arch.mce_banks); |
@@ -5938,8 +5963,10 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
5938 | /* | 5963 | /* |
5939 | * Unpin any mmu pages first. | 5964 | * Unpin any mmu pages first. |
5940 | */ | 5965 | */ |
5941 | kvm_for_each_vcpu(i, vcpu, kvm) | 5966 | kvm_for_each_vcpu(i, vcpu, kvm) { |
5967 | kvm_clear_async_pf_completion_queue(vcpu); | ||
5942 | kvm_unload_vcpu_mmu(vcpu); | 5968 | kvm_unload_vcpu_mmu(vcpu); |
5969 | } | ||
5943 | kvm_for_each_vcpu(i, vcpu, kvm) | 5970 | kvm_for_each_vcpu(i, vcpu, kvm) |
5944 | kvm_arch_vcpu_free(vcpu); | 5971 | kvm_arch_vcpu_free(vcpu); |
5945 | 5972 | ||
@@ -6050,7 +6077,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
6050 | 6077 | ||
6051 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 6078 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
6052 | { | 6079 | { |
6053 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE | 6080 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6081 | !vcpu->arch.apf.halted) | ||
6082 | || !list_empty_careful(&vcpu->async_pf.done) | ||
6054 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 6083 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
6055 | || vcpu->arch.nmi_pending || | 6084 | || vcpu->arch.nmi_pending || |
6056 | (kvm_arch_interrupt_allowed(vcpu) && | 6085 | (kvm_arch_interrupt_allowed(vcpu) && |
@@ -6109,6 +6138,83 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | |||
6109 | } | 6138 | } |
6110 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 6139 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
6111 | 6140 | ||
6141 | static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) | ||
6142 | { | ||
6143 | return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU)); | ||
6144 | } | ||
6145 | |||
6146 | static inline u32 kvm_async_pf_next_probe(u32 key) | ||
6147 | { | ||
6148 | return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1); | ||
6149 | } | ||
6150 | |||
6151 | static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6152 | { | ||
6153 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6154 | |||
6155 | while (vcpu->arch.apf.gfns[key] != ~0) | ||
6156 | key = kvm_async_pf_next_probe(key); | ||
6157 | |||
6158 | vcpu->arch.apf.gfns[key] = gfn; | ||
6159 | } | ||
6160 | |||
6161 | static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6162 | { | ||
6163 | int i; | ||
6164 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6165 | |||
6166 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) && | ||
6167 | (vcpu->arch.apf.gfns[key] != gfn || | ||
6168 | vcpu->arch.apf.gfns[key] == ~0); i++) | ||
6169 | key = kvm_async_pf_next_probe(key); | ||
6170 | |||
6171 | return key; | ||
6172 | } | ||
6173 | |||
6174 | bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6175 | { | ||
6176 | return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn; | ||
6177 | } | ||
6178 | |||
6179 | static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6180 | { | ||
6181 | u32 i, j, k; | ||
6182 | |||
6183 | i = j = kvm_async_pf_gfn_slot(vcpu, gfn); | ||
6184 | while (true) { | ||
6185 | vcpu->arch.apf.gfns[i] = ~0; | ||
6186 | do { | ||
6187 | j = kvm_async_pf_next_probe(j); | ||
6188 | if (vcpu->arch.apf.gfns[j] == ~0) | ||
6189 | return; | ||
6190 | k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]); | ||
6191 | /* | ||
6192 | * k lies cyclically in ]i,j] | ||
6193 | * | i.k.j | | ||
6194 | * |....j i.k.| or |.k..j i...| | ||
6195 | */ | ||
6196 | } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j)); | ||
6197 | vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j]; | ||
6198 | i = j; | ||
6199 | } | ||
6200 | } | ||
6201 | |||
6202 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | ||
6203 | struct kvm_async_pf *work) | ||
6204 | { | ||
6205 | trace_kvm_async_pf_not_present(work->gva); | ||
6206 | |||
6207 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
6208 | kvm_add_async_pf_gfn(vcpu, work->arch.gfn); | ||
6209 | } | ||
6210 | |||
6211 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | ||
6212 | struct kvm_async_pf *work) | ||
6213 | { | ||
6214 | trace_kvm_async_pf_ready(work->gva); | ||
6215 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); | ||
6216 | } | ||
6217 | |||
6112 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 6218 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
6113 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 6219 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
6114 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 6220 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |