author	Gleb Natapov <gleb@redhat.com>	2010-10-14 05:22:46 -0400
committer	Avi Kivity <avi@redhat.com>	2011-01-12 04:21:39 -0500
commit	af585b921e5d1e919947c4b1164b59507fe7cd7b (patch)
tree	d0d4cc753d4d58934c5986733d7340fe69e523de /arch
parent	010c520e20413dfd567d568aba2b7238acd37e33 (diff)
KVM: Halt vcpu if page it tries to access is swapped out
If a guest accesses swapped-out memory, do not swap it in from the vcpu thread
context. Instead, schedule work to do the swapping and put the vcpu into a
halted state. Interrupts will still be delivered to the guest, and if an
interrupt causes a reschedule, the guest will continue to run another task.

[avi: remove call to get_user_pages_noio(), nacked by Linus; this makes
everything synchronous again]

Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
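Editor's note: the flow the message describes (queue the slow swap-in, put the vcpu into a synthetic halt, resume when the page is ready) can be hard to picture from the patch alone. Below is a small user-space model of that flow, a sketch only and not kernel code; the names toy_vcpu, swap_in_worker and handle_swapped_out_fault are invented for the example, and with the get_user_pages_noio() call removed the real patch still resolves the fault synchronously, so this illustrates the infrastructure's intent rather than this commit's end-to-end behavior.

/*
 * Illustrative user-space model of the described flow -- NOT the kernel
 * code from this patch.  A fault on a swapped-out page queues the slow
 * swap-in on a worker thread and puts the vcpu into a synthetic halt;
 * the run loop stays out of "guest mode" until the completion clears the
 * halt, instead of blocking the vcpu thread on I/O.
 * Build with: cc -pthread toy_apf.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct toy_vcpu {
	pthread_mutex_t lock;
	pthread_cond_t wake;
	bool apf_halted;	/* plays the role of vcpu->arch.apf.halted */
	bool page_ready;	/* set by the async "swap-in" worker */
};

static void *swap_in_worker(void *arg)
{
	struct toy_vcpu *vcpu = arg;

	sleep(1);		/* pretend the page is being read from disk */

	pthread_mutex_lock(&vcpu->lock);
	vcpu->page_ready = true;
	vcpu->apf_halted = false;	/* completion un-halts the vcpu */
	pthread_cond_signal(&vcpu->wake);
	pthread_mutex_unlock(&vcpu->lock);
	return NULL;
}

static void handle_swapped_out_fault(struct toy_vcpu *vcpu)
{
	pthread_t worker;

	/* Schedule the swap-in and halt the vcpu instead of sleeping here. */
	vcpu->apf_halted = true;
	pthread_create(&worker, NULL, swap_in_worker, vcpu);
	pthread_detach(worker);
}

int main(void)
{
	struct toy_vcpu vcpu = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wake = PTHREAD_COND_INITIALIZER,
	};

	handle_swapped_out_fault(&vcpu);

	/* __vcpu_run analogue: do not enter the guest while apf_halted. */
	pthread_mutex_lock(&vcpu.lock);
	while (vcpu.apf_halted)
		pthread_cond_wait(&vcpu.wake, &vcpu.lock);
	pthread_mutex_unlock(&vcpu.lock);

	printf("page ready: %d, resuming guest\n", vcpu.page_ready);
	return 0;
}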
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/include/asm/kvm_host.h	 18
-rw-r--r--	arch/x86/kvm/Kconfig	  1
-rw-r--r--	arch/x86/kvm/Makefile	  1
-rw-r--r--	arch/x86/kvm/mmu.c	 52
-rw-r--r--	arch/x86/kvm/paging_tmpl.h	  4
-rw-r--r--	arch/x86/kvm/x86.c	112
6 files changed, 183 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f702f82aa1eb..b5f4c1a36d65 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -83,11 +83,14 @@
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define ASYNC_PF_PER_VCPU 64
+
 extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_vcpu;
 struct kvm;
+struct kvm_async_pf;
 
 enum kvm_reg {
 	VCPU_REGS_RAX = 0,
@@ -412,6 +415,11 @@ struct kvm_vcpu_arch {
 	u64 hv_vapic;
 
 	cpumask_var_t wbinvd_dirty_mask;
+
+	struct {
+		bool halted;
+		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+	} apf;
 };
 
 struct kvm_arch {
@@ -585,6 +593,10 @@ struct kvm_x86_ops {
 	const struct trace_print_flags *exit_reasons_str;
 };
 
+struct kvm_arch_async_pf {
+	gfn_t gfn;
+};
+
 extern struct kvm_x86_ops *kvm_x86_ops;
 
 int kvm_mmu_module_init(void);
@@ -799,4 +811,10 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work);
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work);
+extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ddc131ff438f..50f63648ce1b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
+	select KVM_ASYNC_PF
 	select USER_RETURN_NOTIFIER
 	select KVM_MMIO
 	---help---
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 31a7035c4bd9..c53bf19b1da0 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,6 +9,7 @@ kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 				coalesced_mmio.o irq_comm.o eventfd.o \
 				assigned-dev.o)
 kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
+kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o timer.o
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fbb04aee8301..4ab04de5a76a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,9 +18,11 @@
  *
  */
 
+#include "irq.h"
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -2587,6 +2589,50 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 					     error_code & PFERR_WRITE_MASK, gfn);
 }
 
+int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+{
+	struct kvm_arch_async_pf arch;
+	arch.gfn = gfn;
+
+	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+}
+
+static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+		     kvm_event_needs_reinjection(vcpu)))
+		return false;
+
+	return kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
+static bool try_async_pf(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
+			 pfn_t *pfn)
+{
+	bool async;
+
+	*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
+
+	if (!async)
+		return false; /* *pfn has correct page already */
+
+	put_page(pfn_to_page(*pfn));
+
+	if (can_do_async_pf(vcpu)) {
+		trace_kvm_try_async_get_page(async, *pfn);
+		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
+			trace_kvm_async_pf_doublefault(gva, gfn);
+			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+			return true;
+		} else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+			return true;
+	}
+
+	*pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	return false;
+}
+
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 			  u32 error_code)
 {
@@ -2609,7 +2655,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	if (try_async_pf(vcpu, gfn, gpa, &pfn))
+		return 0;
+
+	/* mmio */
 	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index cd7a833a3b52..c45376dd041a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -568,7 +568,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
+
+	if (try_async_pf(vcpu, walker.gfn, addr, &pfn))
+		return 0;
 
 	/* mmio */
 	if (is_error_pfn(pfn))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c05d47701292..3cd4d091c2f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -43,6 +43,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/hash.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
+static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
+{
+	int i;
+	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
+		vcpu->arch.apf.gfns[i] = ~0;
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
@@ -5115,6 +5123,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			vcpu->fpu_active = 0;
 			kvm_x86_ops->fpu_deactivate(vcpu);
 		}
+		if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
+			/* Page is swapped out. Do synthetic halt */
+			vcpu->arch.apf.halted = true;
+			r = 1;
+			goto out;
+		}
 	}
 
 	r = kvm_mmu_reload(vcpu);
@@ -5243,7 +5257,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
 	r = 1;
 	while (r > 0) {
-		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+		    !vcpu->arch.apf.halted)
 			r = vcpu_enter_guest(vcpu);
 		else {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -5256,6 +5271,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 					vcpu->arch.mp_state =
 						KVM_MP_STATE_RUNNABLE;
 				case KVM_MP_STATE_RUNNABLE:
+					vcpu->arch.apf.halted = false;
 					break;
 				case KVM_MP_STATE_SIPI_RECEIVED:
 				default:
@@ -5277,6 +5293,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.request_irq_exits;
 		}
+
+		kvm_check_async_pf_completion(vcpu);
+
 		if (signal_pending(current)) {
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
@@ -5792,6 +5811,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
+	kvm_clear_async_pf_completion_queue(vcpu);
+	kvm_async_pf_hash_reset(vcpu);
+	vcpu->arch.apf.halted = false;
+
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -5880,6 +5903,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
 		goto fail_free_mce_banks;
 
+	kvm_async_pf_hash_reset(vcpu);
+
 	return 0;
 fail_free_mce_banks:
 	kfree(vcpu->arch.mce_banks);
@@ -5938,8 +5963,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
 	/*
 	 * Unpin any mmu pages first.
 	 */
-	kvm_for_each_vcpu(i, vcpu, kvm)
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_clear_async_pf_completion_queue(vcpu);
 		kvm_unload_vcpu_mmu(vcpu);
+	}
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		kvm_arch_vcpu_free(vcpu);
 
@@ -6050,7 +6077,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+		!vcpu->arch.apf.halted)
+		|| !list_empty_careful(&vcpu->async_pf.done)
 		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
 		|| vcpu->arch.nmi_pending ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
@@ -6109,6 +6138,83 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 }
 EXPORT_SYMBOL_GPL(kvm_set_rflags);
 
+static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
+{
+	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
+}
+
+static inline u32 kvm_async_pf_next_probe(u32 key)
+{
+	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
+}
+
+static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u32 key = kvm_async_pf_hash_fn(gfn);
+
+	while (vcpu->arch.apf.gfns[key] != ~0)
+		key = kvm_async_pf_next_probe(key);
+
+	vcpu->arch.apf.gfns[key] = gfn;
+}
+
+static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	int i;
+	u32 key = kvm_async_pf_hash_fn(gfn);
+
+	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
+		     (vcpu->arch.apf.gfns[key] != gfn ||
+		      vcpu->arch.apf.gfns[key] == ~0); i++)
+		key = kvm_async_pf_next_probe(key);
+
+	return key;
+}
+
+bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
+}
+
+static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u32 i, j, k;
+
+	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
+	while (true) {
+		vcpu->arch.apf.gfns[i] = ~0;
+		do {
+			j = kvm_async_pf_next_probe(j);
+			if (vcpu->arch.apf.gfns[j] == ~0)
+				return;
+			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
+			/*
+			 * k lies cyclically in ]i,j]
+			 * |    i.k.j |
+			 * |....j i.k.| or |.k..j i...|
+			 */
+		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
+		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
+		i = j;
+	}
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work)
+{
+	trace_kvm_async_pf_not_present(work->gva);
+
+	kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work)
+{
+	trace_kvm_async_pf_ready(work->gva);
+	kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
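Editor's note on the open-addressed gfn hash added in x86.c above: kvm_del_async_pf_gfn() uses backward-shift deletion, re-homing any later entry whose natural slot ("k" in the comment) no longer lies cyclically in ]i,j], so a lookup probing from the hash slot never stops early at a stale hole. The stand-alone user-space sketch below mirrors that add/find/delete logic; the table size is taken from ASYNC_PF_PER_VCPU, but the hash function is a trivial stand-in for hash_32()/order_base_2(), so it is illustrative only.

/*
 * User-space sketch of the patch's gfn tracking hash.  Mirrors
 * kvm_add_async_pf_gfn()/kvm_find_async_pf_gfn()/kvm_del_async_pf_gfn(),
 * with a toy hash function; for illustration, not kernel code.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define APF_SLOTS 64			/* roundup_pow_of_two(ASYNC_PF_PER_VCPU) */
#define EMPTY ((uint64_t)~0ULL)

static uint64_t gfns[APF_SLOTS];

static uint32_t hash_fn(uint64_t gfn)
{
	return (uint32_t)(gfn * 2654435761u) & (APF_SLOTS - 1);	/* toy hash */
}

static uint32_t next_probe(uint32_t key)
{
	return (key + 1) & (APF_SLOTS - 1);
}

static void add_gfn(uint64_t gfn)
{
	uint32_t key = hash_fn(gfn);

	while (gfns[key] != EMPTY)		/* linear probe to a free slot */
		key = next_probe(key);
	gfns[key] = gfn;
}

static uint32_t gfn_slot(uint64_t gfn)
{
	uint32_t key = hash_fn(gfn);
	int i;

	/* Mirrors the patch's lookup, which probes past empty slots too. */
	for (i = 0; i < APF_SLOTS &&
		    (gfns[key] != gfn || gfns[key] == EMPTY); i++)
		key = next_probe(key);
	return key;
}

static int find_gfn(uint64_t gfn)
{
	return gfns[gfn_slot(gfn)] == gfn;
}

static void del_gfn(uint64_t gfn)
{
	uint32_t i, j, k;

	i = j = gfn_slot(gfn);
	for (;;) {
		gfns[i] = EMPTY;
		do {
			j = next_probe(j);
			if (gfns[j] == EMPTY)
				return;
			k = hash_fn(gfns[j]);
			/* shift gfns[j] back unless k lies cyclically in ]i,j] */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		gfns[i] = gfns[j];
		i = j;
	}
}

int main(void)
{
	uint64_t g;

	for (g = 0; g < APF_SLOTS; g++)
		gfns[g] = EMPTY;

	/* 0x1000, 0x2000 and 0x1040 all collide in the toy hash. */
	add_gfn(0x1000); add_gfn(0x2000); add_gfn(0x1000 + APF_SLOTS);
	assert(find_gfn(0x1000) && find_gfn(0x2000));
	del_gfn(0x1000);
	assert(!find_gfn(0x1000) && find_gfn(0x1000 + APF_SLOTS));
	printf("gfn hash sketch OK\n");
	return 0;
}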