-rw-r--r--  arch/x86/include/asm/kvm_host.h |  18
-rw-r--r--  arch/x86/kvm/Kconfig            |   1
-rw-r--r--  arch/x86/kvm/Makefile           |   1
-rw-r--r--  arch/x86/kvm/mmu.c              |  52
-rw-r--r--  arch/x86/kvm/paging_tmpl.h      |   4
-rw-r--r--  arch/x86/kvm/x86.c              | 112
-rw-r--r--  include/linux/kvm_host.h        |  31
-rw-r--r--  include/trace/events/kvm.h      |  90
-rw-r--r--  virt/kvm/Kconfig                |   3
-rw-r--r--  virt/kvm/async_pf.c             | 190
-rw-r--r--  virt/kvm/async_pf.h             |  36
-rw-r--r--  virt/kvm/kvm_main.c             |  48
12 files changed, 570 insertions, 16 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f702f82aa1eb..b5f4c1a36d65 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -83,11 +83,14 @@
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define ASYNC_PF_PER_VCPU 64
+
 extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_vcpu;
 struct kvm;
+struct kvm_async_pf;
 
 enum kvm_reg {
         VCPU_REGS_RAX = 0,
@@ -412,6 +415,11 @@ struct kvm_vcpu_arch {
         u64 hv_vapic;
 
         cpumask_var_t wbinvd_dirty_mask;
+
+        struct {
+                bool halted;
+                gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+        } apf;
 };
 
 struct kvm_arch {
@@ -585,6 +593,10 @@ struct kvm_x86_ops {
         const struct trace_print_flags *exit_reasons_str;
 };
 
+struct kvm_arch_async_pf {
+        gfn_t gfn;
+};
+
 extern struct kvm_x86_ops *kvm_x86_ops;
 
 int kvm_mmu_module_init(void);
@@ -799,4 +811,10 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+                                     struct kvm_async_pf *work);
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+                                 struct kvm_async_pf *work);
+extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ddc131ff438f..50f63648ce1b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
         select HAVE_KVM_IRQCHIP
         select HAVE_KVM_EVENTFD
         select KVM_APIC_ARCHITECTURE
+        select KVM_ASYNC_PF
         select USER_RETURN_NOTIFIER
         select KVM_MMIO
         ---help---
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 31a7035c4bd9..c53bf19b1da0 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,6 +9,7 @@ kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
                                 coalesced_mmio.o irq_comm.o eventfd.o \
                                 assigned-dev.o)
 kvm-$(CONFIG_IOMMU_API)        += $(addprefix ../../../virt/kvm/, iommu.o)
+kvm-$(CONFIG_KVM_ASYNC_PF)     += $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y                          += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
                                   i8254.o timer.o
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fbb04aee8301..4ab04de5a76a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,9 +18,11 @@
  *
  */
 
+#include "irq.h"
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -2587,6 +2589,50 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                              error_code & PFERR_WRITE_MASK, gfn);
 }
 
+int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+{
+        struct kvm_arch_async_pf arch;
+        arch.gfn = gfn;
+
+        return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+}
+
+static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+        if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+                     kvm_event_needs_reinjection(vcpu)))
+                return false;
+
+        return kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
+static bool try_async_pf(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
+                         pfn_t *pfn)
+{
+        bool async;
+
+        *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
+
+        if (!async)
+                return false; /* *pfn has correct page already */
+
+        put_page(pfn_to_page(*pfn));
+
+        if (can_do_async_pf(vcpu)) {
+                trace_kvm_try_async_get_page(async, *pfn);
+                if (kvm_find_async_pf_gfn(vcpu, gfn)) {
+                        trace_kvm_async_pf_doublefault(gva, gfn);
+                        kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+                        return true;
+                } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+                        return true;
+        }
+
+        *pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+        return false;
+}
+
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                           u32 error_code)
 {
@@ -2609,7 +2655,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
-        pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+        if (try_async_pf(vcpu, gfn, gpa, &pfn))
+                return 0;
+
+        /* mmio */
         if (is_error_pfn(pfn))
                 return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
         spin_lock(&vcpu->kvm->mmu_lock);
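
The try_async_pf() helper added above has three possible outcomes: the page is already resident and the fault is handled synchronously, the fault is queued for background fault-in while the guest keeps running, or the same gfn is already in flight and the vcpu is halted rather than queueing a duplicate. A rough user-space sketch of that decision flow, with the KVM internals replaced by stubs (page_resident(), gfn_in_flight() and the other helpers are placeholders, not KVM APIs):

#include <stdbool.h>
#include <stdio.h>

/* Stubs standing in for KVM internals -- placeholders, not real KVM calls. */
static bool page_resident(unsigned long gfn)     { return gfn % 2 == 0; }
static bool can_take_async_pf(void)              { return true; }
static bool gfn_in_flight(unsigned long gfn)     { return gfn == 7; }
static bool queue_async_fault(unsigned long gfn) { printf("queued gfn %lu\n", gfn); return true; }

enum fault_action { FAULT_SYNC, FAULT_ASYNC_QUEUED, FAULT_HALT_VCPU };

/* Mirrors the shape of try_async_pf(): prefer the fast path, otherwise
 * queue a background fault-in, or halt on a repeated gfn. */
static enum fault_action handle_fault(unsigned long gfn)
{
        if (page_resident(gfn))
                return FAULT_SYNC;
        if (can_take_async_pf()) {
                if (gfn_in_flight(gfn))
                        return FAULT_HALT_VCPU;     /* second fault on the same gfn */
                if (queue_async_fault(gfn))
                        return FAULT_ASYNC_QUEUED;  /* guest keeps running */
        }
        return FAULT_SYNC; /* fall back to a synchronous, possibly sleeping, fault-in */
}

int main(void)
{
        printf("%d %d %d\n", handle_fault(2), handle_fault(3), handle_fault(7));
        return 0;
}

The real code takes the synchronous fallback (a plain gfn_to_pfn()) whenever async faults are not allowed, for example when an event needs reinjection or the irqchip is not in the kernel.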
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index cd7a833a3b52..c45376dd041a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -568,7 +568,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
-        pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
+
+        if (try_async_pf(vcpu, walker.gfn, addr, &pfn))
+                return 0;
 
         /* mmio */
         if (is_error_pfn(pfn))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c05d47701292..3cd4d091c2f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -43,6 +43,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/hash.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
+static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
+{
+        int i;
+        for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
+                vcpu->arch.apf.gfns[i] = ~0;
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
         unsigned slot;
@@ -5115,6 +5123,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                         vcpu->fpu_active = 0;
                         kvm_x86_ops->fpu_deactivate(vcpu);
                 }
+                if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
+                        /* Page is swapped out. Do synthetic halt */
+                        vcpu->arch.apf.halted = true;
+                        r = 1;
+                        goto out;
+                }
         }
 
         r = kvm_mmu_reload(vcpu);
@@ -5243,7 +5257,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
         r = 1;
         while (r > 0) {
-                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+                    !vcpu->arch.apf.halted)
                         r = vcpu_enter_guest(vcpu);
                 else {
                         srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -5256,6 +5271,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                                 vcpu->arch.mp_state =
                                         KVM_MP_STATE_RUNNABLE;
                         case KVM_MP_STATE_RUNNABLE:
+                                vcpu->arch.apf.halted = false;
                                 break;
                         case KVM_MP_STATE_SIPI_RECEIVED:
                         default:
@@ -5277,6 +5293,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                         vcpu->run->exit_reason = KVM_EXIT_INTR;
                         ++vcpu->stat.request_irq_exits;
                 }
+
+                kvm_check_async_pf_completion(vcpu);
+
                 if (signal_pending(current)) {
                         r = -EINTR;
                         vcpu->run->exit_reason = KVM_EXIT_INTR;
@@ -5792,6 +5811,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
         kvm_make_request(KVM_REQ_EVENT, vcpu);
 
+        kvm_clear_async_pf_completion_queue(vcpu);
+        kvm_async_pf_hash_reset(vcpu);
+        vcpu->arch.apf.halted = false;
+
         return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -5880,6 +5903,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
         if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
                 goto fail_free_mce_banks;
 
+        kvm_async_pf_hash_reset(vcpu);
+
         return 0;
 fail_free_mce_banks:
         kfree(vcpu->arch.mce_banks);
@@ -5938,8 +5963,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
         /*
          * Unpin any mmu pages first.
          */
-        kvm_for_each_vcpu(i, vcpu, kvm)
+        kvm_for_each_vcpu(i, vcpu, kvm) {
+                kvm_clear_async_pf_completion_queue(vcpu);
                 kvm_unload_vcpu_mmu(vcpu);
+        }
         kvm_for_each_vcpu(i, vcpu, kvm)
                 kvm_arch_vcpu_free(vcpu);
 
@@ -6050,7 +6077,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-        return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
+        return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+                !vcpu->arch.apf.halted)
+                || !list_empty_careful(&vcpu->async_pf.done)
                 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
                 || vcpu->arch.nmi_pending ||
                 (kvm_arch_interrupt_allowed(vcpu) &&
@@ -6109,6 +6138,83 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 }
 EXPORT_SYMBOL_GPL(kvm_set_rflags);
 
+static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
+{
+        return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
+}
+
+static inline u32 kvm_async_pf_next_probe(u32 key)
+{
+        return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
+}
+
+static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+        u32 key = kvm_async_pf_hash_fn(gfn);
+
+        while (vcpu->arch.apf.gfns[key] != ~0)
+                key = kvm_async_pf_next_probe(key);
+
+        vcpu->arch.apf.gfns[key] = gfn;
+}
+
+static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+        int i;
+        u32 key = kvm_async_pf_hash_fn(gfn);
+
+        for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
+                     (vcpu->arch.apf.gfns[key] != gfn &&
+                      vcpu->arch.apf.gfns[key] != ~0); i++)
+                key = kvm_async_pf_next_probe(key);
+
+        return key;
+}
+
+bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+        return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
+}
+
+static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+        u32 i, j, k;
+
+        i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
+        while (true) {
+                vcpu->arch.apf.gfns[i] = ~0;
+                do {
+                        j = kvm_async_pf_next_probe(j);
+                        if (vcpu->arch.apf.gfns[j] == ~0)
+                                return;
+                        k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
+                        /*
+                         * k lies cyclically in ]i,j]
+                         * |    i.k.j |
+                         * |....j i.k.| or  |.k..j i...|
+                         */
+                } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
+                vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
+                i = j;
+        }
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+                                     struct kvm_async_pf *work)
+{
+        trace_kvm_async_pf_not_present(work->gva);
+
+        kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+        kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+                                 struct kvm_async_pf *work)
+{
+        trace_kvm_async_pf_ready(work->gva);
+        kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
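
The apf.gfns[] array used above is a small open-addressed hash table with linear probing: ~0 marks a free slot, kvm_add_async_pf_gfn() probes forward to the first free slot, and kvm_del_async_pf_gfn() has to re-home later entries of the probe chain (the "k lies cyclically in ]i,j]" comment) so lookups never stop at a premature hole. A minimal user-space model of the same add/find/delete logic; a sketch only, where the multiplicative hash and fixed table size stand in for hash_32() and roundup_pow_of_two(ASYNC_PF_PER_VCPU):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NSLOTS 64                 /* stands in for roundup_pow_of_two(ASYNC_PF_PER_VCPU) */
#define EMPTY  ((uint64_t)~0ULL)  /* same sentinel value the patch uses */

static uint64_t slots[NSLOTS];

static uint32_t hash_fn(uint64_t gfn)  { return (uint32_t)(gfn * 2654435761u) % NSLOTS; }
static uint32_t next_probe(uint32_t k) { return (k + 1) % NSLOTS; }

static void add_gfn(uint64_t gfn)
{
        uint32_t key = hash_fn(gfn);
        while (slots[key] != EMPTY)
                key = next_probe(key);
        slots[key] = gfn;
}

static uint32_t gfn_slot(uint64_t gfn)
{
        uint32_t key = hash_fn(gfn);
        int i;
        for (i = 0; i < NSLOTS && slots[key] != gfn && slots[key] != EMPTY; i++)
                key = next_probe(key);
        return key;
}

static bool find_gfn(uint64_t gfn) { return slots[gfn_slot(gfn)] == gfn; }

/* Deletion: clear the slot, then walk the probe chain and pull back any
 * entry whose home slot lies cyclically outside (i, j], exactly as
 * kvm_del_async_pf_gfn() does. */
static void del_gfn(uint64_t gfn)
{
        uint32_t i, j, k;

        i = j = gfn_slot(gfn);
        while (true) {
                slots[i] = EMPTY;
                do {
                        j = next_probe(j);
                        if (slots[j] == EMPTY)
                                return;
                        k = hash_fn(slots[j]);
                } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
                slots[i] = slots[j];
                i = j;
        }
}

int main(void)
{
        int n;

        memset(slots, 0xff, sizeof(slots));          /* every slot starts EMPTY */
        for (n = 0; n < 5; n++)
                add_gfn(100 + n * NSLOTS);           /* force one long collision chain */
        del_gfn(100 + 2 * NSLOTS);                   /* delete from the middle of the chain */
        printf("find first: %d, deleted: %d, last: %d\n",
               find_gfn(100), find_gfn(100 + 2 * NSLOTS), find_gfn(100 + 4 * NSLOTS));
        return 0;
}

Deleting from the middle of a collision chain, as main() does here, still leaves the remaining gfns findable; that property is what kvm_find_async_pf_gfn() relies on when it checks for a repeated fault on the same gfn.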
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a0557422715e..e56acc7857e2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -40,6 +40,7 @@
 #define KVM_REQ_KICK               9
 #define KVM_REQ_DEACTIVATE_FPU    10
 #define KVM_REQ_EVENT             11
+#define KVM_REQ_APF_HALT          12
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID     0
 
@@ -74,6 +75,26 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
                               struct kvm_io_device *dev);
 
+#ifdef CONFIG_KVM_ASYNC_PF
+struct kvm_async_pf {
+        struct work_struct work;
+        struct list_head link;
+        struct list_head queue;
+        struct kvm_vcpu *vcpu;
+        struct mm_struct *mm;
+        gva_t gva;
+        unsigned long addr;
+        struct kvm_arch_async_pf arch;
+        struct page *page;
+        bool done;
+};
+
+void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
+void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+                       struct kvm_arch_async_pf *arch);
+#endif
+
 struct kvm_vcpu {
         struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -104,6 +125,15 @@ struct kvm_vcpu {
         gpa_t mmio_phys_addr;
 #endif
 
+#ifdef CONFIG_KVM_ASYNC_PF
+        struct {
+                u32 queued;
+                struct list_head queue;
+                struct list_head done;
+                spinlock_t lock;
+        } async_pf;
+#endif
+
         struct kvm_vcpu_arch arch;
 };
 
@@ -302,6 +332,7 @@ void kvm_set_page_accessed(struct page *page);
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
                          struct kvm_memory_slot *slot, gfn_t gfn);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 6dd3a51ab1cb..a78a5e574632 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -185,6 +185,96 @@ TRACE_EVENT(kvm_age_page,
                   __entry->referenced ? "YOUNG" : "OLD")
 );
 
+#ifdef CONFIG_KVM_ASYNC_PF
+TRACE_EVENT(
+        kvm_try_async_get_page,
+        TP_PROTO(bool async, u64 pfn),
+        TP_ARGS(async, pfn),
+
+        TP_STRUCT__entry(
+                __field(__u64, pfn)
+                ),
+
+        TP_fast_assign(
+                __entry->pfn = (!async) ? pfn : (u64)-1;
+                ),
+
+        TP_printk("pfn %#llx", __entry->pfn)
+);
+
+TRACE_EVENT(
+        kvm_async_pf_not_present,
+        TP_PROTO(u64 gva),
+        TP_ARGS(gva),
+
+        TP_STRUCT__entry(
+                __field(__u64, gva)
+                ),
+
+        TP_fast_assign(
+                __entry->gva = gva;
+                ),
+
+        TP_printk("gva %#llx not present", __entry->gva)
+);
+
+TRACE_EVENT(
+        kvm_async_pf_ready,
+        TP_PROTO(u64 gva),
+        TP_ARGS(gva),
+
+        TP_STRUCT__entry(
+                __field(__u64, gva)
+                ),
+
+        TP_fast_assign(
+                __entry->gva = gva;
+                ),
+
+        TP_printk("gva %#llx ready", __entry->gva)
+);
+
+TRACE_EVENT(
+        kvm_async_pf_completed,
+        TP_PROTO(unsigned long address, struct page *page, u64 gva),
+        TP_ARGS(address, page, gva),
+
+        TP_STRUCT__entry(
+                __field(unsigned long, address)
+                __field(pfn_t, pfn)
+                __field(u64, gva)
+                ),
+
+        TP_fast_assign(
+                __entry->address = address;
+                __entry->pfn = page ? page_to_pfn(page) : 0;
+                __entry->gva = gva;
+                ),
+
+        TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva,
+                  __entry->address, __entry->pfn)
+);
+
+TRACE_EVENT(
+        kvm_async_pf_doublefault,
+        TP_PROTO(u64 gva, u64 gfn),
+        TP_ARGS(gva, gfn),
+
+        TP_STRUCT__entry(
+                __field(u64, gva)
+                __field(u64, gfn)
+                ),
+
+        TP_fast_assign(
+                __entry->gva = gva;
+                __entry->gfn = gfn;
+                ),
+
+        TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
+);
+
+#endif
+
 #endif /* _TRACE_KVM_MAIN_H */
 
 /* This part must be outside protection */
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 7f1178f6b839..f63ccb0a5982 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -15,3 +15,6 @@ config KVM_APIC_ARCHITECTURE
 
 config KVM_MMIO
        bool
+
+config KVM_ASYNC_PF
+       bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
new file mode 100644
index 000000000000..857d63431cb7
--- /dev/null
+++ b/virt/kvm/async_pf.c
@@ -0,0 +1,190 @@
+/*
+ * kvm asynchronous fault support
+ *
+ * Copyright 2010 Red Hat, Inc.
+ *
+ * Author:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mmu_context.h>
+
+#include "async_pf.h"
+#include <trace/events/kvm.h>
+
+static struct kmem_cache *async_pf_cache;
+
+int kvm_async_pf_init(void)
+{
+        async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
+
+        if (!async_pf_cache)
+                return -ENOMEM;
+
+        return 0;
+}
+
+void kvm_async_pf_deinit(void)
+{
+        if (async_pf_cache)
+                kmem_cache_destroy(async_pf_cache);
+        async_pf_cache = NULL;
+}
+
+void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
+{
+        INIT_LIST_HEAD(&vcpu->async_pf.done);
+        INIT_LIST_HEAD(&vcpu->async_pf.queue);
+        spin_lock_init(&vcpu->async_pf.lock);
+}
+
+static void async_pf_execute(struct work_struct *work)
+{
+        struct page *page = NULL;
+        struct kvm_async_pf *apf =
+                container_of(work, struct kvm_async_pf, work);
+        struct mm_struct *mm = apf->mm;
+        struct kvm_vcpu *vcpu = apf->vcpu;
+        unsigned long addr = apf->addr;
+        gva_t gva = apf->gva;
+
+        might_sleep();
+
+        use_mm(mm);
+        down_read(&mm->mmap_sem);
+        get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+        up_read(&mm->mmap_sem);
+        unuse_mm(mm);
+
+        spin_lock(&vcpu->async_pf.lock);
+        list_add_tail(&apf->link, &vcpu->async_pf.done);
+        apf->page = page;
+        apf->done = true;
+        spin_unlock(&vcpu->async_pf.lock);
+
+        /*
+         * apf may be freed by kvm_check_async_pf_completion() after
+         * this point
+         */
+
+        trace_kvm_async_pf_completed(addr, page, gva);
+
+        if (waitqueue_active(&vcpu->wq))
+                wake_up_interruptible(&vcpu->wq);
+
+        mmdrop(mm);
+        kvm_put_kvm(vcpu->kvm);
+}
+
+void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
+{
+        /* cancel outstanding work queue item */
+        while (!list_empty(&vcpu->async_pf.queue)) {
+                struct kvm_async_pf *work =
+                        list_entry(vcpu->async_pf.queue.next,
+                                   typeof(*work), queue);
+                cancel_work_sync(&work->work);
+                list_del(&work->queue);
+                if (!work->done) /* work was canceled */
+                        kmem_cache_free(async_pf_cache, work);
+        }
+
+        spin_lock(&vcpu->async_pf.lock);
+        while (!list_empty(&vcpu->async_pf.done)) {
+                struct kvm_async_pf *work =
+                        list_entry(vcpu->async_pf.done.next,
+                                   typeof(*work), link);
+                list_del(&work->link);
+                if (work->page)
+                        put_page(work->page);
+                kmem_cache_free(async_pf_cache, work);
+        }
+        spin_unlock(&vcpu->async_pf.lock);
+
+        vcpu->async_pf.queued = 0;
+}
+
+void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
+{
+        struct kvm_async_pf *work;
+
+        if (list_empty_careful(&vcpu->async_pf.done))
+                return;
+
+        spin_lock(&vcpu->async_pf.lock);
+        work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
+        list_del(&work->link);
+        spin_unlock(&vcpu->async_pf.lock);
+
+        kvm_arch_async_page_present(vcpu, work);
+
+        list_del(&work->queue);
+        vcpu->async_pf.queued--;
+        if (work->page)
+                put_page(work->page);
+        kmem_cache_free(async_pf_cache, work);
+}
+
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+                       struct kvm_arch_async_pf *arch)
+{
+        struct kvm_async_pf *work;
+
+        if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
+                return 0;
+
+        /* setup delayed work */
+
+        /*
+         * do alloc nowait since if we are going to sleep anyway we
+         * may as well sleep faulting in page
+         */
+        work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+        if (!work)
+                return 0;
+
+        work->page = NULL;
+        work->done = false;
+        work->vcpu = vcpu;
+        work->gva = gva;
+        work->addr = gfn_to_hva(vcpu->kvm, gfn);
+        work->arch = *arch;
+        work->mm = current->mm;
+        atomic_inc(&work->mm->mm_count);
+        kvm_get_kvm(work->vcpu->kvm);
+
+        /* this can't really happen otherwise gfn_to_pfn_async
+           would succeed */
+        if (unlikely(kvm_is_error_hva(work->addr)))
+                goto retry_sync;
+
+        INIT_WORK(&work->work, async_pf_execute);
+        if (!schedule_work(&work->work))
+                goto retry_sync;
+
+        list_add_tail(&work->queue, &vcpu->async_pf.queue);
+        vcpu->async_pf.queued++;
+        kvm_arch_async_page_not_present(vcpu, work);
+        return 1;
+retry_sync:
+        kvm_put_kvm(work->vcpu->kvm);
+        mmdrop(work->mm);
+        kmem_cache_free(async_pf_cache, work);
+        return 0;
+}
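
The new file above splits the work between two contexts: the vcpu thread queues an item in kvm_setup_async_pf() and later retires it in kvm_check_async_pf_completion(), while a workqueue item (async_pf_execute()) faults the page in with get_user_pages() and moves the item onto the done list under async_pf.lock. A rough user-space model of that producer/consumer shape using pthreads; the names and the usleep() standing in for the sleeping page fault are illustrative assumptions, not KVM code:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* One outstanding "async fault": a worker resolves it and hands it back. */
struct async_pf {
        unsigned long gfn;
        struct async_pf *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct async_pf *done_list;      /* completed items, retired by the vcpu thread */

static void *fault_worker(void *arg)    /* plays the role of async_pf_execute() */
{
        struct async_pf *work = arg;

        usleep(1000);                   /* stand-in for get_user_pages() sleeping on I/O */

        pthread_mutex_lock(&lock);
        work->next = done_list;         /* move to the done list under the lock */
        done_list = work;
        pthread_mutex_unlock(&lock);
        return NULL;
}

static void setup_async_pf(pthread_t *thr, unsigned long gfn) /* ~ kvm_setup_async_pf() */
{
        struct async_pf *work = calloc(1, sizeof(*work));

        work->gfn = gfn;
        pthread_create(thr, NULL, fault_worker, work);
}

static void check_completion(void)      /* ~ kvm_check_async_pf_completion() */
{
        pthread_mutex_lock(&lock);
        while (done_list) {
                struct async_pf *work = done_list;

                done_list = work->next;
                printf("gfn %lu is ready, notify the guest\n", work->gfn);
                free(work);
        }
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        pthread_t thr[3];
        int i;

        for (i = 0; i < 3; i++)
                setup_async_pf(&thr[i], 100 + i);  /* guest touched three swapped-out gfns */
        for (i = 0; i < 3; i++)
                pthread_join(thr[i], NULL);        /* the real code polls instead of joining */
        check_completion();                        /* the vcpu loop retires completed faults */
        return 0;
}

The real code never joins the worker: the vcpu loop simply polls the done list on each iteration, and kvm_clear_async_pf_completion_queue() cancels or drains whatever remains when the vcpu is torn down or reset.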
diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h
new file mode 100644
index 000000000000..e7ef6447cb82
--- /dev/null
+++ b/virt/kvm/async_pf.h
@@ -0,0 +1,36 @@
+/*
+ * kvm asynchronous fault support
+ *
+ * Copyright 2010 Red Hat, Inc.
+ *
+ * Author:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __KVM_ASYNC_PF_H__
+#define __KVM_ASYNC_PF_H__
+
+#ifdef CONFIG_KVM_ASYNC_PF
+int kvm_async_pf_init(void);
+void kvm_async_pf_deinit(void);
+void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
+#else
+#define kvm_async_pf_init() (0)
+#define kvm_async_pf_deinit() do{}while(0)
+#define kvm_async_pf_vcpu_init(C) do{}while(0)
+#endif
+
+#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5225052aebc1..75fd590c0214 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -55,6 +55,7 @@
 #include <asm-generic/bitops/le.h>
 
 #include "coalesced_mmio.h"
+#include "async_pf.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -186,6 +187,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
         vcpu->kvm = kvm;
         vcpu->vcpu_id = id;
         init_waitqueue_head(&vcpu->wq);
+        kvm_async_pf_vcpu_init(vcpu);
 
         page = alloc_page(GFP_KERNEL | __GFP_ZERO);
         if (!page) {
@@ -946,15 +948,20 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
+                        bool *async)
 {
         struct page *page[1];
-        int npages;
+        int npages = 0;
         pfn_t pfn;
 
-        if (atomic)
+        /* we can do it either atomically or asynchronously, not both */
+        BUG_ON(atomic && async);
+
+        if (atomic || async)
                 npages = __get_user_pages_fast(addr, 1, 1, page);
-        else {
+
+        if (unlikely(npages != 1) && !atomic) {
                 might_sleep();
                 npages = get_user_pages_fast(addr, 1, 1, page);
         }
@@ -976,6 +983,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
 
                 if (vma == NULL || addr < vma->vm_start ||
                     !(vma->vm_flags & VM_PFNMAP)) {
+                        if (async && !(vma->vm_flags & VM_PFNMAP) &&
+                            (vma->vm_flags & VM_WRITE))
+                                *async = true;
                         up_read(&current->mm->mmap_sem);
 return_fault_page:
                         get_page(fault_page);
@@ -993,32 +1003,41 @@ return_fault_page:
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
 {
-        return hva_to_pfn(kvm, addr, true);
+        return hva_to_pfn(kvm, addr, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
 {
         unsigned long addr;
 
+        if (async)
+                *async = false;
+
         addr = gfn_to_hva(kvm, gfn);
         if (kvm_is_error_hva(addr)) {
                 get_page(bad_page);
                 return page_to_pfn(bad_page);
         }
 
-        return hva_to_pfn(kvm, addr, atomic);
+        return hva_to_pfn(kvm, addr, atomic, async);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
 {
-        return __gfn_to_pfn(kvm, gfn, true);
+        return __gfn_to_pfn(kvm, gfn, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
 
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async)
+{
+        return __gfn_to_pfn(kvm, gfn, false, async);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
+
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
-        return __gfn_to_pfn(kvm, gfn, false);
+        return __gfn_to_pfn(kvm, gfn, false, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
@@ -1026,7 +1045,7 @@ pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
                          struct kvm_memory_slot *slot, gfn_t gfn)
 {
         unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-        return hva_to_pfn(kvm, addr, false);
+        return hva_to_pfn(kvm, addr, false, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -2336,6 +2355,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
                 goto out_free_5;
         }
 
+        r = kvm_async_pf_init();
+        if (r)
+                goto out_free;
+
         kvm_chardev_ops.owner = module;
         kvm_vm_fops.owner = module;
         kvm_vcpu_fops.owner = module;
@@ -2343,7 +2366,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
         r = misc_register(&kvm_dev);
         if (r) {
                 printk(KERN_ERR "kvm: misc device register failed\n");
-                goto out_free;
+                goto out_unreg;
         }
 
         kvm_preempt_ops.sched_in = kvm_sched_in;
@@ -2353,6 +2376,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
         return 0;
 
+out_unreg:
+        kvm_async_pf_deinit();
 out_free:
         kmem_cache_destroy(kvm_vcpu_cache);
 out_free_5:
@@ -2385,6 +2410,7 @@ void kvm_exit(void)
         kvm_exit_debug();
         misc_deregister(&kvm_dev);
         kmem_cache_destroy(kvm_vcpu_cache);
+        kvm_async_pf_deinit();
         sysdev_unregister(&kvm_sysdev);
         sysdev_class_unregister(&kvm_sysdev_class);
         unregister_reboot_notifier(&kvm_reboot_notifier);