aboutsummaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
authorGleb Natapov <gleb@redhat.com>2010-10-14 05:22:46 -0400
committerAvi Kivity <avi@redhat.com>2011-01-12 04:21:39 -0500
commitaf585b921e5d1e919947c4b1164b59507fe7cd7b (patch)
treed0d4cc753d4d58934c5986733d7340fe69e523de /virt/kvm
parent010c520e20413dfd567d568aba2b7238acd37e33 (diff)
KVM: Halt vcpu if page it tries to access is swapped out
If a guest accesses swapped out memory do not swap it in from vcpu thread context. Schedule work to do swapping and put vcpu into halted state instead. Interrupts will still be delivered to the guest and if interrupt will cause reschedule guest will continue to run another task. [avi: remove call to get_user_pages_noio(), nacked by Linus; this makes everything synchronous again] Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: Gleb Natapov <gleb@redhat.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/async_pf.c190
-rw-r--r--virt/kvm/async_pf.h36
-rw-r--r--virt/kvm/kvm_main.c48
4 files changed, 266 insertions, 11 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 7f1178f6b83..f63ccb0a598 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -15,3 +15,6 @@ config KVM_APIC_ARCHITECTURE
15 15
16config KVM_MMIO 16config KVM_MMIO
17 bool 17 bool
18
19config KVM_ASYNC_PF
20 bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
new file mode 100644
index 00000000000..857d63431cb
--- /dev/null
+++ b/virt/kvm/async_pf.c
@@ -0,0 +1,190 @@
1/*
2 * kvm asynchronous fault support
3 *
4 * Copyright 2010 Red Hat, Inc.
5 *
6 * Author:
7 * Gleb Natapov <gleb@redhat.com>
8 *
9 * This file is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
21 */
22
23#include <linux/kvm_host.h>
24#include <linux/slab.h>
25#include <linux/module.h>
26#include <linux/mmu_context.h>
27
28#include "async_pf.h"
29#include <trace/events/kvm.h>
30
31static struct kmem_cache *async_pf_cache;
32
33int kvm_async_pf_init(void)
34{
35 async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
36
37 if (!async_pf_cache)
38 return -ENOMEM;
39
40 return 0;
41}
42
43void kvm_async_pf_deinit(void)
44{
45 if (async_pf_cache)
46 kmem_cache_destroy(async_pf_cache);
47 async_pf_cache = NULL;
48}
49
50void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
51{
52 INIT_LIST_HEAD(&vcpu->async_pf.done);
53 INIT_LIST_HEAD(&vcpu->async_pf.queue);
54 spin_lock_init(&vcpu->async_pf.lock);
55}
56
57static void async_pf_execute(struct work_struct *work)
58{
59 struct page *page = NULL;
60 struct kvm_async_pf *apf =
61 container_of(work, struct kvm_async_pf, work);
62 struct mm_struct *mm = apf->mm;
63 struct kvm_vcpu *vcpu = apf->vcpu;
64 unsigned long addr = apf->addr;
65 gva_t gva = apf->gva;
66
67 might_sleep();
68
69 use_mm(mm);
70 down_read(&mm->mmap_sem);
71 get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
72 up_read(&mm->mmap_sem);
73 unuse_mm(mm);
74
75 spin_lock(&vcpu->async_pf.lock);
76 list_add_tail(&apf->link, &vcpu->async_pf.done);
77 apf->page = page;
78 apf->done = true;
79 spin_unlock(&vcpu->async_pf.lock);
80
81 /*
82 * apf may be freed by kvm_check_async_pf_completion() after
83 * this point
84 */
85
86 trace_kvm_async_pf_completed(addr, page, gva);
87
88 if (waitqueue_active(&vcpu->wq))
89 wake_up_interruptible(&vcpu->wq);
90
91 mmdrop(mm);
92 kvm_put_kvm(vcpu->kvm);
93}
94
95void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
96{
97 /* cancel outstanding work queue item */
98 while (!list_empty(&vcpu->async_pf.queue)) {
99 struct kvm_async_pf *work =
100 list_entry(vcpu->async_pf.queue.next,
101 typeof(*work), queue);
102 cancel_work_sync(&work->work);
103 list_del(&work->queue);
104 if (!work->done) /* work was canceled */
105 kmem_cache_free(async_pf_cache, work);
106 }
107
108 spin_lock(&vcpu->async_pf.lock);
109 while (!list_empty(&vcpu->async_pf.done)) {
110 struct kvm_async_pf *work =
111 list_entry(vcpu->async_pf.done.next,
112 typeof(*work), link);
113 list_del(&work->link);
114 if (work->page)
115 put_page(work->page);
116 kmem_cache_free(async_pf_cache, work);
117 }
118 spin_unlock(&vcpu->async_pf.lock);
119
120 vcpu->async_pf.queued = 0;
121}
122
123void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
124{
125 struct kvm_async_pf *work;
126
127 if (list_empty_careful(&vcpu->async_pf.done))
128 return;
129
130 spin_lock(&vcpu->async_pf.lock);
131 work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
132 list_del(&work->link);
133 spin_unlock(&vcpu->async_pf.lock);
134
135 kvm_arch_async_page_present(vcpu, work);
136
137 list_del(&work->queue);
138 vcpu->async_pf.queued--;
139 if (work->page)
140 put_page(work->page);
141 kmem_cache_free(async_pf_cache, work);
142}
143
144int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
145 struct kvm_arch_async_pf *arch)
146{
147 struct kvm_async_pf *work;
148
149 if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
150 return 0;
151
152 /* setup delayed work */
153
154 /*
155 * do alloc nowait since if we are going to sleep anyway we
156 * may as well sleep faulting in page
157 */
158 work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
159 if (!work)
160 return 0;
161
162 work->page = NULL;
163 work->done = false;
164 work->vcpu = vcpu;
165 work->gva = gva;
166 work->addr = gfn_to_hva(vcpu->kvm, gfn);
167 work->arch = *arch;
168 work->mm = current->mm;
169 atomic_inc(&work->mm->mm_count);
170 kvm_get_kvm(work->vcpu->kvm);
171
172 /* this can't really happen otherwise gfn_to_pfn_async
173 would succeed */
174 if (unlikely(kvm_is_error_hva(work->addr)))
175 goto retry_sync;
176
177 INIT_WORK(&work->work, async_pf_execute);
178 if (!schedule_work(&work->work))
179 goto retry_sync;
180
181 list_add_tail(&work->queue, &vcpu->async_pf.queue);
182 vcpu->async_pf.queued++;
183 kvm_arch_async_page_not_present(vcpu, work);
184 return 1;
185retry_sync:
186 kvm_put_kvm(work->vcpu->kvm);
187 mmdrop(work->mm);
188 kmem_cache_free(async_pf_cache, work);
189 return 0;
190}
diff --git a/virt/kvm/async_pf.h b/virt/kvm/async_pf.h
new file mode 100644
index 00000000000..e7ef6447cb8
--- /dev/null
+++ b/virt/kvm/async_pf.h
@@ -0,0 +1,36 @@
1/*
2 * kvm asynchronous fault support
3 *
4 * Copyright 2010 Red Hat, Inc.
5 *
6 * Author:
7 * Gleb Natapov <gleb@redhat.com>
8 *
9 * This file is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License
11 * as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
21 */
22
23#ifndef __KVM_ASYNC_PF_H__
24#define __KVM_ASYNC_PF_H__
25
#ifdef CONFIG_KVM_ASYNC_PF
/* Implemented in async_pf.c. */
int kvm_async_pf_init(void);
void kvm_async_pf_deinit(void);
void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu);
#else
/* Without CONFIG_KVM_ASYNC_PF: init evaluates to 0, the others expand to nothing. */
#define kvm_async_pf_init() (0)
#define kvm_async_pf_deinit() do { } while (0)
#define kvm_async_pf_vcpu_init(C) do { } while (0)
#endif
35
36#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5225052aebc..75fd590c021 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -55,6 +55,7 @@
55#include <asm-generic/bitops/le.h> 55#include <asm-generic/bitops/le.h>
56 56
57#include "coalesced_mmio.h" 57#include "coalesced_mmio.h"
58#include "async_pf.h"
58 59
59#define CREATE_TRACE_POINTS 60#define CREATE_TRACE_POINTS
60#include <trace/events/kvm.h> 61#include <trace/events/kvm.h>
@@ -186,6 +187,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
186 vcpu->kvm = kvm; 187 vcpu->kvm = kvm;
187 vcpu->vcpu_id = id; 188 vcpu->vcpu_id = id;
188 init_waitqueue_head(&vcpu->wq); 189 init_waitqueue_head(&vcpu->wq);
190 kvm_async_pf_vcpu_init(vcpu);
189 191
190 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 192 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
191 if (!page) { 193 if (!page) {
@@ -946,15 +948,20 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
946} 948}
947EXPORT_SYMBOL_GPL(gfn_to_hva); 949EXPORT_SYMBOL_GPL(gfn_to_hva);
948 950
949static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic) 951static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
952 bool *async)
950{ 953{
951 struct page *page[1]; 954 struct page *page[1];
952 int npages; 955 int npages = 0;
953 pfn_t pfn; 956 pfn_t pfn;
954 957
955 if (atomic) 958 /* we can do it either atomically or asynchronously, not both */
959 BUG_ON(atomic && async);
960
961 if (atomic || async)
956 npages = __get_user_pages_fast(addr, 1, 1, page); 962 npages = __get_user_pages_fast(addr, 1, 1, page);
957 else { 963
964 if (unlikely(npages != 1) && !atomic) {
958 might_sleep(); 965 might_sleep();
959 npages = get_user_pages_fast(addr, 1, 1, page); 966 npages = get_user_pages_fast(addr, 1, 1, page);
960 } 967 }
@@ -976,6 +983,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
976 983
977 if (vma == NULL || addr < vma->vm_start || 984 if (vma == NULL || addr < vma->vm_start ||
978 !(vma->vm_flags & VM_PFNMAP)) { 985 !(vma->vm_flags & VM_PFNMAP)) {
986 if (async && !(vma->vm_flags & VM_PFNMAP) &&
987 (vma->vm_flags & VM_WRITE))
988 *async = true;
979 up_read(&current->mm->mmap_sem); 989 up_read(&current->mm->mmap_sem);
980return_fault_page: 990return_fault_page:
981 get_page(fault_page); 991 get_page(fault_page);
@@ -993,32 +1003,41 @@ return_fault_page:
993 1003
994pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) 1004pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
995{ 1005{
996 return hva_to_pfn(kvm, addr, true); 1006 return hva_to_pfn(kvm, addr, true, NULL);
997} 1007}
998EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); 1008EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
999 1009
1000static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic) 1010static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
1001{ 1011{
1002 unsigned long addr; 1012 unsigned long addr;
1003 1013
1014 if (async)
1015 *async = false;
1016
1004 addr = gfn_to_hva(kvm, gfn); 1017 addr = gfn_to_hva(kvm, gfn);
1005 if (kvm_is_error_hva(addr)) { 1018 if (kvm_is_error_hva(addr)) {
1006 get_page(bad_page); 1019 get_page(bad_page);
1007 return page_to_pfn(bad_page); 1020 return page_to_pfn(bad_page);
1008 } 1021 }
1009 1022
1010 return hva_to_pfn(kvm, addr, atomic); 1023 return hva_to_pfn(kvm, addr, atomic, async);
1011} 1024}
1012 1025
1013pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) 1026pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
1014{ 1027{
1015 return __gfn_to_pfn(kvm, gfn, true); 1028 return __gfn_to_pfn(kvm, gfn, true, NULL);
1016} 1029}
1017EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic); 1030EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
1018 1031
1032pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async)
1033{
1034 return __gfn_to_pfn(kvm, gfn, false, async);
1035}
1036EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
1037
1019pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) 1038pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
1020{ 1039{
1021 return __gfn_to_pfn(kvm, gfn, false); 1040 return __gfn_to_pfn(kvm, gfn, false, NULL);
1022} 1041}
1023EXPORT_SYMBOL_GPL(gfn_to_pfn); 1042EXPORT_SYMBOL_GPL(gfn_to_pfn);
1024 1043
@@ -1026,7 +1045,7 @@ pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
1026 struct kvm_memory_slot *slot, gfn_t gfn) 1045 struct kvm_memory_slot *slot, gfn_t gfn)
1027{ 1046{
1028 unsigned long addr = gfn_to_hva_memslot(slot, gfn); 1047 unsigned long addr = gfn_to_hva_memslot(slot, gfn);
1029 return hva_to_pfn(kvm, addr, false); 1048 return hva_to_pfn(kvm, addr, false, NULL);
1030} 1049}
1031 1050
1032int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, 1051int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -2336,6 +2355,10 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
2336 goto out_free_5; 2355 goto out_free_5;
2337 } 2356 }
2338 2357
2358 r = kvm_async_pf_init();
2359 if (r)
2360 goto out_free;
2361
2339 kvm_chardev_ops.owner = module; 2362 kvm_chardev_ops.owner = module;
2340 kvm_vm_fops.owner = module; 2363 kvm_vm_fops.owner = module;
2341 kvm_vcpu_fops.owner = module; 2364 kvm_vcpu_fops.owner = module;
@@ -2343,7 +2366,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
2343 r = misc_register(&kvm_dev); 2366 r = misc_register(&kvm_dev);
2344 if (r) { 2367 if (r) {
2345 printk(KERN_ERR "kvm: misc device register failed\n"); 2368 printk(KERN_ERR "kvm: misc device register failed\n");
2346 goto out_free; 2369 goto out_unreg;
2347 } 2370 }
2348 2371
2349 kvm_preempt_ops.sched_in = kvm_sched_in; 2372 kvm_preempt_ops.sched_in = kvm_sched_in;
@@ -2353,6 +2376,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
2353 2376
2354 return 0; 2377 return 0;
2355 2378
2379out_unreg:
2380 kvm_async_pf_deinit();
2356out_free: 2381out_free:
2357 kmem_cache_destroy(kvm_vcpu_cache); 2382 kmem_cache_destroy(kvm_vcpu_cache);
2358out_free_5: 2383out_free_5:
@@ -2385,6 +2410,7 @@ void kvm_exit(void)
2385 kvm_exit_debug(); 2410 kvm_exit_debug();
2386 misc_deregister(&kvm_dev); 2411 misc_deregister(&kvm_dev);
2387 kmem_cache_destroy(kvm_vcpu_cache); 2412 kmem_cache_destroy(kvm_vcpu_cache);
2413 kvm_async_pf_deinit();
2388 sysdev_unregister(&kvm_sysdev); 2414 sysdev_unregister(&kvm_sysdev);
2389 sysdev_class_unregister(&kvm_sysdev_class); 2415 sysdev_class_unregister(&kvm_sysdev_class);
2390 unregister_reboot_notifier(&kvm_reboot_notifier); 2416 unregister_reboot_notifier(&kvm_reboot_notifier);