KVM: Halt vcpu if page it tries to access is swapped out

If a guest accesses swapped-out memory, do not swap it in from vcpu thread context. Schedule work to do the swapping and put the vcpu into a halted state instead. Interrupts will still be delivered to the guest, and if an interrupt causes a reschedule, the guest will continue to run another task. [avi: remove call to get_user_pages_noio(), nacked by Linus; this makes everything synchronous again] Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: Gleb Natapov <gleb@redhat.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
author: Gleb Natapov <gleb@redhat.com> 2010-10-14 05:22:46 -0400
committer: Avi Kivity <avi@redhat.com> 2011-01-12 04:21:39 -0500
commit: af585b921e5d1e919947c4b1164b59507fe7cd7b (patch)
tree: d0d4cc753d4d58934c5986733d7340fe69e523de /virt/kvm/async_pf.c
parent: 010c520e20413dfd567d568aba2b7238acd37e33 (diff)
1 files changed, 190 insertions, 0 deletions
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
new file mode 100644
index 000000000000..857d63431cb7
--- /dev/null
+++ b/virt/kvm/async_pf.c
@@ -0,0 +1,190 @@
+/*
+ * kvm asynchronous fault support
+ *
+ * Copyright 2010 Red Hat, Inc.
+ *
+ * Author:
+ *      Gleb Natapov <gleb@redhat.com>
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mmu_context.h>
+#include "async_pf.h"
+#include <trace/events/kvm.h>
+/* Slab cache from which every struct kvm_async_pf in this file is allocated. */
+static struct kmem_cache *async_pf_cache;
+/*
+ * Create the slab cache used for async page fault work items.
+ *
+ * Returns 0 on success or -ENOMEM when the cache could not be created.
+ */
+int kvm_async_pf_init(void)
+{
+        async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
+        return async_pf_cache ? 0 : -ENOMEM;
+}
+/* Destroy the async page fault slab cache; does nothing when none exists. */
+void kvm_async_pf_deinit(void)
+{
+        if (!async_pf_cache)
+                return;
+        kmem_cache_destroy(async_pf_cache);
+        /* forget the pointer so a repeated call skips the destroy */
+        async_pf_cache = NULL;
+}
+/*
+ * Initialise the per-vcpu async page fault state: the lock guarding the
+ * done list, the list of queued items and the list of completed items.
+ */
+void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
+{
+        spin_lock_init(&vcpu->async_pf.lock);
+        INIT_LIST_HEAD(&vcpu->async_pf.queue);
+        INIT_LIST_HEAD(&vcpu->async_pf.done);
+}
+/*
+ * Work handler for one queued kvm_async_pf: fault in the page at the
+ * recorded host address, publish the item on the vcpu's done list and wake
+ * the vcpu if it is waiting.
+ *
+ * Drops the mm and kvm references taken by kvm_setup_async_pf().
+ */
+static void async_pf_execute(struct work_struct *work)
+{
+        struct page *page = NULL;
+        struct kvm_async_pf *apf =
+                container_of(work, struct kvm_async_pf, work);
+        /* copy out everything needed after apf has been published */
+        struct mm_struct *mm = apf->mm;
+        struct kvm_vcpu *vcpu = apf->vcpu;
+        unsigned long addr = apf->addr;
+        gva_t gva = apf->gva;
+        might_sleep();
+        /* run on the recorded mm while pinning the page at addr */
+        use_mm(mm);
+        down_read(&mm->mmap_sem);
+        /* return value is not checked; page keeps its NULL initializer unless one is stored */
+        get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+        up_read(&mm->mmap_sem);
+        unuse_mm(mm);
+        /* publish the result on the done list under the vcpu's lock */
+        spin_lock(&vcpu->async_pf.lock);
+        list_add_tail(&apf->link, &vcpu->async_pf.done);
+        apf->page = page;
+        apf->done = true;
+        spin_unlock(&vcpu->async_pf.lock);
+        /*
+         * apf may be freed by kvm_check_async_pf_completion() after
+         * this point
+         */
+        trace_kvm_async_pf_completed(addr, page, gva);
+        /* wake the vcpu if something is waiting on its wait queue */
+        if (waitqueue_active(&vcpu->wq))
+                wake_up_interruptible(&vcpu->wq);
+        /* release the references taken when the work was set up */
+        mmdrop(mm);
+        kvm_put_kvm(vcpu->kvm);
+}
+/*
+ * Tear down all async page fault state of @vcpu: cancel work that has not
+ * run yet, then release every completed item still on the done list and
+ * reset the queued counter.
+ */
+void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
+{
+        /* cancel outstanding work queue item */
+        while (!list_empty(&vcpu->async_pf.queue)) {
+                struct kvm_async_pf *work =
+                        list_entry(vcpu->async_pf.queue.next,
+                                   typeof(*work), queue);
+                list_del(&work->queue);
+                /*
+                 * A cancelled item never reaches async_pf_execute(), so the
+                 * mm and kvm references taken in kvm_setup_async_pf() must
+                 * be dropped here.  An item that did run sits on the done
+                 * list and is released by the loop below.
+                 */
+                if (cancel_work_sync(&work->work)) {
+                        mmdrop(work->mm);
+                        kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
+                        kmem_cache_free(async_pf_cache, work);
+                }
+        }
+        /* completed items: drop their page, if any, and free them */
+        spin_lock(&vcpu->async_pf.lock);
+        while (!list_empty(&vcpu->async_pf.done)) {
+                struct kvm_async_pf *work =
+                        list_entry(vcpu->async_pf.done.next,
+                                   typeof(*work), link);
+                list_del(&work->link);
+                if (work->page)
+                        put_page(work->page);
+                kmem_cache_free(async_pf_cache, work);
+        }
+        spin_unlock(&vcpu->async_pf.lock);
+        vcpu->async_pf.queued = 0;
+}
+/*
+ * Deliver at most one completed async page fault to @vcpu: take the first
+ * item off the done list, hand it to the arch code and release it.
+ */
+void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
+{
+        struct kvm_async_pf *work;
+        /* unlocked peek; the list is read again under the lock below */
+        if (list_empty_careful(&vcpu->async_pf.done))
+                return;
+        spin_lock(&vcpu->async_pf.lock);
+        work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
+        list_del(&work->link);
+        spin_unlock(&vcpu->async_pf.lock);
+        /* the item is off the done list, so the rest runs without the lock */
+        kvm_arch_async_page_present(vcpu, work);
+        list_del(&work->queue);
+        vcpu->async_pf.queued--;
+        /* release the page stored by async_pf_execute(), if any */
+        if (work->page)
+                put_page(work->page);
+        kmem_cache_free(async_pf_cache, work);
+}
+/*
+ * Queue an asynchronous page fault for @gfn on behalf of @vcpu.
+ *
+ * @gva and @arch are recorded in the work item for later use.
+ *
+ * Returns 1 when the work was scheduled and 0 when it was not (per-vcpu
+ * limit reached, invalid host address, allocation failure or scheduling
+ * failure); on 0 no references or memory are left behind.
+ */
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+                       struct kvm_arch_async_pf *arch)
+{
+        struct kvm_async_pf *work;
+        unsigned long hva;
+        if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
+                return 0;
+        /*
+         * Validate the host address before allocating or taking references.
+         * this can't really happen otherwise gfn_to_pfn_async
+         * would succeed
+         */
+        hva = gfn_to_hva(vcpu->kvm, gfn);
+        if (unlikely(kvm_is_error_hva(hva)))
+                return 0;
+        /* setup delayed work */
+        /*
+         * do alloc nowait since if we are going to sleep anyway we
+         * may as well sleep faulting in page
+         */
+        work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+        if (!work)
+                return 0;
+        /* page and done are already NULL/false from the zeroing allocation */
+        work->vcpu = vcpu;
+        work->gva = gva;
+        work->addr = hva;
+        work->arch = *arch;
+        work->mm = current->mm;
+        /* both references are dropped by async_pf_execute() or at retry_sync */
+        atomic_inc(&work->mm->mm_count);
+        kvm_get_kvm(work->vcpu->kvm);
+        INIT_WORK(&work->work, async_pf_execute);
+        if (!schedule_work(&work->work))
+                goto retry_sync;
+        list_add_tail(&work->queue, &vcpu->async_pf.queue);
+        vcpu->async_pf.queued++;
+        kvm_arch_async_page_not_present(vcpu, work);
+        return 1;
+retry_sync:
+        kvm_put_kvm(work->vcpu->kvm);
+        mmdrop(work->mm);
+        kmem_cache_free(async_pf_cache, work);
+        return 0;
+}
author	Gleb Natapov <gleb@redhat.com>	2010-10-14 05:22:46 -0400
committer	Avi Kivity <avi@redhat.com>	2011-01-12 04:21:39 -0500
commit	af585b921e5d1e919947c4b1164b59507fe7cd7b (patch)
tree	d0d4cc753d4d58934c5986733d7340fe69e523de /virt/kvm/async_pf.c
parent	010c520e20413dfd567d568aba2b7238acd37e33 (diff)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c new file mode 100644 index 000000000000..857d63431cb7 --- /dev/null +++ b/virt/kvm/async_pf.c
@@ -0,0 +1,190 @@
	1	/*
	2	* kvm asynchronous fault support
	3	*
	4	* Copyright 2010 Red Hat, Inc.
	5	*
	6	* Author:
	7	* Gleb Natapov <gleb@redhat.com>
	8	*
	9	* This file is free software; you can redistribute it and/or modify
	10	* it under the terms of version 2 of the GNU General Public License
	11	* as published by the Free Software Foundation.
	12	*
	13	* This program is distributed in the hope that it will be useful,
	14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	* GNU General Public License for more details.
	17	*
	18	* You should have received a copy of the GNU General Public License
	19	* along with this program; if not, write to the Free Software Foundation,
	20	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
	21	*/
	22
	23	#include <linux/kvm_host.h>
	24	#include <linux/slab.h>
	25	#include <linux/module.h>
	26	#include <linux/mmu_context.h>
	27
	28	#include "async_pf.h"
	29	#include <trace/events/kvm.h>
	30
	31	static struct kmem_cache *async_pf_cache; /* slab cache for struct kvm_async_pf items */
	32
	33	int kvm_async_pf_init(void) /* create the work-item slab cache */
	34	{
	35	async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);
	36	/* -ENOMEM when the cache could not be created, 0 otherwise */
	37	if (!async_pf_cache)
	38	return -ENOMEM;
	39
	40	return 0;
	41	}
	42
	43	void kvm_async_pf_deinit(void) /* destroy the work-item slab cache */
	44	{
	45	if (async_pf_cache) /* skip the destroy when no cache exists */
	46	kmem_cache_destroy(async_pf_cache);
	47	async_pf_cache = NULL; /* a repeated call then skips the destroy */
	48	}
	49
	50	void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) /* set up per-vcpu async pf state */
	51	{
	52	INIT_LIST_HEAD(&vcpu->async_pf.done); /* completed items */
	53	INIT_LIST_HEAD(&vcpu->async_pf.queue); /* all queued items */
	54	spin_lock_init(&vcpu->async_pf.lock); /* taken around done-list updates */
	55	}
	56
	57	static void async_pf_execute(struct work_struct *work) /* work handler for one async page fault */
	58	{
	59	struct page *page = NULL;
	60	struct kvm_async_pf *apf =
	61	container_of(work, struct kvm_async_pf, work);
	62	struct mm_struct *mm = apf->mm;
	63	struct kvm_vcpu *vcpu = apf->vcpu;
	64	unsigned long addr = apf->addr;
	65	gva_t gva = apf->gva;
	66	/* the locals above are the only state used once apf is published */
	67	might_sleep();
	68	/* run on the recorded mm while pinning the page at addr */
	69	use_mm(mm);
	70	down_read(&mm->mmap_sem);
	71	get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); /* return value is not checked */
	72	up_read(&mm->mmap_sem);
	73	unuse_mm(mm);
	74	/* publish the result on the done list under the vcpu's lock */
	75	spin_lock(&vcpu->async_pf.lock);
	76	list_add_tail(&apf->link, &vcpu->async_pf.done);
	77	apf->page = page;
	78	apf->done = true;
	79	spin_unlock(&vcpu->async_pf.lock);
	80
	81	/*
	82	* apf may be freed by kvm_check_async_pf_completion() after
	83	* this point
	84	*/
	85
	86	trace_kvm_async_pf_completed(addr, page, gva);
	87	/* wake the vcpu if something is waiting on its wait queue */
	88	if (waitqueue_active(&vcpu->wq))
	89	wake_up_interruptible(&vcpu->wq);
	90	/* release the references taken in kvm_setup_async_pf() */
	91	mmdrop(mm);
	92	kvm_put_kvm(vcpu->kvm);
	93	}
	94
	95	void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) /* drop all queued and completed items */
	96	{
	97	/* cancel outstanding work queue item */
	98	while (!list_empty(&vcpu->async_pf.queue)) {
	99	struct kvm_async_pf *work =
	100	list_entry(vcpu->async_pf.queue.next,
	101	typeof(*work), queue);
	102	cancel_work_sync(&work->work);
	103	list_del(&work->queue);
	104	if (!work->done) /* work was canceled */
	105	kmem_cache_free(async_pf_cache, work); /* NOTE(review): mm and kvm references taken in kvm_setup_async_pf() are not dropped on this path - looks like a leak, confirm */
	106	}
	107	/* completed items: drop their page, if any, and free them under the lock */
	108	spin_lock(&vcpu->async_pf.lock);
	109	while (!list_empty(&vcpu->async_pf.done)) {
	110	struct kvm_async_pf *work =
	111	list_entry(vcpu->async_pf.done.next,
	112	typeof(*work), link);
	113	list_del(&work->link);
	114	if (work->page)
	115	put_page(work->page);
	116	kmem_cache_free(async_pf_cache, work);
	117	}
	118	spin_unlock(&vcpu->async_pf.lock);
	119
	120	vcpu->async_pf.queued = 0;
	121	}
	122
	123	void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) /* deliver at most one completed item */
	124	{
	125	struct kvm_async_pf *work;
	126	/* unlocked peek; the list is read again under the lock below */
	127	if (list_empty_careful(&vcpu->async_pf.done))
	128	return;
	129
	130	spin_lock(&vcpu->async_pf.lock);
	131	work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
	132	list_del(&work->link);
	133	spin_unlock(&vcpu->async_pf.lock);
	134	/* the item is off the done list, so the rest runs without the lock */
	135	kvm_arch_async_page_present(vcpu, work);
	136
	137	list_del(&work->queue);
	138	vcpu->async_pf.queued--;
	139	if (work->page) /* release the page stored by async_pf_execute(), if any */
	140	put_page(work->page);
	141	kmem_cache_free(async_pf_cache, work);
	142	}
	143
	144	int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
	145	struct kvm_arch_async_pf *arch) /* queue an async page fault for gfn */
	146	{
	147	struct kvm_async_pf *work;
	148	/* returns 1 when the work was scheduled, 0 when it was not */
	149	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
	150	return 0;
	151
	152	/* setup delayed work */
	153
	154	/*
	155	* do alloc nowait since if we are going to sleep anyway we
	156	* may as well sleep faulting in page
	157	*/
	158	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
	159	if (!work)
	160	return 0;
	161	/* page and done are already zero from kmem_cache_zalloc() */
	162	work->page = NULL;
	163	work->done = false;
	164	work->vcpu = vcpu;
	165	work->gva = gva;
	166	work->addr = gfn_to_hva(vcpu->kvm, gfn);
	167	work->arch = *arch;
	168	work->mm = current->mm;
	169	atomic_inc(&work->mm->mm_count);
	170	kvm_get_kvm(work->vcpu->kvm);
	171	/* both references above are dropped again at retry_sync */
	172	/* this can't really happen otherwise gfn_to_pfn_async
	173	would succeed */
	174	if (unlikely(kvm_is_error_hva(work->addr)))
	175	goto retry_sync;
	176
	177	INIT_WORK(&work->work, async_pf_execute);
	178	if (!schedule_work(&work->work))
	179	goto retry_sync;
	180	/* track the item on the vcpu and notify the arch code */
	181	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	182	vcpu->async_pf.queued++;
	183	kvm_arch_async_page_not_present(vcpu, work);
	184	return 1;
	185	retry_sync:
	186	kvm_put_kvm(work->vcpu->kvm);
	187	mmdrop(work->mm);
	188	kmem_cache_free(async_pf_cache, work);
	189	return 0;
	190	}