KVM: MMU: hypercall based pte updates and TLB flushes

Hypercall based pte updates are faster than faults, and also allow use of the lazy MMU mode to batch operations.

Don't report the feature if two dimensional paging is enabled.

[avi:
 - one mmu_op hypercall instead of one per op
 - allow 64-bit gpa on hypercall
 - don't pass host errors (-ENOMEM) to guest]

[akpm: warning fix on i386]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
author: Marcelo Tosatti <mtosatti@redhat.com> 2008-02-22 12:21:37 -0500
committer: Avi Kivity <avi@qumranet.com> 2008-04-27 05:00:27 -0400
commit: 2f333bcb4edd8daef99dabe4e7df8277af73cff1 (patch)
tree: c984466e7756e0910bf470a094558b52bd10df33 /arch
parent: 9f81128591ca1e9907f2e7a7b195e33232167d60 (diff)
2 files changed, 152 insertions, 2 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 414405b6ec13..072e9422c914 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
+#include <linux/compiler.h>
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -40,7 +41,7 @@
 * 2. while doing 1. it walks guest-physical to host-physical
 * If the hardware supports that we don't need to do shadow paging.
 */
-static bool tdp_enabled = false;
+bool tdp_enabled = false;
 #undef MMU_DEBUG
@@ -167,6 +168,13 @@ static int dbg = 1;
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+struct kvm_pv_mmu_op_buffer {
+        void *ptr;
+        unsigned len;
+        unsigned processed;
+        char buf[512] __aligned(sizeof(long));
+};
 struct kvm_rmap_desc {
        u64 *shadow_ptes[RMAP_EXT];
        struct kvm_rmap_desc *more;
@@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
        return nr_mmu_pages;
 }
+static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+                                unsigned len)
+{
+        if (len > buffer->len)
+                return NULL;
+        return buffer->ptr;
+}
+static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+                                unsigned len)
+{
+        void *ret;
+        ret = pv_mmu_peek_buffer(buffer, len);
+        if (!ret)
+                return ret;
+        buffer->ptr += len;
+        buffer->len -= len;
+        buffer->processed += len;
+        return ret;
+}
+static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
+                             gpa_t addr, gpa_t value)
+{
+        int bytes = 8;
+        int r;
+        if (!is_long_mode(vcpu) && !is_pae(vcpu))
+                bytes = 4;
+        r = mmu_topup_memory_caches(vcpu);
+        if (r)
+                return r;
+        if (!__emulator_write_phys(vcpu, addr, &value, bytes))
+                return -EFAULT;
+        return 1;
+}
+static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+        kvm_x86_ops->tlb_flush(vcpu);
+        return 1;
+}
+static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+        spin_lock(&vcpu->kvm->mmu_lock);
+        mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+        spin_unlock(&vcpu->kvm->mmu_lock);
+        return 1;
+}
+static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
+                             struct kvm_pv_mmu_op_buffer *buffer)
+{
+        struct kvm_mmu_op_header *header;
+        header = pv_mmu_peek_buffer(buffer, sizeof *header);
+        if (!header)
+                return 0;
+        switch (header->op) {
+        case KVM_MMU_OP_WRITE_PTE: {
+                struct kvm_mmu_op_write_pte *wpte;
+                wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
+                if (!wpte)
+                        return 0;
+                return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
+                                        wpte->pte_val);
+        }
+        case KVM_MMU_OP_FLUSH_TLB: {
+                struct kvm_mmu_op_flush_tlb *ftlb;
+                ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
+                if (!ftlb)
+                        return 0;
+                return kvm_pv_mmu_flush_tlb(vcpu);
+        }
+        case KVM_MMU_OP_RELEASE_PT: {
+                struct kvm_mmu_op_release_pt *rpt;
+                rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
+                if (!rpt)
+                        return 0;
+                return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
+        }
+        default: return 0;
+        }
+}
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+                  gpa_t addr, unsigned long *ret)
+{
+        int r;
+        struct kvm_pv_mmu_op_buffer buffer;
+        down_read(&vcpu->kvm->slots_lock);
+        down_read(&current->mm->mmap_sem);
+        buffer.ptr = buffer.buf;
+        buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
+        buffer.processed = 0;
+        r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
+        if (r)
+                goto out;
+        while (buffer.len) {
+                r = kvm_pv_mmu_op_one(vcpu, &buffer);
+                if (r < 0)
+                        goto out;
+                if (r == 0)
+                        break;
+        }
+        r = 1;
+out:
+        *ret = buffer.processed;
+        up_read(&current->mm->mmap_sem);
+        up_read(&vcpu->kvm->slots_lock);
+        return r;
+}
 #ifdef AUDIT
 static const char *audit_msg;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03ba402c476a..63afca1c295f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -832,6 +832,9 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_MEMORY_SLOTS;
                break;
+        case KVM_CAP_PV_MMU:
+                r = !tdp_enabled;
+                break;
        default:
                r = 0;
                break;
@@ -2452,9 +2455,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
+                           unsigned long a1)
+{
+        if (is_long_mode(vcpu))
+                return a0;
+        else
+                return a0 | ((gpa_t)a1 << 32);
+}
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
+        int r = 1;
        kvm_x86_ops->cache_regs(vcpu);
@@ -2476,6 +2489,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        case KVM_HC_VAPIC_POLL_IRQ:
                ret = 0;
                break;
+        case KVM_HC_MMU_OP:
+                r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
+                break;
        default:
                ret = -KVM_ENOSYS;
                break;
@@ -2483,7 +2499,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        vcpu->arch.regs[VCPU_REGS_RAX] = ret;
        kvm_x86_ops->decache_regs(vcpu);
        ++vcpu->stat.hypercalls;
-        return 0;
+        return r;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
author	Marcelo Tosatti <mtosatti@redhat.com>	2008-02-22 12:21:37 -0500
committer	Avi Kivity <avi@qumranet.com>	2008-04-27 05:00:27 -0400
commit	2f333bcb4edd8daef99dabe4e7df8277af73cff1 (patch)
tree	c984466e7756e0910bf470a094558b52bd10df33 /arch
parent	9f81128591ca1e9907f2e7a7b195e33232167d60 (diff)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 414405b6ec13..072e9422c914 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -28,6 +28,7 @@
28	#include <linux/module.h>	28	#include <linux/module.h>
29	#include <linux/swap.h>	29	#include <linux/swap.h>
30	#include <linux/hugetlb.h>	30	#include <linux/hugetlb.h>
		31	#include <linux/compiler.h>
31		32
32	#include <asm/page.h>	33	#include <asm/page.h>
33	#include <asm/cmpxchg.h>	34	#include <asm/cmpxchg.h>
@@ -40,7 +41,7 @@
40	* 2. while doing 1. it walks guest-physical to host-physical	41	* 2. while doing 1. it walks guest-physical to host-physical
41	* If the hardware supports that we don't need to do shadow paging.	42	* If the hardware supports that we don't need to do shadow paging.
42	*/	43	*/
43	static bool tdp_enabled = false;	44	bool tdp_enabled = false;
44		45
45	#undef MMU_DEBUG	46	#undef MMU_DEBUG
46		47
@@ -167,6 +168,13 @@ static int dbg = 1;
167	#define ACC_USER_MASK PT_USER_MASK	168	#define ACC_USER_MASK PT_USER_MASK
168	#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)	169	#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
169		170
		171	struct kvm_pv_mmu_op_buffer {
		172	void *ptr;
		173	unsigned len;
		174	unsigned processed;
		175	char buf[512] __aligned(sizeof(long));
		176	};
		177
170	struct kvm_rmap_desc {	178	struct kvm_rmap_desc {
171	u64 *shadow_ptes[RMAP_EXT];	179	u64 *shadow_ptes[RMAP_EXT];
172	struct kvm_rmap_desc *more;	180	struct kvm_rmap_desc *more;
@@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
2003	return nr_mmu_pages;	2011	return nr_mmu_pages;
2004	}	2012	}
2005		2013
		2014	static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
		2015	unsigned len)
		2016	{
		2017	if (len > buffer->len)
		2018	return NULL;
		2019	return buffer->ptr;
		2020	}
		2021
		2022	static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
		2023	unsigned len)
		2024	{
		2025	void *ret;
		2026
		2027	ret = pv_mmu_peek_buffer(buffer, len);
		2028	if (!ret)
		2029	return ret;
		2030	buffer->ptr += len;
		2031	buffer->len -= len;
		2032	buffer->processed += len;
		2033	return ret;
		2034	}
		2035
		2036	static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
		2037	gpa_t addr, gpa_t value)
		2038	{
		2039	int bytes = 8;
		2040	int r;
		2041
		2042	if (!is_long_mode(vcpu) && !is_pae(vcpu))
		2043	bytes = 4;
		2044
		2045	r = mmu_topup_memory_caches(vcpu);
		2046	if (r)
		2047	return r;
		2048
		2049	if (!__emulator_write_phys(vcpu, addr, &value, bytes))
		2050	return -EFAULT;
		2051
		2052	return 1;
		2053	}
		2054
		2055	static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
		2056	{
		2057	kvm_x86_ops->tlb_flush(vcpu);
		2058	return 1;
		2059	}
		2060
		2061	static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
		2062	{
		2063	spin_lock(&vcpu->kvm->mmu_lock);
		2064	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
		2065	spin_unlock(&vcpu->kvm->mmu_lock);
		2066	return 1;
		2067	}
		2068
		2069	static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
		2070	struct kvm_pv_mmu_op_buffer *buffer)
		2071	{
		2072	struct kvm_mmu_op_header *header;
		2073
		2074	header = pv_mmu_peek_buffer(buffer, sizeof *header);
		2075	if (!header)
		2076	return 0;
		2077	switch (header->op) {
		2078	case KVM_MMU_OP_WRITE_PTE: {
		2079	struct kvm_mmu_op_write_pte *wpte;
		2080
		2081	wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
		2082	if (!wpte)
		2083	return 0;
		2084	return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
		2085	wpte->pte_val);
		2086	}
		2087	case KVM_MMU_OP_FLUSH_TLB: {
		2088	struct kvm_mmu_op_flush_tlb *ftlb;
		2089
		2090	ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
		2091	if (!ftlb)
		2092	return 0;
		2093	return kvm_pv_mmu_flush_tlb(vcpu);
		2094	}
		2095	case KVM_MMU_OP_RELEASE_PT: {
		2096	struct kvm_mmu_op_release_pt *rpt;
		2097
		2098	rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
		2099	if (!rpt)
		2100	return 0;
		2101	return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
		2102	}
		2103	default: return 0;
		2104	}
		2105	}
		2106
		2107	int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
		2108	gpa_t addr, unsigned long *ret)
		2109	{
		2110	int r;
		2111	struct kvm_pv_mmu_op_buffer buffer;
		2112
		2113	down_read(&vcpu->kvm->slots_lock);
		2114	down_read(&current->mm->mmap_sem);
		2115
		2116	buffer.ptr = buffer.buf;
		2117	buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
		2118	buffer.processed = 0;
		2119
		2120	r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
		2121	if (r)
		2122	goto out;
		2123
		2124	while (buffer.len) {
		2125	r = kvm_pv_mmu_op_one(vcpu, &buffer);
		2126	if (r < 0)
		2127	goto out;
		2128	if (r == 0)
		2129	break;
		2130	}
		2131
		2132	r = 1;
		2133	out:
		2134	*ret = buffer.processed;
		2135	up_read(&current->mm->mmap_sem);
		2136	up_read(&vcpu->kvm->slots_lock);
		2137	return r;
		2138	}
		2139
2006	#ifdef AUDIT	2140	#ifdef AUDIT
2007		2141
2008	static const char *audit_msg;	2142	static const char *audit_msg;


diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03ba402c476a..63afca1c295f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -832,6 +832,9 @@ int kvm_dev_ioctl_check_extension(long ext)
832	case KVM_CAP_NR_MEMSLOTS:	832	case KVM_CAP_NR_MEMSLOTS:
833	r = KVM_MEMORY_SLOTS;	833	r = KVM_MEMORY_SLOTS;
834	break;	834	break;
		835	case KVM_CAP_PV_MMU:
		836	r = !tdp_enabled;
		837	break;
835	default:	838	default:
836	r = 0;	839	r = 0;
837	break;	840	break;
@@ -2452,9 +2455,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2452	}	2455	}
2453	EXPORT_SYMBOL_GPL(kvm_emulate_halt);	2456	EXPORT_SYMBOL_GPL(kvm_emulate_halt);
2454		2457
		2458	static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
		2459	unsigned long a1)
		2460	{
		2461	if (is_long_mode(vcpu))
		2462	return a0;
		2463	else
		2464	return a0 | ((gpa_t)a1 << 32);
		2465	}
		2466
2455	int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)	2467	int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2456	{	2468	{
2457	unsigned long nr, a0, a1, a2, a3, ret;	2469	unsigned long nr, a0, a1, a2, a3, ret;
		2470	int r = 1;
2458		2471
2459	kvm_x86_ops->cache_regs(vcpu);	2472	kvm_x86_ops->cache_regs(vcpu);
2460		2473
@@ -2476,6 +2489,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2476	case KVM_HC_VAPIC_POLL_IRQ:	2489	case KVM_HC_VAPIC_POLL_IRQ:
2477	ret = 0;	2490	ret = 0;
2478	break;	2491	break;
		2492	case KVM_HC_MMU_OP:
		2493	r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
		2494	break;
2479	default:	2495	default:
2480	ret = -KVM_ENOSYS;	2496	ret = -KVM_ENOSYS;
2481	break;	2497	break;
@@ -2483,7 +2499,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2483	vcpu->arch.regs[VCPU_REGS_RAX] = ret;	2499	vcpu->arch.regs[VCPU_REGS_RAX] = ret;
2484	kvm_x86_ops->decache_regs(vcpu);	2500	kvm_x86_ops->decache_regs(vcpu);
2485	++vcpu->stat.hypercalls;	2501	++vcpu->stat.hypercalls;
2486	return 0;	2502	return r;
2487	}	2503	}
2488	EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);	2504	EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
2489		2505