aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Marcelo Tosatti <mtosatti@redhat.com> 2008-02-22 12:21:37 -0500
committer Avi Kivity <avi@qumranet.com> 2008-04-27 05:00:27 -0400
commit 2f333bcb4edd8daef99dabe4e7df8277af73cff1 (patch)
tree c984466e7756e0910bf470a094558b52bd10df33
parent 9f81128591ca1e9907f2e7a7b195e33232167d60 (diff)
KVM: MMU: hypercall based pte updates and TLB flushes
Hypercall based pte updates are faster than faults, and also allow use of the lazy MMU mode to batch operations.

Don't report the feature if two dimensional paging is enabled.

[avi:
 - one mmu_op hypercall instead of one per op
 - allow 64-bit gpa on hypercall
 - don't pass host errors (-ENOMEM) to guest]

[akpm: warning fix on i386]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r-- arch/x86/kvm/mmu.c 136
-rw-r--r-- arch/x86/kvm/x86.c 18
-rw-r--r-- include/asm-x86/kvm_host.h 4
-rw-r--r-- include/asm-x86/kvm_para.h 29
-rw-r--r-- include/linux/kvm.h 1
-rw-r--r-- include/linux/kvm_para.h 5
6 files changed, 190 insertions, 3 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 414405b6ec13..072e9422c914 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -28,6 +28,7 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/swap.h> 29#include <linux/swap.h>
30#include <linux/hugetlb.h> 30#include <linux/hugetlb.h>
31#include <linux/compiler.h>
31 32
32#include <asm/page.h> 33#include <asm/page.h>
33#include <asm/cmpxchg.h> 34#include <asm/cmpxchg.h>
@@ -40,7 +41,7 @@
40 * 2. while doing 1. it walks guest-physical to host-physical 41 * 2. while doing 1. it walks guest-physical to host-physical
41 * If the hardware supports that we don't need to do shadow paging. 42 * If the hardware supports that we don't need to do shadow paging.
42 */ 43 */
43static bool tdp_enabled = false; 44bool tdp_enabled = false;
44 45
45#undef MMU_DEBUG 46#undef MMU_DEBUG
46 47
@@ -167,6 +168,13 @@ static int dbg = 1;
167#define ACC_USER_MASK PT_USER_MASK 168#define ACC_USER_MASK PT_USER_MASK
168#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) 169#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
169 170
/*
 * Scratch state for one kvm_pv_mmu_op() call: the guest's op stream is
 * copied into buf and then consumed front-to-back through ptr.
 */
171struct kvm_pv_mmu_op_buffer {
172 void *ptr; /* next unread byte; kvm_pv_mmu_op() starts it at buf */
173 unsigned len; /* bytes still unread */
174 unsigned processed; /* bytes consumed so far */
175 char buf[512] __aligned(sizeof(long)); /* local copy of the guest data */
176};
177
170struct kvm_rmap_desc { 178struct kvm_rmap_desc {
171 u64 *shadow_ptes[RMAP_EXT]; 179 u64 *shadow_ptes[RMAP_EXT];
172 struct kvm_rmap_desc *more; 180 struct kvm_rmap_desc *more;
@@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
2003 return nr_mmu_pages; 2011 return nr_mmu_pages;
2004} 2012}
2005 2013
/*
 * Look at the next @len bytes of @buffer without consuming them.
 *
 * Returns the current read pointer, or NULL when fewer than @len bytes
 * remain unread.
 */
2014static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
2015 unsigned len)
2016{
2017 if (len > buffer->len)
2018 return NULL;
2019 return buffer->ptr;
2020}
2021
/*
 * Consume the next @len bytes of @buffer.
 *
 * Returns a pointer to the start of the consumed region, or NULL (leaving
 * the buffer untouched) when fewer than @len bytes remain unread.
 */
2022static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
2023 unsigned len)
2024{
2025 void *ret;
2026
2027 ret = pv_mmu_peek_buffer(buffer, len);
2028 if (!ret)
2029 return ret;
/* Advance the cursor and account for the bytes just handed out. */
2030 buffer->ptr += len;
2031 buffer->len -= len;
2032 buffer->processed += len;
2033 return ret;
2034}
2035
/*
 * Write @value to guest physical address @addr through
 * __emulator_write_phys().
 *
 * The write is 8 bytes wide unless the vcpu is in neither long mode nor
 * PAE mode, in which case it is 4 bytes wide.
 *
 * Returns 1 on success, the nonzero result of mmu_topup_memory_caches()
 * if that fails, or -EFAULT if __emulator_write_phys() returns zero.
 */
2036static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
2037 gpa_t addr, gpa_t value)
2038{
2039 int bytes = 8;
2040 int r;
2041
2042 if (!is_long_mode(vcpu) && !is_pae(vcpu))
2043 bytes = 4;
2044
2045 r = mmu_topup_memory_caches(vcpu);
2046 if (r)
2047 return r;
2048
2049 if (!__emulator_write_phys(vcpu, addr, &value, bytes))
2050 return -EFAULT;
2051
2052 return 1;
2053}
2054
/*
 * KVM_MMU_OP_FLUSH_TLB handler: invoke the tlb_flush callback of
 * kvm_x86_ops for @vcpu. Always returns 1.
 */
2055static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
2056{
2057 kvm_x86_ops->tlb_flush(vcpu);
2058 return 1;
2059}
2060
/*
 * KVM_MMU_OP_RELEASE_PT handler: call mmu_unshadow() on the guest frame
 * containing @addr while holding the VM's mmu_lock. Always returns 1.
 */
2061static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
2062{
2063 spin_lock(&vcpu->kvm->mmu_lock);
/* mmu_unshadow() takes a frame number, hence the shift. */
2064 mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
2065 spin_unlock(&vcpu->kvm->mmu_lock);
2066 return 1;
2067}
2068
/*
 * Decode and execute the next operation in @buffer.
 *
 * The header is peeked first to learn the op type; the matching case then
 * consumes the whole op structure (header included) before dispatching.
 *
 * Returns 0 when the buffer does not hold a complete header or a complete
 * op of the indicated type, or when the op code is unknown. Otherwise
 * returns the handler's result (1 on success, negative on error).
 */
2069static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
2070 struct kvm_pv_mmu_op_buffer *buffer)
2071{
2072 struct kvm_mmu_op_header *header;
2073
2074 header = pv_mmu_peek_buffer(buffer, sizeof *header);
2075 if (!header)
2076 return 0;
2077 switch (header->op) {
2078 case KVM_MMU_OP_WRITE_PTE: {
2079 struct kvm_mmu_op_write_pte *wpte;
2080
2081 wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
2082 if (!wpte)
2083 return 0;
2084 return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
2085 wpte->pte_val);
2086 }
2087 case KVM_MMU_OP_FLUSH_TLB: {
2088 struct kvm_mmu_op_flush_tlb *ftlb;
2089
/* No payload beyond the header, but the op must still be consumed. */
2090 ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
2091 if (!ftlb)
2092 return 0;
2093 return kvm_pv_mmu_flush_tlb(vcpu);
2094 }
2095 case KVM_MMU_OP_RELEASE_PT: {
2096 struct kvm_mmu_op_release_pt *rpt;
2097
2098 rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
2099 if (!rpt)
2100 return 0;
2101 return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
2102 }
2103 default: return 0;
2104 }
2105}
2106
/*
 * Handle one KVM_HC_MMU_OP hypercall: copy a batch of MMU operations from
 * guest memory and execute them in order.
 *
 * @vcpu:  vcpu that issued the hypercall.
 * @bytes: size of the guest's op stream; at most sizeof(buffer.buf) bytes
 *         of it are read in one call.
 * @addr:  guest physical address of the op stream.
 * @ret:   receives the number of bytes consumed; written on every path,
 *         including errors.
 *
 * Runs with kvm->slots_lock and current->mm->mmap_sem held for reading.
 *
 * Returns 1 when processing ended normally (buffer drained, or
 * kvm_pv_mmu_op_one() returned 0), the nonzero kvm_read_guest() result if
 * the copy failed, or the negative result of a failed operation.
 *
 * NOTE(review): presumably the guest re-issues the hypercall for any part
 * of the stream not reported in *ret - confirm against the guest side.
 */
2107int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
2108 gpa_t addr, unsigned long *ret)
2109{
2110 int r;
2111 struct kvm_pv_mmu_op_buffer buffer;
2112
2113 down_read(&vcpu->kvm->slots_lock);
2114 down_read(&current->mm->mmap_sem);
2115
2116 buffer.ptr = buffer.buf;
/* Clamp the guest-supplied length to the on-stack buffer size. */
2117 buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
2118 buffer.processed = 0;
2119
2120 r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
2121 if (r)
2122 goto out;
2123
2124 while (buffer.len) {
2125 r = kvm_pv_mmu_op_one(vcpu, &buffer);
2126 if (r < 0)
2127 goto out;
/* 0 means a truncated or unknown op: stop, but not as an error. */
2128 if (r == 0)
2129 break;
2130 }
2131
2132 r = 1;
2133out:
2134 *ret = buffer.processed;
2135 up_read(&current->mm->mmap_sem);
2136 up_read(&vcpu->kvm->slots_lock);
2137 return r;
2138}
2139
2006#ifdef AUDIT 2140#ifdef AUDIT
2007 2141
2008static const char *audit_msg; 2142static const char *audit_msg;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03ba402c476a..63afca1c295f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -832,6 +832,9 @@ int kvm_dev_ioctl_check_extension(long ext)
832 case KVM_CAP_NR_MEMSLOTS: 832 case KVM_CAP_NR_MEMSLOTS:
833 r = KVM_MEMORY_SLOTS; 833 r = KVM_MEMORY_SLOTS;
834 break; 834 break;
835 case KVM_CAP_PV_MMU:
836 r = !tdp_enabled;
837 break;
835 default: 838 default:
836 r = 0; 839 r = 0;
837 break; 840 break;
@@ -2452,9 +2455,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2452} 2455}
2453EXPORT_SYMBOL_GPL(kvm_emulate_halt); 2456EXPORT_SYMBOL_GPL(kvm_emulate_halt);
2454 2457
/*
 * Build a guest physical address from two hypercall arguments.
 *
 * In long mode @a0 is returned as is. Otherwise @a0 supplies the low part
 * and @a1 is shifted into bits 32 and up, so a 64-bit gpa can be passed
 * through unsigned long arguments.
 */
2458static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
2459 unsigned long a1)
2460{
2461 if (is_long_mode(vcpu))
2462 return a0;
2463 else
2464 return a0 | ((gpa_t)a1 << 32);
2465}
2466
2455int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 2467int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2456{ 2468{
2457 unsigned long nr, a0, a1, a2, a3, ret; 2469 unsigned long nr, a0, a1, a2, a3, ret;
2470 int r = 1;
2458 2471
2459 kvm_x86_ops->cache_regs(vcpu); 2472 kvm_x86_ops->cache_regs(vcpu);
2460 2473
@@ -2476,6 +2489,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2476 case KVM_HC_VAPIC_POLL_IRQ: 2489 case KVM_HC_VAPIC_POLL_IRQ:
2477 ret = 0; 2490 ret = 0;
2478 break; 2491 break;
2492 case KVM_HC_MMU_OP:
2493 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
2494 break;
2479 default: 2495 default:
2480 ret = -KVM_ENOSYS; 2496 ret = -KVM_ENOSYS;
2481 break; 2497 break;
@@ -2483,7 +2499,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2483 vcpu->arch.regs[VCPU_REGS_RAX] = ret; 2499 vcpu->arch.regs[VCPU_REGS_RAX] = ret;
2484 kvm_x86_ops->decache_regs(vcpu); 2500 kvm_x86_ops->decache_regs(vcpu);
2485 ++vcpu->stat.hypercalls; 2501 ++vcpu->stat.hypercalls;
2486 return 0; 2502 return r;
2487} 2503}
2488EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 2504EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
2489 2505
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 99d31f5ed9ff..772ba95f0a0e 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -434,6 +434,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
434 434
435int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 435int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
436 const void *val, int bytes); 436 const void *val, int bytes);
437int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
438 gpa_t addr, unsigned long *ret);
439
440extern bool tdp_enabled;
437 441
438enum emulation_result { 442enum emulation_result {
439 EMULATE_DONE, /* no further processing */ 443 EMULATE_DONE, /* no further processing */
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index ed5df3a54aab..509845942070 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -12,10 +12,39 @@
12#define KVM_CPUID_FEATURES 0x40000001 12#define KVM_CPUID_FEATURES 0x40000001
13#define KVM_FEATURE_CLOCKSOURCE 0 13#define KVM_FEATURE_CLOCKSOURCE 0
14#define KVM_FEATURE_NOP_IO_DELAY 1 14#define KVM_FEATURE_NOP_IO_DELAY 1
15#define KVM_FEATURE_MMU_OP 2
15 16
16#define MSR_KVM_WALL_CLOCK 0x11 17#define MSR_KVM_WALL_CLOCK 0x11
17#define MSR_KVM_SYSTEM_TIME 0x12 18#define MSR_KVM_SYSTEM_TIME 0x12
18 19
20#define KVM_MAX_MMU_OP_BATCH 32
21
22/* Operations for KVM_HC_MMU_OP */
23#define KVM_MMU_OP_WRITE_PTE 1
24#define KVM_MMU_OP_FLUSH_TLB 2
25#define KVM_MMU_OP_RELEASE_PT 3
26
27/* Payload for KVM_HC_MMU_OP */
/* Common prefix of every KVM_HC_MMU_OP operation. */
28struct kvm_mmu_op_header {
29 __u32 op; /* one of the KVM_MMU_OP_* codes */
30 __u32 pad;
31};
32
/* KVM_MMU_OP_WRITE_PTE: store pte_val at guest physical address pte_phys. */
33struct kvm_mmu_op_write_pte {
34 struct kvm_mmu_op_header header;
35 __u64 pte_phys;
36 __u64 pte_val;
37};
38
/* KVM_MMU_OP_FLUSH_TLB: carries no payload beyond the header. */
39struct kvm_mmu_op_flush_tlb {
40 struct kvm_mmu_op_header header;
41};
42
/* KVM_MMU_OP_RELEASE_PT: pt_phys is the guest physical address of the page table to release. */
43struct kvm_mmu_op_release_pt {
44 struct kvm_mmu_op_header header;
45 __u64 pt_phys;
46};
47
19#ifdef __KERNEL__ 48#ifdef __KERNEL__
20#include <asm/processor.h> 49#include <asm/processor.h>
21 50
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 76f09474be98..c1b502a50a01 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -238,6 +238,7 @@ struct kvm_vapic_addr {
238#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ 238#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
239#define KVM_CAP_PIT 11 239#define KVM_CAP_PIT 11
240#define KVM_CAP_NOP_IO_DELAY 12 240#define KVM_CAP_NOP_IO_DELAY 12
241#define KVM_CAP_PV_MMU 13
241 242
242/* 243/*
243 * ioctls for VM fds 244 * ioctls for VM fds
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index 9c462c91a6b1..3ddce03766ca 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -11,8 +11,11 @@
11 11
12/* Return values for hypercalls */ 12/* Return values for hypercalls */
13#define KVM_ENOSYS 1000 13#define KVM_ENOSYS 1000
14#define KVM_EFAULT EFAULT
15#define KVM_E2BIG E2BIG
14 16
15#define KVM_HC_VAPIC_POLL_IRQ 1 17#define KVM_HC_VAPIC_POLL_IRQ 1
18#define KVM_HC_MMU_OP 2
16 19
17/* 20/*
18 * hypercalls use architecture specific 21 * hypercalls use architecture specific