diff options
Diffstat (limited to 'arch/x86/kernel/kvm.c')
-rw-r--r-- | arch/x86/kernel/kvm.c | 248 |
1 files changed, 248 insertions, 0 deletions
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c new file mode 100644 index 000000000000..8b7a3cf37d2b --- /dev/null +++ b/arch/x86/kernel/kvm.c | |||
@@ -0,0 +1,248 @@ | |||
1 | /* | ||
2 | * KVM paravirt_ops implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
17 | * | ||
18 | * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
19 | * Copyright IBM Corporation, 2007 | ||
20 | * Authors: Anthony Liguori <aliguori@us.ibm.com> | ||
21 | */ | ||
22 | |||
23 | #include <linux/module.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/kvm_para.h> | ||
26 | #include <linux/cpu.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/highmem.h> | ||
29 | #include <linux/hardirq.h> | ||
30 | |||
31 | #define MMU_QUEUE_SIZE 1024 | ||
32 | |||
33 | struct kvm_para_state { | ||
34 | u8 mmu_queue[MMU_QUEUE_SIZE]; | ||
35 | int mmu_queue_len; | ||
36 | enum paravirt_lazy_mode mode; | ||
37 | }; | ||
38 | |||
39 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | ||
40 | |||
41 | static struct kvm_para_state *kvm_para_state(void) | ||
42 | { | ||
43 | return &per_cpu(para_state, raw_smp_processor_id()); | ||
44 | } | ||
45 | |||
46 | /* | ||
47 | * No need for any "IO delay" on KVM | ||
48 | */ | ||
49 | static void kvm_io_delay(void) | ||
50 | { | ||
51 | } | ||
52 | |||
53 | static void kvm_mmu_op(void *buffer, unsigned len) | ||
54 | { | ||
55 | int r; | ||
56 | unsigned long a1, a2; | ||
57 | |||
58 | do { | ||
59 | a1 = __pa(buffer); | ||
60 | a2 = 0; /* on i386 __pa() always returns <4G */ | ||
61 | r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); | ||
62 | buffer += r; | ||
63 | len -= r; | ||
64 | } while (len); | ||
65 | } | ||
66 | |||
67 | static void mmu_queue_flush(struct kvm_para_state *state) | ||
68 | { | ||
69 | if (state->mmu_queue_len) { | ||
70 | kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); | ||
71 | state->mmu_queue_len = 0; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | static void kvm_deferred_mmu_op(void *buffer, int len) | ||
76 | { | ||
77 | struct kvm_para_state *state = kvm_para_state(); | ||
78 | |||
79 | if (state->mode != PARAVIRT_LAZY_MMU) { | ||
80 | kvm_mmu_op(buffer, len); | ||
81 | return; | ||
82 | } | ||
83 | if (state->mmu_queue_len + len > sizeof state->mmu_queue) | ||
84 | mmu_queue_flush(state); | ||
85 | memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); | ||
86 | state->mmu_queue_len += len; | ||
87 | } | ||
88 | |||
89 | static void kvm_mmu_write(void *dest, u64 val) | ||
90 | { | ||
91 | __u64 pte_phys; | ||
92 | struct kvm_mmu_op_write_pte wpte; | ||
93 | |||
94 | #ifdef CONFIG_HIGHPTE | ||
95 | struct page *page; | ||
96 | unsigned long dst = (unsigned long) dest; | ||
97 | |||
98 | page = kmap_atomic_to_page(dest); | ||
99 | pte_phys = page_to_pfn(page); | ||
100 | pte_phys <<= PAGE_SHIFT; | ||
101 | pte_phys += (dst & ~(PAGE_MASK)); | ||
102 | #else | ||
103 | pte_phys = (unsigned long)__pa(dest); | ||
104 | #endif | ||
105 | wpte.header.op = KVM_MMU_OP_WRITE_PTE; | ||
106 | wpte.pte_val = val; | ||
107 | wpte.pte_phys = pte_phys; | ||
108 | |||
109 | kvm_deferred_mmu_op(&wpte, sizeof wpte); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * We only need to hook operations that are MMU writes. We hook these so that | ||
114 | * we can use lazy MMU mode to batch these operations. We could probably | ||
115 | * improve the performance of the host code if we used some of the information | ||
116 | * here to simplify processing of batched writes. | ||
117 | */ | ||
118 | static void kvm_set_pte(pte_t *ptep, pte_t pte) | ||
119 | { | ||
120 | kvm_mmu_write(ptep, pte_val(pte)); | ||
121 | } | ||
122 | |||
123 | static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
124 | pte_t *ptep, pte_t pte) | ||
125 | { | ||
126 | kvm_mmu_write(ptep, pte_val(pte)); | ||
127 | } | ||
128 | |||
129 | static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
130 | { | ||
131 | kvm_mmu_write(pmdp, pmd_val(pmd)); | ||
132 | } | ||
133 | |||
134 | #if PAGETABLE_LEVELS >= 3 | ||
135 | #ifdef CONFIG_X86_PAE | ||
136 | static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
137 | { | ||
138 | kvm_mmu_write(ptep, pte_val(pte)); | ||
139 | } | ||
140 | |||
141 | static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, | ||
142 | pte_t *ptep, pte_t pte) | ||
143 | { | ||
144 | kvm_mmu_write(ptep, pte_val(pte)); | ||
145 | } | ||
146 | |||
147 | static void kvm_pte_clear(struct mm_struct *mm, | ||
148 | unsigned long addr, pte_t *ptep) | ||
149 | { | ||
150 | kvm_mmu_write(ptep, 0); | ||
151 | } | ||
152 | |||
153 | static void kvm_pmd_clear(pmd_t *pmdp) | ||
154 | { | ||
155 | kvm_mmu_write(pmdp, 0); | ||
156 | } | ||
157 | #endif | ||
158 | |||
159 | static void kvm_set_pud(pud_t *pudp, pud_t pud) | ||
160 | { | ||
161 | kvm_mmu_write(pudp, pud_val(pud)); | ||
162 | } | ||
163 | |||
164 | #if PAGETABLE_LEVELS == 4 | ||
165 | static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) | ||
166 | { | ||
167 | kvm_mmu_write(pgdp, pgd_val(pgd)); | ||
168 | } | ||
169 | #endif | ||
170 | #endif /* PAGETABLE_LEVELS >= 3 */ | ||
171 | |||
172 | static void kvm_flush_tlb(void) | ||
173 | { | ||
174 | struct kvm_mmu_op_flush_tlb ftlb = { | ||
175 | .header.op = KVM_MMU_OP_FLUSH_TLB, | ||
176 | }; | ||
177 | |||
178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | ||
179 | } | ||
180 | |||
181 | static void kvm_release_pt(u32 pfn) | ||
182 | { | ||
183 | struct kvm_mmu_op_release_pt rpt = { | ||
184 | .header.op = KVM_MMU_OP_RELEASE_PT, | ||
185 | .pt_phys = (u64)pfn << PAGE_SHIFT, | ||
186 | }; | ||
187 | |||
188 | kvm_mmu_op(&rpt, sizeof rpt); | ||
189 | } | ||
190 | |||
191 | static void kvm_enter_lazy_mmu(void) | ||
192 | { | ||
193 | struct kvm_para_state *state = kvm_para_state(); | ||
194 | |||
195 | paravirt_enter_lazy_mmu(); | ||
196 | state->mode = paravirt_get_lazy_mode(); | ||
197 | } | ||
198 | |||
199 | static void kvm_leave_lazy_mmu(void) | ||
200 | { | ||
201 | struct kvm_para_state *state = kvm_para_state(); | ||
202 | |||
203 | mmu_queue_flush(state); | ||
204 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
205 | state->mode = paravirt_get_lazy_mode(); | ||
206 | } | ||
207 | |||
208 | static void paravirt_ops_setup(void) | ||
209 | { | ||
210 | pv_info.name = "KVM"; | ||
211 | pv_info.paravirt_enabled = 1; | ||
212 | |||
213 | if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) | ||
214 | pv_cpu_ops.io_delay = kvm_io_delay; | ||
215 | |||
216 | if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { | ||
217 | pv_mmu_ops.set_pte = kvm_set_pte; | ||
218 | pv_mmu_ops.set_pte_at = kvm_set_pte_at; | ||
219 | pv_mmu_ops.set_pmd = kvm_set_pmd; | ||
220 | #if PAGETABLE_LEVELS >= 3 | ||
221 | #ifdef CONFIG_X86_PAE | ||
222 | pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; | ||
223 | pv_mmu_ops.set_pte_present = kvm_set_pte_present; | ||
224 | pv_mmu_ops.pte_clear = kvm_pte_clear; | ||
225 | pv_mmu_ops.pmd_clear = kvm_pmd_clear; | ||
226 | #endif | ||
227 | pv_mmu_ops.set_pud = kvm_set_pud; | ||
228 | #if PAGETABLE_LEVELS == 4 | ||
229 | pv_mmu_ops.set_pgd = kvm_set_pgd; | ||
230 | #endif | ||
231 | #endif | ||
232 | pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; | ||
233 | pv_mmu_ops.release_pte = kvm_release_pt; | ||
234 | pv_mmu_ops.release_pmd = kvm_release_pt; | ||
235 | pv_mmu_ops.release_pud = kvm_release_pt; | ||
236 | |||
237 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | ||
238 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | ||
239 | } | ||
240 | } | ||
241 | |||
242 | void __init kvm_guest_init(void) | ||
243 | { | ||
244 | if (!kvm_para_available()) | ||
245 | return; | ||
246 | |||
247 | paravirt_ops_setup(); | ||
248 | } | ||