Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r--   drivers/kvm/mmu.c   699
1 file changed, 699 insertions(+), 0 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
new file mode 100644
index 000000000000..4e29d9b7211c
--- /dev/null
+++ b/drivers/kvm/mmu.c
@@ -0,0 +1,699 @@
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/types.h>
#include <linux/string.h>
#include <asm/page.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include "vmx.h"
#include "kvm.h"

#define pgprintk(x...) do { } while (0)

#define ASSERT(x)                                                       \
        if (!(x)) {                                                     \
                printk(KERN_WARNING "assertion failed %s:%d: %s\n",     \
                       __FILE__, __LINE__, #x);                         \
        }

#define PT64_ENT_PER_PAGE 512
#define PT32_ENT_PER_PAGE 1024

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)


#define PT32_PTE_COPY_MASK \
        (PT_PRESENT_MASK | PT_PWT_MASK | PT_PCD_MASK | \
        PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAT_MASK | \
        PT_GLOBAL_MASK )

#define PT32_NON_PTE_COPY_MASK \
        (PT_PRESENT_MASK | PT_PWT_MASK | PT_PCD_MASK | \
        PT_ACCESSED_MASK | PT_DIRTY_MASK)


#define PT64_PTE_COPY_MASK \
        (PT64_NX_MASK | PT32_PTE_COPY_MASK)

#define PT64_NON_PTE_COPY_MASK \
        (PT64_NX_MASK | PT32_NON_PTE_COPY_MASK)



#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)

#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))

#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)
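
/*
 * Note on the PT_SHADOW_* bits above (explanatory, not part of the
 * architecture): x86 page table entries leave bits 9-11 available to
 * software, and the shadow ptes built by this file use them to remember
 * state the hardware bits can no longer express once a pte has been
 * adjusted for trapping.  Bit 9 marks a shadow large page
 * (PT_SHADOW_PS_MARK) or an emulated MMIO mapping (PT_SHADOW_IO_MARK),
 * while bits 10 and 11 hold the guest's original writable and user
 * permissions.  PT_SHADOW_BITS_OFFSET is the distance from the
 * architectural writable bit (bit 1) to its shadow copy (bit 10), so the
 * "access_bits << PT_SHADOW_BITS_OFFSET" in set_pte_common() below moves
 * the guest permission bits into the shadow slots with one shift.
 */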

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
                ( PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS )

#define PT64_LEVEL_MASK(level) \
                (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))


#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
                ( PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS )

#define PT32_LEVEL_MASK(level) \
                (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
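
/*
 * Worked example (illustrative only, assuming the usual PAGE_SHIFT of 12):
 * PT64_INDEX() extracts the 9-bit table index for one level of a
 * 64-bit/PAE walk, PT32_INDEX() the 10-bit index for a legacy 32-bit
 * walk.  For address 0x12345678:
 *
 *   PT64_INDEX(0x12345678, 1) == (0x12345678 >> 12) & 511  == 0x145
 *   PT64_INDEX(0x12345678, 2) == (0x12345678 >> 21) & 511  == 0x091
 *   PT32_INDEX(0x12345678, 1) == (0x12345678 >> 12) & 1023 == 0x345
 *   PT32_INDEX(0x12345678, 2) == (0x12345678 >> 22) & 1023 == 0x048
 */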


#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK)
#define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))


#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

static int is_write_protection(struct kvm_vcpu *vcpu)
{
        return vcpu->cr0 & CR0_WP_MASK;
}

static int is_cpuid_PSE36(void)
{
        return 1;
}

static int is_present_pte(unsigned long pte)
{
        return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
        return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
        return pte & PT_SHADOW_IO_MARK;
}

static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
        struct kvm_mmu_page *page_head = page_header(page_hpa);

        list_del(&page_head->link);
        page_head->page_hpa = page_hpa;
        list_add(&page_head->link, &vcpu->free_pages);
}

static int is_empty_shadow_page(hpa_t page_hpa)
{
        u32 *pos;
        u32 *end;
        for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
             pos != end; pos++)
                if (*pos != 0)
                        return 0;
        return 1;
}

static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
{
        struct kvm_mmu_page *page;

        if (list_empty(&vcpu->free_pages))
                return INVALID_PAGE;

        page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
        list_del(&page->link);
        list_add(&page->link, &vcpu->kvm->active_mmu_pages);
        ASSERT(is_empty_shadow_page(page->page_hpa));
        page->slot_bitmap = 0;
        page->global = 1;
        page->parent_pte = parent_pte;
        return page->page_hpa;
}

static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
{
        int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
        struct kvm_mmu_page *page_head = page_header(__pa(pte));

        __set_bit(slot, &page_head->slot_bitmap);
}

hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
        hpa_t hpa = gpa_to_hpa(vcpu, gpa);

        return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa;
}

hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
        struct kvm_memory_slot *slot;
        struct page *page;

        ASSERT((gpa & HPA_ERR_MASK) == 0);
        slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
        if (!slot)
                return gpa | HPA_ERR_MASK;
        page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
        return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
                | (gpa & (PAGE_SIZE-1));
}

hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
        gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

        if (gpa == UNMAPPED_GVA)
                return UNMAPPED_GVA;
        return gpa_to_hpa(vcpu, gpa);
}
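
/*
 * Address translation here is a two-step chain: gva_to_hpa() first asks
 * the current mmu context to perform the guest walk (gva -> gpa), then
 * gpa_to_hpa() maps the guest frame to a host physical address through
 * the memslot tables.  Failures are encoded in-band: an unmapped gva
 * yields UNMAPPED_GVA, while a gpa outside any memslot comes back with
 * HPA_ERR_MASK set, which is presumably what is_error_hpa() tests.
 * safe_gpa_to_hpa() substitutes the address of a dedicated bad page so
 * that callers always receive something mappable.
 */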


static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
                               int level)
{
        ASSERT(vcpu);
        ASSERT(VALID_PAGE(page_hpa));
        ASSERT(level <= PT64_ROOT_LEVEL && level > 0);

        if (level == 1)
                memset(__va(page_hpa), 0, PAGE_SIZE);
        else {
                u64 *pos;
                u64 *end;

                for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
                     pos != end; pos++) {
                        u64 current_ent = *pos;

                        *pos = 0;
                        if (is_present_pte(current_ent))
                                release_pt_page_64(vcpu,
                                                  current_ent &
                                                  PT64_BASE_ADDR_MASK,
                                                  level - 1);
                }
        }
        kvm_mmu_free_page(vcpu, page_hpa);
}
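
/*
 * release_pt_page_64() tears a shadow page table down depth first:
 * leaf pages are simply cleared, while higher levels recurse into every
 * present entry before the page itself is returned to the per-vcpu free
 * list by kvm_mmu_free_page().  The same routine serves the nonpaging
 * and paging contexts, since both build 64-bit-format shadow tables.
 */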

static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
{
        int level = PT32E_ROOT_LEVEL;
        hpa_t table_addr = vcpu->mmu.root_hpa;

        for (; ; level--) {
                u32 index = PT64_INDEX(v, level);
                u64 *table;

                ASSERT(VALID_PAGE(table_addr));
                table = __va(table_addr);

                if (level == 1) {
                        mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
                        page_header_update_slot(vcpu->kvm, table, v);
                        table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
                                                                PT_USER_MASK;
                        return 0;
                }

                if (table[index] == 0) {
                        hpa_t new_table = kvm_mmu_alloc_page(vcpu,
                                                             &table[index]);

                        if (!VALID_PAGE(new_table)) {
                                pgprintk("nonpaging_map: ENOMEM\n");
                                return -ENOMEM;
                        }

                        if (level == PT32E_ROOT_LEVEL)
                                table[index] = new_table | PT_PRESENT_MASK;
                        else
                                table[index] = new_table | PT_PRESENT_MASK |
                                                PT_WRITABLE_MASK | PT_USER_MASK;
                }
                table_addr = table[index] & PT64_BASE_ADDR_MASK;
        }
}
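
/*
 * In the nonpaging case the guest addresses physical memory directly,
 * so the shadow table is effectively an identity map from gva (== gpa)
 * to hpa.  nonpaging_map() walks down from the PAE-format root,
 * allocating any missing intermediate table from the vcpu's free list,
 * and installs a present, writable, user leaf pte at level 1.  A return
 * of -ENOMEM means the free list is exhausted; the caller reacts by
 * flushing the whole shadow tree to reclaim pages and retrying.
 */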

static void nonpaging_flush(struct kvm_vcpu *vcpu)
{
        hpa_t root = vcpu->mmu.root_hpa;

        ++kvm_stat.tlb_flush;
        pgprintk("nonpaging_flush\n");
        ASSERT(VALID_PAGE(root));
        release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        root = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(root));
        vcpu->mmu.root_hpa = root;
        if (is_paging(vcpu))
                root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
        kvm_arch_ops->set_cr3(vcpu, root);
        kvm_arch_ops->tlb_flush(vcpu);
}

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
        return vaddr;
}

static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                               u32 error_code)
{
        int ret;
        gpa_t addr = gva;

        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));

        for (;;) {
                hpa_t paddr;

                paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);

                if (is_error_hpa(paddr))
                        return 1;

                ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
                if (ret) {
                        nonpaging_flush(vcpu);
                        continue;
                }
                break;
        }
        return ret;
}

static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
}

static void nonpaging_free(struct kvm_vcpu *vcpu)
{
        hpa_t root;

        ASSERT(vcpu);
        root = vcpu->mmu.root_hpa;
        if (VALID_PAGE(root))
                release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        vcpu->mmu.root_hpa = INVALID_PAGE;
}

static int nonpaging_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
        context->inval_page = nonpaging_inval_page;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
        context->free = nonpaging_free;
        context->root_level = PT32E_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
        return 0;
}


static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu_page *page, *npage;

        list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
                                 link) {
                if (page->global)
                        continue;

                if (!page->parent_pte)
                        continue;

                *page->parent_pte = 0;
                release_pt_page_64(vcpu, page->page_hpa, 1);
        }
        ++kvm_stat.tlb_flush;
        kvm_arch_ops->tlb_flush(vcpu);
}

static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
        kvm_mmu_flush_tlb(vcpu);
}

static void mark_pagetable_nonglobal(void *shadow_pte)
{
        page_header(__pa(shadow_pte))->global = 0;
}

static inline void set_pte_common(struct kvm_vcpu *vcpu,
                             u64 *shadow_pte,
                             gpa_t gaddr,
                             int dirty,
                             u64 access_bits)
{
        hpa_t paddr;

        *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
        if (!dirty)
                access_bits &= ~PT_WRITABLE_MASK;

        if (access_bits & PT_WRITABLE_MASK)
                mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);

        *shadow_pte |= access_bits;

        paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);

        if (!(*shadow_pte & PT_GLOBAL_MASK))
                mark_pagetable_nonglobal(shadow_pte);

        if (is_error_hpa(paddr)) {
                *shadow_pte |= gaddr;
                *shadow_pte |= PT_SHADOW_IO_MARK;
                *shadow_pte &= ~PT_PRESENT_MASK;
        } else {
                *shadow_pte |= paddr;
                page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
        }
}
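
/*
 * set_pte_common() is evidently intended for the guest walkers generated
 * from paging_tmpl.h below.  It stashes the guest's access bits in the
 * shadow-only bit slots, then drops the writable bit whenever the guest
 * pte is not yet dirty, so that the first write faults and the guest
 * dirty bit can be set before write access is granted.  Guest frames
 * that do not map to host memory (typically MMIO) are recorded
 * non-present with PT_SHADOW_IO_MARK set, which is how is_io_pte()
 * recognizes them later.
 */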

static void inject_page_fault(struct kvm_vcpu *vcpu,
                              u64 addr,
                              u32 err_code)
{
        kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
}

static inline int fix_read_pf(u64 *shadow_ent)
{
        if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
            !(*shadow_ent & PT_USER_MASK)) {
                /*
                 * If supervisor write protect is disabled, we shadow kernel
                 * pages as user pages so we can trap the write access.
                 */
                *shadow_ent |= PT_USER_MASK;
                *shadow_ent &= ~PT_WRITABLE_MASK;

                return 1;

        }
        return 0;
}
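
/*
 * fix_read_pf() repairs a shadow pte after a read fault: if the guest
 * mapping is user-accessible (the shadow user bit is set) but the shadow
 * entry currently is not, the user bit is granted while the writable bit
 * is stripped, so the read can proceed and a later write still traps.
 * may_access() below appears to be the common permission check used by
 * the walkers in paging_tmpl.h.
 */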

static int may_access(u64 pte, int write, int user)
{

        if (user && !(pte & PT_USER_MASK))
                return 0;
        if (write && !(pte & PT_WRITABLE_MASK))
                return 0;
        return 1;
}

/*
 * Remove a shadow pte.
 */
static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
        hpa_t page_addr = vcpu->mmu.root_hpa;
        int level = vcpu->mmu.shadow_root_level;

        ++kvm_stat.invlpg;

        for (; ; level--) {
                u32 index = PT64_INDEX(addr, level);
                u64 *table = __va(page_addr);

                if (level == PT_PAGE_TABLE_LEVEL ) {
                        table[index] = 0;
                        return;
                }

                if (!is_present_pte(table[index]))
                        return;

                page_addr = table[index] & PT64_BASE_ADDR_MASK;

                if (level == PT_DIRECTORY_LEVEL &&
                          (table[index] & PT_SHADOW_PS_MARK)) {
                        table[index] = 0;
                        release_pt_page_64(vcpu, page_addr, PT_PAGE_TABLE_LEVEL);

                        kvm_arch_ops->tlb_flush(vcpu);
                        return;
                }
        }
}
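
/*
 * paging_inval_page() backs the guest's invlpg: it walks the shadow
 * tables for the given address and zaps the leaf pte.  If the walk hits
 * a directory entry marked with PT_SHADOW_PS_MARK (presumably the
 * shadow of a guest large page), the shadow page table it points at is
 * released in full and the hardware TLB is flushed as well.
 */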

static void paging_free(struct kvm_vcpu *vcpu)
{
        nonpaging_free(vcpu);
}

#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE
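
/*
 * paging_tmpl.h is a template: it is included twice, once with PTTYPE 64
 * and once with PTTYPE 32, and expands to the guest page table walkers
 * and page fault handlers for each format.  That is where the
 * paging64_page_fault/paging64_gva_to_gpa and
 * paging32_page_fault/paging32_gva_to_gpa functions referenced by the
 * init routines below come from.
 */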

static int paging64_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        ASSERT(is_pae(vcpu));
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT64_ROOT_LEVEL;
        context->shadow_root_level = PT64_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging32_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT32_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
        int ret;

        if ((ret = paging64_init_context(vcpu)))
                return ret;

        vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
        vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
        return 0;
}

static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

        if (!is_paging(vcpu))
                return nonpaging_init_context(vcpu);
        else if (kvm_arch_ops->is_long_mode(vcpu))
                return paging64_init_context(vcpu);
        else if (is_pae(vcpu))
                return paging32E_init_context(vcpu);
        else
                return paging32_init_context(vcpu);
}

static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        if (VALID_PAGE(vcpu->mmu.root_hpa)) {
                vcpu->mmu.free(vcpu);
                vcpu->mmu.root_hpa = INVALID_PAGE;
        }
}

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
        destroy_kvm_mmu(vcpu);
        return init_kvm_mmu(vcpu);
}

static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
        while (!list_empty(&vcpu->free_pages)) {
                struct kvm_mmu_page *page;

                page = list_entry(vcpu->free_pages.next,
                                  struct kvm_mmu_page, link);
                list_del(&page->link);
                __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
                page->page_hpa = INVALID_PAGE;
        }
}

static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
        int i;

        ASSERT(vcpu);

        for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
                struct page *page;
                struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];

                INIT_LIST_HEAD(&page_header->link);
                if ((page = alloc_page(GFP_KVM_MMU)) == NULL)
                        goto error_1;
                page->private = (unsigned long)page_header;
                page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
                memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
                list_add(&page_header->link, &vcpu->free_pages);
        }
        return 0;

error_1:
        free_mmu_pages(vcpu);
        return -ENOMEM;
}
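
/*
 * Shadow pages are not allocated on demand: alloc_mmu_pages() fills a
 * fixed pool of KVM_NUM_MMU_PAGES pages per vcpu at setup time, and
 * kvm_mmu_alloc_page()/kvm_mmu_free_page() only move them between the
 * vcpu free list and the per-VM active list.  Each struct page's private
 * field points back at its struct kvm_mmu_page header, which is
 * presumably how the page_header() helper used throughout this file
 * recovers the header from an hpa.
 */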

int kvm_mmu_init(struct kvm_vcpu *vcpu)
{
        int r;

        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
        ASSERT(list_empty(&vcpu->free_pages));

        if ((r = alloc_mmu_pages(vcpu)))
                return r;

        if ((r = init_kvm_mmu(vcpu))) {
                free_mmu_pages(vcpu);
                return r;
        }
        return 0;
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);

        destroy_kvm_mmu(vcpu);
        free_mmu_pages(vcpu);
}

void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
        struct kvm_mmu_page *page;

        list_for_each_entry(page, &kvm->active_mmu_pages, link) {
                int i;
                u64 *pt;

                if (!test_bit(slot, &page->slot_bitmap))
                        continue;

                pt = __va(page->page_hpa);
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                        /* avoid RMW */
                        if (pt[i] & PT_WRITABLE_MASK)
                                pt[i] &= ~PT_WRITABLE_MASK;

        }
}
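
/*
 * kvm_mmu_slot_remove_write_access() clears the writable bit in every
 * shadow pte belonging to shadow pages that map the given memory slot
 * (tracked via each page's slot_bitmap).  Subsequent guest writes to the
 * slot will then fault, presumably so that dirty-page tracking for that
 * slot can observe them.
 */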