diff options
author | Avi Kivity <avi@qumranet.com> | 2007-01-05 19:36:43 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2007-01-06 02:55:24 -0500 |
commit | cea0f0e7ea54753c3265dc77f605a6dad1912cfc (patch) | |
tree | e0a3e64b45fe83f1f0ae89556e1f6fcf92f07185 /drivers/kvm/mmu.c | |
parent | 25c0de2cc6c26cb99553c2444936a7951c120c09 (diff) |
[PATCH] KVM: MMU: Shadow page table caching
Define a hashtable for caching shadow page tables. Look up the cache on
context switch (cr3 change) or during page faults.
The key to the cache is a combination of
- the guest page table frame number
- the number of paging levels in the guest
* we can cache real mode, 32-bit mode, PAE, and long mode page
tables simultaneously. This is useful for SMP bootup.
- the guest page table level
* some kernels use a page as both a page table and a page directory. this
allows multiple shadow pages to exist for that page, one per level
- the "quadrant"
* a 32-bit mode page table (1024 entries) spans 4MB, whereas a shadow page
table (512 entries) spans 2MB. Similarly, a 32-bit page directory spans 4GB,
while a shadow page directory spans 1GB. The quadrant allows caching up to
4 shadow page tables for one guest page in one level.
- a "metaphysical" bit
* for real mode, and for PSE pages, there is no guest page table behind the
shadow page, so set the bit to avoid write-protecting the guest page.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r-- | drivers/kvm/mmu.c | 207 |
1 files changed, 183 insertions, 24 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index da4d7ddb9bdc..47c699c21c08 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
@@ -26,8 +26,8 @@ | |||
26 | #include "vmx.h" | 26 | #include "vmx.h" |
27 | #include "kvm.h" | 27 | #include "kvm.h" |
28 | 28 | ||
29 | #define pgprintk(x...) do { } while (0) | 29 | #define pgprintk(x...) do { printk(x); } while (0) |
30 | #define rmap_printk(x...) do { } while (0) | 30 | #define rmap_printk(x...) do { printk(x); } while (0) |
31 | 31 | ||
32 | #define ASSERT(x) \ | 32 | #define ASSERT(x) \ |
33 | if (!(x)) { \ | 33 | if (!(x)) { \ |
@@ -35,8 +35,10 @@ | |||
35 | __FILE__, __LINE__, #x); \ | 35 | __FILE__, __LINE__, #x); \ |
36 | } | 36 | } |
37 | 37 | ||
38 | #define PT64_ENT_PER_PAGE 512 | 38 | #define PT64_PT_BITS 9 |
39 | #define PT32_ENT_PER_PAGE 1024 | 39 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) |
40 | #define PT32_PT_BITS 10 | ||
41 | #define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS) | ||
40 | 42 | ||
41 | #define PT_WRITABLE_SHIFT 1 | 43 | #define PT_WRITABLE_SHIFT 1 |
42 | 44 | ||
@@ -292,6 +294,11 @@ static int is_empty_shadow_page(hpa_t page_hpa) | |||
292 | return 1; | 294 | return 1; |
293 | } | 295 | } |
294 | 296 | ||
297 | static unsigned kvm_page_table_hashfn(gfn_t gfn) | ||
298 | { | ||
299 | return gfn; | ||
300 | } | ||
301 | |||
295 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | 302 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
296 | u64 *parent_pte) | 303 | u64 *parent_pte) |
297 | { | 304 | { |
@@ -306,10 +313,147 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
306 | ASSERT(is_empty_shadow_page(page->page_hpa)); | 313 | ASSERT(is_empty_shadow_page(page->page_hpa)); |
307 | page->slot_bitmap = 0; | 314 | page->slot_bitmap = 0; |
308 | page->global = 1; | 315 | page->global = 1; |
316 | page->multimapped = 0; | ||
309 | page->parent_pte = parent_pte; | 317 | page->parent_pte = parent_pte; |
310 | return page; | 318 | return page; |
311 | } | 319 | } |
312 | 320 | ||
321 | static void mmu_page_add_parent_pte(struct kvm_mmu_page *page, u64 *parent_pte) | ||
322 | { | ||
323 | struct kvm_pte_chain *pte_chain; | ||
324 | struct hlist_node *node; | ||
325 | int i; | ||
326 | |||
327 | if (!parent_pte) | ||
328 | return; | ||
329 | if (!page->multimapped) { | ||
330 | u64 *old = page->parent_pte; | ||
331 | |||
332 | if (!old) { | ||
333 | page->parent_pte = parent_pte; | ||
334 | return; | ||
335 | } | ||
336 | page->multimapped = 1; | ||
337 | pte_chain = kzalloc(sizeof(struct kvm_pte_chain), GFP_NOWAIT); | ||
338 | BUG_ON(!pte_chain); | ||
339 | INIT_HLIST_HEAD(&page->parent_ptes); | ||
340 | hlist_add_head(&pte_chain->link, &page->parent_ptes); | ||
341 | pte_chain->parent_ptes[0] = old; | ||
342 | } | ||
343 | hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) { | ||
344 | if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1]) | ||
345 | continue; | ||
346 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) | ||
347 | if (!pte_chain->parent_ptes[i]) { | ||
348 | pte_chain->parent_ptes[i] = parent_pte; | ||
349 | return; | ||
350 | } | ||
351 | } | ||
352 | pte_chain = kzalloc(sizeof(struct kvm_pte_chain), GFP_NOWAIT); | ||
353 | BUG_ON(!pte_chain); | ||
354 | hlist_add_head(&pte_chain->link, &page->parent_ptes); | ||
355 | pte_chain->parent_ptes[0] = parent_pte; | ||
356 | } | ||
357 | |||
358 | static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page, | ||
359 | u64 *parent_pte) | ||
360 | { | ||
361 | struct kvm_pte_chain *pte_chain; | ||
362 | struct hlist_node *node; | ||
363 | int i; | ||
364 | |||
365 | if (!page->multimapped) { | ||
366 | BUG_ON(page->parent_pte != parent_pte); | ||
367 | page->parent_pte = NULL; | ||
368 | return; | ||
369 | } | ||
370 | hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) | ||
371 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
372 | if (!pte_chain->parent_ptes[i]) | ||
373 | break; | ||
374 | if (pte_chain->parent_ptes[i] != parent_pte) | ||
375 | continue; | ||
376 | while (i + 1 < NR_PTE_CHAIN_ENTRIES) { | ||
377 | pte_chain->parent_ptes[i] | ||
378 | = pte_chain->parent_ptes[i + 1]; | ||
379 | ++i; | ||
380 | } | ||
381 | pte_chain->parent_ptes[i] = NULL; | ||
382 | return; | ||
383 | } | ||
384 | BUG(); | ||
385 | } | ||
386 | |||
387 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu, | ||
388 | gfn_t gfn) | ||
389 | { | ||
390 | unsigned index; | ||
391 | struct hlist_head *bucket; | ||
392 | struct kvm_mmu_page *page; | ||
393 | struct hlist_node *node; | ||
394 | |||
395 | pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn); | ||
396 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | ||
397 | bucket = &vcpu->kvm->mmu_page_hash[index]; | ||
398 | hlist_for_each_entry(page, node, bucket, hash_link) | ||
399 | if (page->gfn == gfn && !page->role.metaphysical) { | ||
400 | pgprintk("%s: found role %x\n", | ||
401 | __FUNCTION__, page->role.word); | ||
402 | return page; | ||
403 | } | ||
404 | return NULL; | ||
405 | } | ||
406 | |||
407 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | ||
408 | gfn_t gfn, | ||
409 | gva_t gaddr, | ||
410 | unsigned level, | ||
411 | int metaphysical, | ||
412 | u64 *parent_pte) | ||
413 | { | ||
414 | union kvm_mmu_page_role role; | ||
415 | unsigned index; | ||
416 | unsigned quadrant; | ||
417 | struct hlist_head *bucket; | ||
418 | struct kvm_mmu_page *page; | ||
419 | struct hlist_node *node; | ||
420 | |||
421 | role.word = 0; | ||
422 | role.glevels = vcpu->mmu.root_level; | ||
423 | role.level = level; | ||
424 | role.metaphysical = metaphysical; | ||
425 | if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { | ||
426 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | ||
427 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | ||
428 | role.quadrant = quadrant; | ||
429 | } | ||
430 | pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__, | ||
431 | gfn, role.word); | ||
432 | index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES; | ||
433 | bucket = &vcpu->kvm->mmu_page_hash[index]; | ||
434 | hlist_for_each_entry(page, node, bucket, hash_link) | ||
435 | if (page->gfn == gfn && page->role.word == role.word) { | ||
436 | mmu_page_add_parent_pte(page, parent_pte); | ||
437 | pgprintk("%s: found\n", __FUNCTION__); | ||
438 | return page; | ||
439 | } | ||
440 | page = kvm_mmu_alloc_page(vcpu, parent_pte); | ||
441 | if (!page) | ||
442 | return page; | ||
443 | pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word); | ||
444 | page->gfn = gfn; | ||
445 | page->role = role; | ||
446 | hlist_add_head(&page->hash_link, bucket); | ||
447 | return page; | ||
448 | } | ||
449 | |||
450 | static void kvm_mmu_put_page(struct kvm_vcpu *vcpu, | ||
451 | struct kvm_mmu_page *page, | ||
452 | u64 *parent_pte) | ||
453 | { | ||
454 | mmu_page_remove_parent_pte(page, parent_pte); | ||
455 | } | ||
456 | |||
313 | static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) | 457 | static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) |
314 | { | 458 | { |
315 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); | 459 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); |
@@ -389,11 +533,15 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
389 | for (; ; level--) { | 533 | for (; ; level--) { |
390 | u32 index = PT64_INDEX(v, level); | 534 | u32 index = PT64_INDEX(v, level); |
391 | u64 *table; | 535 | u64 *table; |
536 | u64 pte; | ||
392 | 537 | ||
393 | ASSERT(VALID_PAGE(table_addr)); | 538 | ASSERT(VALID_PAGE(table_addr)); |
394 | table = __va(table_addr); | 539 | table = __va(table_addr); |
395 | 540 | ||
396 | if (level == 1) { | 541 | if (level == 1) { |
542 | pte = table[index]; | ||
543 | if (is_present_pte(pte) && is_writeble_pte(pte)) | ||
544 | return 0; | ||
397 | mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); | 545 | mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); |
398 | page_header_update_slot(vcpu->kvm, table, v); | 546 | page_header_update_slot(vcpu->kvm, table, v); |
399 | table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | | 547 | table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
@@ -404,8 +552,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
404 | 552 | ||
405 | if (table[index] == 0) { | 553 | if (table[index] == 0) { |
406 | struct kvm_mmu_page *new_table; | 554 | struct kvm_mmu_page *new_table; |
555 | gfn_t pseudo_gfn; | ||
407 | 556 | ||
408 | new_table = kvm_mmu_alloc_page(vcpu, &table[index]); | 557 | pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK) |
558 | >> PAGE_SHIFT; | ||
559 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | ||
560 | v, level - 1, | ||
561 | 1, &table[index]); | ||
409 | if (!new_table) { | 562 | if (!new_table) { |
410 | pgprintk("nonpaging_map: ENOMEM\n"); | 563 | pgprintk("nonpaging_map: ENOMEM\n"); |
411 | return -ENOMEM; | 564 | return -ENOMEM; |
@@ -427,7 +580,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
427 | hpa_t root = vcpu->mmu.root_hpa; | 580 | hpa_t root = vcpu->mmu.root_hpa; |
428 | 581 | ||
429 | ASSERT(VALID_PAGE(root)); | 582 | ASSERT(VALID_PAGE(root)); |
430 | release_pt_page_64(vcpu, root, PT64_ROOT_LEVEL); | ||
431 | vcpu->mmu.root_hpa = INVALID_PAGE; | 583 | vcpu->mmu.root_hpa = INVALID_PAGE; |
432 | return; | 584 | return; |
433 | } | 585 | } |
@@ -437,7 +589,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
437 | 589 | ||
438 | ASSERT(VALID_PAGE(root)); | 590 | ASSERT(VALID_PAGE(root)); |
439 | root &= PT64_BASE_ADDR_MASK; | 591 | root &= PT64_BASE_ADDR_MASK; |
440 | release_pt_page_64(vcpu, root, PT32E_ROOT_LEVEL - 1); | ||
441 | vcpu->mmu.pae_root[i] = INVALID_PAGE; | 592 | vcpu->mmu.pae_root[i] = INVALID_PAGE; |
442 | } | 593 | } |
443 | vcpu->mmu.root_hpa = INVALID_PAGE; | 594 | vcpu->mmu.root_hpa = INVALID_PAGE; |
@@ -446,13 +597,16 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
446 | static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | 597 | static void mmu_alloc_roots(struct kvm_vcpu *vcpu) |
447 | { | 598 | { |
448 | int i; | 599 | int i; |
600 | gfn_t root_gfn; | ||
601 | root_gfn = vcpu->cr3 >> PAGE_SHIFT; | ||
449 | 602 | ||
450 | #ifdef CONFIG_X86_64 | 603 | #ifdef CONFIG_X86_64 |
451 | if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 604 | if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
452 | hpa_t root = vcpu->mmu.root_hpa; | 605 | hpa_t root = vcpu->mmu.root_hpa; |
453 | 606 | ||
454 | ASSERT(!VALID_PAGE(root)); | 607 | ASSERT(!VALID_PAGE(root)); |
455 | root = kvm_mmu_alloc_page(vcpu, NULL)->page_hpa; | 608 | root = kvm_mmu_get_page(vcpu, root_gfn, 0, |
609 | PT64_ROOT_LEVEL, 0, NULL)->page_hpa; | ||
456 | vcpu->mmu.root_hpa = root; | 610 | vcpu->mmu.root_hpa = root; |
457 | return; | 611 | return; |
458 | } | 612 | } |
@@ -461,7 +615,13 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
461 | hpa_t root = vcpu->mmu.pae_root[i]; | 615 | hpa_t root = vcpu->mmu.pae_root[i]; |
462 | 616 | ||
463 | ASSERT(!VALID_PAGE(root)); | 617 | ASSERT(!VALID_PAGE(root)); |
464 | root = kvm_mmu_alloc_page(vcpu, NULL)->page_hpa; | 618 | if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) |
619 | root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; | ||
620 | else if (vcpu->mmu.root_level == 0) | ||
621 | root_gfn = 0; | ||
622 | root = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | ||
623 | PT32_ROOT_LEVEL, !is_paging(vcpu), | ||
624 | NULL)->page_hpa; | ||
465 | vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; | 625 | vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; |
466 | } | 626 | } |
467 | vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root); | 627 | vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root); |
@@ -529,7 +689,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
529 | context->inval_page = nonpaging_inval_page; | 689 | context->inval_page = nonpaging_inval_page; |
530 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 690 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
531 | context->free = nonpaging_free; | 691 | context->free = nonpaging_free; |
532 | context->root_level = PT32E_ROOT_LEVEL; | 692 | context->root_level = 0; |
533 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 693 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
534 | mmu_alloc_roots(vcpu); | 694 | mmu_alloc_roots(vcpu); |
535 | ASSERT(VALID_PAGE(context->root_hpa)); | 695 | ASSERT(VALID_PAGE(context->root_hpa)); |
@@ -537,29 +697,18 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
537 | return 0; | 697 | return 0; |
538 | } | 698 | } |
539 | 699 | ||
540 | |||
541 | static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 700 | static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
542 | { | 701 | { |
543 | struct kvm_mmu_page *page, *npage; | ||
544 | |||
545 | list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages, | ||
546 | link) { | ||
547 | if (page->global) | ||
548 | continue; | ||
549 | |||
550 | if (!page->parent_pte) | ||
551 | continue; | ||
552 | |||
553 | *page->parent_pte = 0; | ||
554 | release_pt_page_64(vcpu, page->page_hpa, 1); | ||
555 | } | ||
556 | ++kvm_stat.tlb_flush; | 702 | ++kvm_stat.tlb_flush; |
557 | kvm_arch_ops->tlb_flush(vcpu); | 703 | kvm_arch_ops->tlb_flush(vcpu); |
558 | } | 704 | } |
559 | 705 | ||
560 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 706 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
561 | { | 707 | { |
708 | mmu_free_roots(vcpu); | ||
709 | mmu_alloc_roots(vcpu); | ||
562 | kvm_mmu_flush_tlb(vcpu); | 710 | kvm_mmu_flush_tlb(vcpu); |
711 | kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); | ||
563 | } | 712 | } |
564 | 713 | ||
565 | static void mark_pagetable_nonglobal(void *shadow_pte) | 714 | static void mark_pagetable_nonglobal(void *shadow_pte) |
@@ -578,6 +727,16 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu, | |||
578 | *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; | 727 | *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; |
579 | if (!dirty) | 728 | if (!dirty) |
580 | access_bits &= ~PT_WRITABLE_MASK; | 729 | access_bits &= ~PT_WRITABLE_MASK; |
730 | if (access_bits & PT_WRITABLE_MASK) { | ||
731 | struct kvm_mmu_page *shadow; | ||
732 | |||
733 | shadow = kvm_mmu_lookup_page(vcpu, gaddr >> PAGE_SHIFT); | ||
734 | if (shadow) | ||
735 | pgprintk("%s: found shadow page for %lx, marking ro\n", | ||
736 | __FUNCTION__, (gfn_t)(gaddr >> PAGE_SHIFT)); | ||
737 | if (shadow) | ||
738 | access_bits &= ~PT_WRITABLE_MASK; | ||
739 | } | ||
581 | 740 | ||
582 | if (access_bits & PT_WRITABLE_MASK) | 741 | if (access_bits & PT_WRITABLE_MASK) |
583 | mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); | 742 | mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); |