aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/kvm/mmu.c
diff options
context:
space:
mode:
author: Avi Kivity <avi@qumranet.com> 2007-01-05 19:36:43 -0500
committer: Linus Torvalds <torvalds@woody.osdl.org> 2007-01-06 02:55:24 -0500
commit: cea0f0e7ea54753c3265dc77f605a6dad1912cfc (patch)
tree: e0a3e64b45fe83f1f0ae89556e1f6fcf92f07185 /drivers/kvm/mmu.c
parent: 25c0de2cc6c26cb99553c2444936a7951c120c09 (diff)
[PATCH] KVM: MMU: Shadow page table caching
Define a hashtable for caching shadow page tables. Look up the cache on context switch (cr3 change) or during page faults.

The key to the cache is a combination of:
- the guest page table frame number
- the number of paging levels in the guest
  * we can cache real mode, 32-bit mode, PAE, and long mode page tables simultaneously. This is useful for SMP bootup.
- the guest page table level
  * some kernels use a page as both a page table and a page directory. This allows multiple shadow pages to exist for that page, one per level.
- the "quadrant"
  * a 32-bit mode page table spans 4MB, whereas a shadow page table spans 2MB. Similarly, a 32-bit page directory spans 4GB, while a shadow page directory spans 1GB. The quadrant allows caching up to 4 shadow page tables for one guest page in one level.
- a "metaphysical" bit
  * for real mode, and for PSE pages, there is no guest page table, so set the bit to avoid write protecting the page.

Signed-off-by: Avi Kivity <avi@qumranet.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r-- drivers/kvm/mmu.c 207
1 files changed, 183 insertions, 24 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index da4d7ddb9bdc..47c699c21c08 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -26,8 +26,8 @@
26#include "vmx.h" 26#include "vmx.h"
27#include "kvm.h" 27#include "kvm.h"
28 28
29#define pgprintk(x...) do { } while (0) 29#define pgprintk(x...) do { printk(x); } while (0)
30#define rmap_printk(x...) do { } while (0) 30#define rmap_printk(x...) do { printk(x); } while (0)
31 31
32#define ASSERT(x) \ 32#define ASSERT(x) \
33 if (!(x)) { \ 33 if (!(x)) { \
@@ -35,8 +35,10 @@
35 __FILE__, __LINE__, #x); \ 35 __FILE__, __LINE__, #x); \
36 } 36 }
37 37
38#define PT64_ENT_PER_PAGE 512 38#define PT64_PT_BITS 9
39#define PT32_ENT_PER_PAGE 1024 39#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
40#define PT32_PT_BITS 10
41#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
40 42
41#define PT_WRITABLE_SHIFT 1 43#define PT_WRITABLE_SHIFT 1
42 44
@@ -292,6 +294,11 @@ static int is_empty_shadow_page(hpa_t page_hpa)
292 return 1; 294 return 1;
293} 295}
294 296
297static unsigned kvm_page_table_hashfn(gfn_t gfn)
298{
299 return gfn;
300}
301
295static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 302static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
296 u64 *parent_pte) 303 u64 *parent_pte)
297{ 304{
@@ -306,10 +313,147 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
306 ASSERT(is_empty_shadow_page(page->page_hpa)); 313 ASSERT(is_empty_shadow_page(page->page_hpa));
307 page->slot_bitmap = 0; 314 page->slot_bitmap = 0;
308 page->global = 1; 315 page->global = 1;
316 page->multimapped = 0;
309 page->parent_pte = parent_pte; 317 page->parent_pte = parent_pte;
310 return page; 318 return page;
311} 319}
312 320
321static void mmu_page_add_parent_pte(struct kvm_mmu_page *page, u64 *parent_pte)
322{
323 struct kvm_pte_chain *pte_chain;
324 struct hlist_node *node;
325 int i;
326
327 if (!parent_pte)
328 return;
329 if (!page->multimapped) {
330 u64 *old = page->parent_pte;
331
332 if (!old) {
333 page->parent_pte = parent_pte;
334 return;
335 }
336 page->multimapped = 1;
337 pte_chain = kzalloc(sizeof(struct kvm_pte_chain), GFP_NOWAIT);
338 BUG_ON(!pte_chain);
339 INIT_HLIST_HEAD(&page->parent_ptes);
340 hlist_add_head(&pte_chain->link, &page->parent_ptes);
341 pte_chain->parent_ptes[0] = old;
342 }
343 hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
344 if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
345 continue;
346 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
347 if (!pte_chain->parent_ptes[i]) {
348 pte_chain->parent_ptes[i] = parent_pte;
349 return;
350 }
351 }
352 pte_chain = kzalloc(sizeof(struct kvm_pte_chain), GFP_NOWAIT);
353 BUG_ON(!pte_chain);
354 hlist_add_head(&pte_chain->link, &page->parent_ptes);
355 pte_chain->parent_ptes[0] = parent_pte;
356}
357
358static void mmu_page_remove_parent_pte(struct kvm_mmu_page *page,
359 u64 *parent_pte)
360{
361 struct kvm_pte_chain *pte_chain;
362 struct hlist_node *node;
363 int i;
364
365 if (!page->multimapped) {
366 BUG_ON(page->parent_pte != parent_pte);
367 page->parent_pte = NULL;
368 return;
369 }
370 hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
371 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
372 if (!pte_chain->parent_ptes[i])
373 break;
374 if (pte_chain->parent_ptes[i] != parent_pte)
375 continue;
376 while (i + 1 < NR_PTE_CHAIN_ENTRIES) {
377 pte_chain->parent_ptes[i]
378 = pte_chain->parent_ptes[i + 1];
379 ++i;
380 }
381 pte_chain->parent_ptes[i] = NULL;
382 return;
383 }
384 BUG();
385}
386
387static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
388 gfn_t gfn)
389{
390 unsigned index;
391 struct hlist_head *bucket;
392 struct kvm_mmu_page *page;
393 struct hlist_node *node;
394
395 pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
396 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
397 bucket = &vcpu->kvm->mmu_page_hash[index];
398 hlist_for_each_entry(page, node, bucket, hash_link)
399 if (page->gfn == gfn && !page->role.metaphysical) {
400 pgprintk("%s: found role %x\n",
401 __FUNCTION__, page->role.word);
402 return page;
403 }
404 return NULL;
405}
406
407static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
408 gfn_t gfn,
409 gva_t gaddr,
410 unsigned level,
411 int metaphysical,
412 u64 *parent_pte)
413{
414 union kvm_mmu_page_role role;
415 unsigned index;
416 unsigned quadrant;
417 struct hlist_head *bucket;
418 struct kvm_mmu_page *page;
419 struct hlist_node *node;
420
421 role.word = 0;
422 role.glevels = vcpu->mmu.root_level;
423 role.level = level;
424 role.metaphysical = metaphysical;
425 if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
426 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
427 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
428 role.quadrant = quadrant;
429 }
430 pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
431 gfn, role.word);
432 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
433 bucket = &vcpu->kvm->mmu_page_hash[index];
434 hlist_for_each_entry(page, node, bucket, hash_link)
435 if (page->gfn == gfn && page->role.word == role.word) {
436 mmu_page_add_parent_pte(page, parent_pte);
437 pgprintk("%s: found\n", __FUNCTION__);
438 return page;
439 }
440 page = kvm_mmu_alloc_page(vcpu, parent_pte);
441 if (!page)
442 return page;
443 pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
444 page->gfn = gfn;
445 page->role = role;
446 hlist_add_head(&page->hash_link, bucket);
447 return page;
448}
449
450static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
451 struct kvm_mmu_page *page,
452 u64 *parent_pte)
453{
454 mmu_page_remove_parent_pte(page, parent_pte);
455}
456
313static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) 457static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
314{ 458{
315 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); 459 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
@@ -389,11 +533,15 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
389 for (; ; level--) { 533 for (; ; level--) {
390 u32 index = PT64_INDEX(v, level); 534 u32 index = PT64_INDEX(v, level);
391 u64 *table; 535 u64 *table;
536 u64 pte;
392 537
393 ASSERT(VALID_PAGE(table_addr)); 538 ASSERT(VALID_PAGE(table_addr));
394 table = __va(table_addr); 539 table = __va(table_addr);
395 540
396 if (level == 1) { 541 if (level == 1) {
542 pte = table[index];
543 if (is_present_pte(pte) && is_writeble_pte(pte))
544 return 0;
397 mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); 545 mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
398 page_header_update_slot(vcpu->kvm, table, v); 546 page_header_update_slot(vcpu->kvm, table, v);
399 table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | 547 table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
@@ -404,8 +552,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
404 552
405 if (table[index] == 0) { 553 if (table[index] == 0) {
406 struct kvm_mmu_page *new_table; 554 struct kvm_mmu_page *new_table;
555 gfn_t pseudo_gfn;
407 556
408 new_table = kvm_mmu_alloc_page(vcpu, &table[index]); 557 pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
558 >> PAGE_SHIFT;
559 new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
560 v, level - 1,
561 1, &table[index]);
409 if (!new_table) { 562 if (!new_table) {
410 pgprintk("nonpaging_map: ENOMEM\n"); 563 pgprintk("nonpaging_map: ENOMEM\n");
411 return -ENOMEM; 564 return -ENOMEM;
@@ -427,7 +580,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
427 hpa_t root = vcpu->mmu.root_hpa; 580 hpa_t root = vcpu->mmu.root_hpa;
428 581
429 ASSERT(VALID_PAGE(root)); 582 ASSERT(VALID_PAGE(root));
430 release_pt_page_64(vcpu, root, PT64_ROOT_LEVEL);
431 vcpu->mmu.root_hpa = INVALID_PAGE; 583 vcpu->mmu.root_hpa = INVALID_PAGE;
432 return; 584 return;
433 } 585 }
@@ -437,7 +589,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
437 589
438 ASSERT(VALID_PAGE(root)); 590 ASSERT(VALID_PAGE(root));
439 root &= PT64_BASE_ADDR_MASK; 591 root &= PT64_BASE_ADDR_MASK;
440 release_pt_page_64(vcpu, root, PT32E_ROOT_LEVEL - 1);
441 vcpu->mmu.pae_root[i] = INVALID_PAGE; 592 vcpu->mmu.pae_root[i] = INVALID_PAGE;
442 } 593 }
443 vcpu->mmu.root_hpa = INVALID_PAGE; 594 vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -446,13 +597,16 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
446static void mmu_alloc_roots(struct kvm_vcpu *vcpu) 597static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
447{ 598{
448 int i; 599 int i;
600 gfn_t root_gfn;
601 root_gfn = vcpu->cr3 >> PAGE_SHIFT;
449 602
450#ifdef CONFIG_X86_64 603#ifdef CONFIG_X86_64
451 if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { 604 if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
452 hpa_t root = vcpu->mmu.root_hpa; 605 hpa_t root = vcpu->mmu.root_hpa;
453 606
454 ASSERT(!VALID_PAGE(root)); 607 ASSERT(!VALID_PAGE(root));
455 root = kvm_mmu_alloc_page(vcpu, NULL)->page_hpa; 608 root = kvm_mmu_get_page(vcpu, root_gfn, 0,
609 PT64_ROOT_LEVEL, 0, NULL)->page_hpa;
456 vcpu->mmu.root_hpa = root; 610 vcpu->mmu.root_hpa = root;
457 return; 611 return;
458 } 612 }
@@ -461,7 +615,13 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
461 hpa_t root = vcpu->mmu.pae_root[i]; 615 hpa_t root = vcpu->mmu.pae_root[i];
462 616
463 ASSERT(!VALID_PAGE(root)); 617 ASSERT(!VALID_PAGE(root));
464 root = kvm_mmu_alloc_page(vcpu, NULL)->page_hpa; 618 if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
619 root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
620 else if (vcpu->mmu.root_level == 0)
621 root_gfn = 0;
622 root = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
623 PT32_ROOT_LEVEL, !is_paging(vcpu),
624 NULL)->page_hpa;
465 vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; 625 vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
466 } 626 }
467 vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root); 627 vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
@@ -529,7 +689,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
529 context->inval_page = nonpaging_inval_page; 689 context->inval_page = nonpaging_inval_page;
530 context->gva_to_gpa = nonpaging_gva_to_gpa; 690 context->gva_to_gpa = nonpaging_gva_to_gpa;
531 context->free = nonpaging_free; 691 context->free = nonpaging_free;
532 context->root_level = PT32E_ROOT_LEVEL; 692 context->root_level = 0;
533 context->shadow_root_level = PT32E_ROOT_LEVEL; 693 context->shadow_root_level = PT32E_ROOT_LEVEL;
534 mmu_alloc_roots(vcpu); 694 mmu_alloc_roots(vcpu);
535 ASSERT(VALID_PAGE(context->root_hpa)); 695 ASSERT(VALID_PAGE(context->root_hpa));
@@ -537,29 +697,18 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
537 return 0; 697 return 0;
538} 698}
539 699
540
541static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 700static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
542{ 701{
543 struct kvm_mmu_page *page, *npage;
544
545 list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
546 link) {
547 if (page->global)
548 continue;
549
550 if (!page->parent_pte)
551 continue;
552
553 *page->parent_pte = 0;
554 release_pt_page_64(vcpu, page->page_hpa, 1);
555 }
556 ++kvm_stat.tlb_flush; 702 ++kvm_stat.tlb_flush;
557 kvm_arch_ops->tlb_flush(vcpu); 703 kvm_arch_ops->tlb_flush(vcpu);
558} 704}
559 705
560static void paging_new_cr3(struct kvm_vcpu *vcpu) 706static void paging_new_cr3(struct kvm_vcpu *vcpu)
561{ 707{
708 mmu_free_roots(vcpu);
709 mmu_alloc_roots(vcpu);
562 kvm_mmu_flush_tlb(vcpu); 710 kvm_mmu_flush_tlb(vcpu);
711 kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
563} 712}
564 713
565static void mark_pagetable_nonglobal(void *shadow_pte) 714static void mark_pagetable_nonglobal(void *shadow_pte)
@@ -578,6 +727,16 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
578 *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; 727 *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
579 if (!dirty) 728 if (!dirty)
580 access_bits &= ~PT_WRITABLE_MASK; 729 access_bits &= ~PT_WRITABLE_MASK;
730 if (access_bits & PT_WRITABLE_MASK) {
731 struct kvm_mmu_page *shadow;
732
733 shadow = kvm_mmu_lookup_page(vcpu, gaddr >> PAGE_SHIFT);
734 if (shadow)
735 pgprintk("%s: found shadow page for %lx, marking ro\n",
736 __FUNCTION__, (gfn_t)(gaddr >> PAGE_SHIFT));
737 if (shadow)
738 access_bits &= ~PT_WRITABLE_MASK;
739 }
581 740
582 if (access_bits & PT_WRITABLE_MASK) 741 if (access_bits & PT_WRITABLE_MASK)
583 mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); 742 mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);