Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu.c | 918 |
1 file changed, 485 insertions, 433 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 311f6dad8951..908ea5464a51 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -7,7 +7,7 @@ | |||
7 | * MMU support | 7 | * MMU support |
8 | * | 8 | * |
9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
10 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | 10 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. |
11 | * | 11 | * |
12 | * Authors: | 12 | * Authors: |
13 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -49,15 +49,25 @@ | |||
49 | */ | 49 | */ |
50 | bool tdp_enabled = false; | 50 | bool tdp_enabled = false; |
51 | 51 | ||
52 | #undef MMU_DEBUG | 52 | enum { |
53 | AUDIT_PRE_PAGE_FAULT, | ||
54 | AUDIT_POST_PAGE_FAULT, | ||
55 | AUDIT_PRE_PTE_WRITE, | ||
56 | AUDIT_POST_PTE_WRITE, | ||
57 | AUDIT_PRE_SYNC, | ||
58 | AUDIT_POST_SYNC | ||
59 | }; | ||
53 | 60 | ||
54 | #undef AUDIT | 61 | char *audit_point_name[] = { |
62 | "pre page fault", | ||
63 | "post page fault", | ||
64 | "pre pte write", | ||
65 | "post pte write", | ||
66 | "pre sync", | ||
67 | "post sync" | ||
68 | }; | ||
55 | 69 | ||
56 | #ifdef AUDIT | 70 | #undef MMU_DEBUG |
57 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg); | ||
58 | #else | ||
59 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} | ||
60 | #endif | ||
61 | 71 | ||
62 | #ifdef MMU_DEBUG | 72 | #ifdef MMU_DEBUG |
63 | 73 | ||
@@ -71,7 +81,7 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} | |||
71 | 81 | ||
72 | #endif | 82 | #endif |
73 | 83 | ||
74 | #if defined(MMU_DEBUG) || defined(AUDIT) | 84 | #ifdef MMU_DEBUG |
75 | static int dbg = 0; | 85 | static int dbg = 0; |
76 | module_param(dbg, bool, 0644); | 86 | module_param(dbg, bool, 0644); |
77 | #endif | 87 | #endif |
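
The audit hook is no longer compiled in or out with #define AUDIT; the hunk above introduces named audit points that later hunks hand to trace_kvm_mmu_audit().  A stand-alone sketch of the enum-to-name idiom follows; the AUDIT_MAX sentinel and the main() harness are illustrative additions, not part of the patch, and the lookup is only meaningful if the two lists are kept in the same order.

#include <stdio.h>

enum {
        AUDIT_PRE_PAGE_FAULT,
        AUDIT_POST_PAGE_FAULT,
        AUDIT_PRE_PTE_WRITE,
        AUDIT_POST_PTE_WRITE,
        AUDIT_PRE_SYNC,
        AUDIT_POST_SYNC,
        AUDIT_MAX                       /* sentinel, not in the patch */
};

static const char *audit_point_name[AUDIT_MAX] = {
        "pre page fault",
        "post page fault",
        "pre pte write",
        "post pte write",
        "pre sync",
        "post sync"
};

int main(void)
{
        /* a trace consumer can turn the numeric audit point back into text */
        printf("%s\n", audit_point_name[AUDIT_PRE_SYNC]);
        return 0;
}
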
@@ -89,6 +99,8 @@ module_param(oos_shadow, bool, 0644); | |||
89 | } | 99 | } |
90 | #endif | 100 | #endif |
91 | 101 | ||
102 | #define PTE_PREFETCH_NUM 8 | ||
103 | |||
92 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 104 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 |
93 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 105 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
94 | 106 | ||
@@ -178,6 +190,7 @@ typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte); | |||
178 | static struct kmem_cache *pte_chain_cache; | 190 | static struct kmem_cache *pte_chain_cache; |
179 | static struct kmem_cache *rmap_desc_cache; | 191 | static struct kmem_cache *rmap_desc_cache; |
180 | static struct kmem_cache *mmu_page_header_cache; | 192 | static struct kmem_cache *mmu_page_header_cache; |
193 | static struct percpu_counter kvm_total_used_mmu_pages; | ||
181 | 194 | ||
182 | static u64 __read_mostly shadow_trap_nonpresent_pte; | 195 | static u64 __read_mostly shadow_trap_nonpresent_pte; |
183 | static u64 __read_mostly shadow_notrap_nonpresent_pte; | 196 | static u64 __read_mostly shadow_notrap_nonpresent_pte; |
@@ -299,18 +312,50 @@ static u64 __xchg_spte(u64 *sptep, u64 new_spte) | |||
299 | #endif | 312 | #endif |
300 | } | 313 | } |
301 | 314 | ||
315 | static bool spte_has_volatile_bits(u64 spte) | ||
316 | { | ||
317 | if (!shadow_accessed_mask) | ||
318 | return false; | ||
319 | |||
320 | if (!is_shadow_present_pte(spte)) | ||
321 | return false; | ||
322 | |||
323 | if ((spte & shadow_accessed_mask) && | ||
324 | (!is_writable_pte(spte) || (spte & shadow_dirty_mask))) | ||
325 | return false; | ||
326 | |||
327 | return true; | ||
328 | } | ||
329 | |||
330 | static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask) | ||
331 | { | ||
332 | return (old_spte & bit_mask) && !(new_spte & bit_mask); | ||
333 | } | ||
334 | |||
302 | static void update_spte(u64 *sptep, u64 new_spte) | 335 | static void update_spte(u64 *sptep, u64 new_spte) |
303 | { | 336 | { |
304 | u64 old_spte; | 337 | u64 mask, old_spte = *sptep; |
338 | |||
339 | WARN_ON(!is_rmap_spte(new_spte)); | ||
340 | |||
341 | new_spte |= old_spte & shadow_dirty_mask; | ||
305 | 342 | ||
306 | if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) || | 343 | mask = shadow_accessed_mask; |
307 | !is_rmap_spte(*sptep)) | 344 | if (is_writable_pte(old_spte)) |
345 | mask |= shadow_dirty_mask; | ||
346 | |||
347 | if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask) | ||
308 | __set_spte(sptep, new_spte); | 348 | __set_spte(sptep, new_spte); |
309 | else { | 349 | else |
310 | old_spte = __xchg_spte(sptep, new_spte); | 350 | old_spte = __xchg_spte(sptep, new_spte); |
311 | if (old_spte & shadow_accessed_mask) | 351 | |
312 | mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte))); | 352 | if (!shadow_accessed_mask) |
313 | } | 353 | return; |
354 | |||
355 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) | ||
356 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); | ||
357 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) | ||
358 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); | ||
314 | } | 359 | } |
315 | 360 | ||
316 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 361 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
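
The rewritten update_spte() above only pays for an atomic exchange when the old entry still carries bits the CPU may set behind KVM's back (Accessed, plus Dirty on writable entries), and afterwards forwards any bit the update cleared to the backing page.  Below is a user-space model of that decision; the mask values and the printf() stand-ins for kvm_set_pfn_accessed()/kvm_set_pfn_dirty() are illustrative assumptions, not the real PTE layout.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PRESENT_MASK    (1ull << 0)
#define WRITABLE_MASK   (1ull << 1)
#define ACCESSED_MASK   (1ull << 5)     /* stand-in for shadow_accessed_mask */
#define DIRTY_MASK      (1ull << 6)     /* stand-in for shadow_dirty_mask */

static bool spte_has_volatile_bits(uint64_t spte)
{
        if (!(spte & PRESENT_MASK))
                return false;
        /* Accessed already set, and either read-only or already dirty:
         * hardware has nothing left to update behind our back. */
        if ((spte & ACCESSED_MASK) &&
            (!(spte & WRITABLE_MASK) || (spte & DIRTY_MASK)))
                return false;
        return true;
}

static bool bit_cleared(uint64_t old, uint64_t new, uint64_t mask)
{
        return (old & mask) && !(new & mask);
}

static void update_spte(uint64_t *sptep, uint64_t new_spte)
{
        uint64_t old_spte = *sptep;

        /* The patch also takes the plain-store path when the new entry
         * already has every volatile bit set; omitted here for brevity. */
        if (!spte_has_volatile_bits(old_spte))
                *sptep = new_spte;              /* plain store is enough */
        else
                old_spte = __atomic_exchange_n(sptep, new_spte,
                                               __ATOMIC_SEQ_CST);

        if (bit_cleared(old_spte, new_spte, ACCESSED_MASK))
                printf("would call kvm_set_pfn_accessed()\n");
        if (bit_cleared(old_spte, new_spte, DIRTY_MASK))
                printf("would call kvm_set_pfn_dirty()\n");
}

int main(void)
{
        uint64_t spte = PRESENT_MASK | WRITABLE_MASK | ACCESSED_MASK;

        update_spte(&spte, PRESENT_MASK);       /* drops the accessed bit */
        return 0;
}
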
@@ -367,7 +412,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) | |||
367 | if (r) | 412 | if (r) |
368 | goto out; | 413 | goto out; |
369 | r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, | 414 | r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, |
370 | rmap_desc_cache, 4); | 415 | rmap_desc_cache, 4 + PTE_PREFETCH_NUM); |
371 | if (r) | 416 | if (r) |
372 | goto out; | 417 | goto out; |
373 | r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); | 418 | r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); |
@@ -591,6 +636,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
591 | desc->sptes[0] = (u64 *)*rmapp; | 636 | desc->sptes[0] = (u64 *)*rmapp; |
592 | desc->sptes[1] = spte; | 637 | desc->sptes[1] = spte; |
593 | *rmapp = (unsigned long)desc | 1; | 638 | *rmapp = (unsigned long)desc | 1; |
639 | ++count; | ||
594 | } else { | 640 | } else { |
595 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); | 641 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); |
596 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 642 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
@@ -603,7 +649,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
603 | desc = desc->more; | 649 | desc = desc->more; |
604 | } | 650 | } |
605 | for (i = 0; desc->sptes[i]; ++i) | 651 | for (i = 0; desc->sptes[i]; ++i) |
606 | ; | 652 | ++count; |
607 | desc->sptes[i] = spte; | 653 | desc->sptes[i] = spte; |
608 | } | 654 | } |
609 | return count; | 655 | return count; |
@@ -645,18 +691,17 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
645 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); | 691 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); |
646 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); | 692 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); |
647 | if (!*rmapp) { | 693 | if (!*rmapp) { |
648 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 694 | printk(KERN_ERR "rmap_remove: %p 0->BUG\n", spte); |
649 | BUG(); | 695 | BUG(); |
650 | } else if (!(*rmapp & 1)) { | 696 | } else if (!(*rmapp & 1)) { |
651 | rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte); | 697 | rmap_printk("rmap_remove: %p 1->0\n", spte); |
652 | if ((u64 *)*rmapp != spte) { | 698 | if ((u64 *)*rmapp != spte) { |
653 | printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n", | 699 | printk(KERN_ERR "rmap_remove: %p 1->BUG\n", spte); |
654 | spte, *spte); | ||
655 | BUG(); | 700 | BUG(); |
656 | } | 701 | } |
657 | *rmapp = 0; | 702 | *rmapp = 0; |
658 | } else { | 703 | } else { |
659 | rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte); | 704 | rmap_printk("rmap_remove: %p many->many\n", spte); |
660 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 705 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
661 | prev_desc = NULL; | 706 | prev_desc = NULL; |
662 | while (desc) { | 707 | while (desc) { |
@@ -670,7 +715,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
670 | prev_desc = desc; | 715 | prev_desc = desc; |
671 | desc = desc->more; | 716 | desc = desc->more; |
672 | } | 717 | } |
673 | pr_err("rmap_remove: %p %llx many->many\n", spte, *spte); | 718 | pr_err("rmap_remove: %p many->many\n", spte); |
674 | BUG(); | 719 | BUG(); |
675 | } | 720 | } |
676 | } | 721 | } |
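
The rmap_add()/rmap_remove() hunks above rely on the usual tagged-pointer encoding of the rmap slot: bit 0 clear means *rmapp is a single spte pointer, bit 0 set means it points to a chained descriptor holding several sptes.  A small user-space model of that walk; the descriptor size and the harness are illustrative.

#include <stdio.h>

#define RMAP_EXT 4

struct rmap_desc {
        unsigned long long *sptes[RMAP_EXT];
        struct rmap_desc *more;
};

static void rmap_walk(unsigned long rmapp)
{
        if (!rmapp) {
                printf("no mappings\n");
        } else if (!(rmapp & 1)) {
                printf("single spte at %p\n", (void *)rmapp);
        } else {
                struct rmap_desc *desc = (struct rmap_desc *)(rmapp & ~1ul);
                int i;

                for (; desc; desc = desc->more)
                        for (i = 0; i < RMAP_EXT && desc->sptes[i]; i++)
                                printf("spte at %p\n", (void *)desc->sptes[i]);
        }
}

int main(void)
{
        unsigned long long spte = 0;
        struct rmap_desc desc = { .sptes = { &spte } };

        rmap_walk((unsigned long)&spte);        /* one mapping: direct pointer */
        rmap_walk((unsigned long)&desc | 1);    /* many: tagged descriptor */
        return 0;
}
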
@@ -680,18 +725,18 @@ static void set_spte_track_bits(u64 *sptep, u64 new_spte) | |||
680 | pfn_t pfn; | 725 | pfn_t pfn; |
681 | u64 old_spte = *sptep; | 726 | u64 old_spte = *sptep; |
682 | 727 | ||
683 | if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) || | 728 | if (!spte_has_volatile_bits(old_spte)) |
684 | old_spte & shadow_accessed_mask) { | ||
685 | __set_spte(sptep, new_spte); | 729 | __set_spte(sptep, new_spte); |
686 | } else | 730 | else |
687 | old_spte = __xchg_spte(sptep, new_spte); | 731 | old_spte = __xchg_spte(sptep, new_spte); |
688 | 732 | ||
689 | if (!is_rmap_spte(old_spte)) | 733 | if (!is_rmap_spte(old_spte)) |
690 | return; | 734 | return; |
735 | |||
691 | pfn = spte_to_pfn(old_spte); | 736 | pfn = spte_to_pfn(old_spte); |
692 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | 737 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) |
693 | kvm_set_pfn_accessed(pfn); | 738 | kvm_set_pfn_accessed(pfn); |
694 | if (is_writable_pte(old_spte)) | 739 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) |
695 | kvm_set_pfn_dirty(pfn); | 740 | kvm_set_pfn_dirty(pfn); |
696 | } | 741 | } |
697 | 742 | ||
@@ -746,13 +791,6 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
746 | } | 791 | } |
747 | spte = rmap_next(kvm, rmapp, spte); | 792 | spte = rmap_next(kvm, rmapp, spte); |
748 | } | 793 | } |
749 | if (write_protected) { | ||
750 | pfn_t pfn; | ||
751 | |||
752 | spte = rmap_next(kvm, rmapp, NULL); | ||
753 | pfn = spte_to_pfn(*spte); | ||
754 | kvm_set_pfn_dirty(pfn); | ||
755 | } | ||
756 | 794 | ||
757 | /* check for huge page mappings */ | 795 | /* check for huge page mappings */ |
758 | for (i = PT_DIRECTORY_LEVEL; | 796 | for (i = PT_DIRECTORY_LEVEL; |
@@ -947,6 +985,18 @@ static int is_empty_shadow_page(u64 *spt) | |||
947 | } | 985 | } |
948 | #endif | 986 | #endif |
949 | 987 | ||
988 | /* | ||
989 | * This value is the sum of all of the kvm instances's | ||
990 | * kvm->arch.n_used_mmu_pages values. We need a global, | ||
991 | * aggregate version in order to make the slab shrinker | ||
992 | * faster | ||
993 | */ | ||
994 | static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) | ||
995 | { | ||
996 | kvm->arch.n_used_mmu_pages += nr; | ||
997 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); | ||
998 | } | ||
999 | |||
950 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1000 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
951 | { | 1001 | { |
952 | ASSERT(is_empty_shadow_page(sp->spt)); | 1002 | ASSERT(is_empty_shadow_page(sp->spt)); |
@@ -956,7 +1006,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
956 | if (!sp->role.direct) | 1006 | if (!sp->role.direct) |
957 | __free_page(virt_to_page(sp->gfns)); | 1007 | __free_page(virt_to_page(sp->gfns)); |
958 | kmem_cache_free(mmu_page_header_cache, sp); | 1008 | kmem_cache_free(mmu_page_header_cache, sp); |
959 | ++kvm->arch.n_free_mmu_pages; | 1009 | kvm_mod_used_mmu_pages(kvm, -1); |
960 | } | 1010 | } |
961 | 1011 | ||
962 | static unsigned kvm_page_table_hashfn(gfn_t gfn) | 1012 | static unsigned kvm_page_table_hashfn(gfn_t gfn) |
@@ -979,7 +1029,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
979 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 1029 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
980 | sp->multimapped = 0; | 1030 | sp->multimapped = 0; |
981 | sp->parent_pte = parent_pte; | 1031 | sp->parent_pte = parent_pte; |
982 | --vcpu->kvm->arch.n_free_mmu_pages; | 1032 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); |
983 | return sp; | 1033 | return sp; |
984 | } | 1034 | } |
985 | 1035 | ||
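
The accounting above switches from counting free pages per VM to counting used pages per VM, mirrored into one global percpu_counter so the slab shrinker can estimate total usage without walking every VM.  A minimal sketch of the dual bookkeeping, with a plain long standing in for the percpu_counter and illustrative types.

#include <stdio.h>

static long kvm_total_used_mmu_pages;   /* stands in for the percpu_counter */

struct kvm_arch {
        unsigned int n_used_mmu_pages;
        unsigned int n_max_mmu_pages;
};

static void kvm_mod_used_mmu_pages(struct kvm_arch *arch, int nr)
{
        arch->n_used_mmu_pages += nr;
        kvm_total_used_mmu_pages += nr;         /* percpu_counter_add() in the patch */
}

int main(void)
{
        struct kvm_arch vm = { .n_max_mmu_pages = 4 };

        kvm_mod_used_mmu_pages(&vm, +1);        /* kvm_mmu_alloc_page() */
        kvm_mod_used_mmu_pages(&vm, -1);        /* kvm_mmu_free_page() */
        printf("vm=%u global=%ld\n", vm.n_used_mmu_pages,
               kvm_total_used_mmu_pages);
        return 0;
}
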
@@ -1403,7 +1453,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1403 | if (role.direct) | 1453 | if (role.direct) |
1404 | role.cr4_pae = 0; | 1454 | role.cr4_pae = 0; |
1405 | role.access = access; | 1455 | role.access = access; |
1406 | if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1456 | if (!vcpu->arch.mmu.direct_map |
1457 | && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | ||
1407 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1458 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
1408 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 1459 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
1409 | role.quadrant = quadrant; | 1460 | role.quadrant = quadrant; |
@@ -1458,6 +1509,12 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, | |||
1458 | iterator->addr = addr; | 1509 | iterator->addr = addr; |
1459 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; | 1510 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; |
1460 | iterator->level = vcpu->arch.mmu.shadow_root_level; | 1511 | iterator->level = vcpu->arch.mmu.shadow_root_level; |
1512 | |||
1513 | if (iterator->level == PT64_ROOT_LEVEL && | ||
1514 | vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL && | ||
1515 | !vcpu->arch.mmu.direct_map) | ||
1516 | --iterator->level; | ||
1517 | |||
1461 | if (iterator->level == PT32E_ROOT_LEVEL) { | 1518 | if (iterator->level == PT32E_ROOT_LEVEL) { |
1462 | iterator->shadow_addr | 1519 | iterator->shadow_addr |
1463 | = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | 1520 | = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; |
@@ -1665,41 +1722,31 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
1665 | 1722 | ||
1666 | /* | 1723 | /* |
1667 | * Changing the number of mmu pages allocated to the vm | 1724 | * Changing the number of mmu pages allocated to the vm |
1668 | * Note: if kvm_nr_mmu_pages is too small, you will get dead lock | 1725 | * Note: if goal_nr_mmu_pages is too small, you will get dead lock |
1669 | */ | 1726 | */ |
1670 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | 1727 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) |
1671 | { | 1728 | { |
1672 | int used_pages; | ||
1673 | LIST_HEAD(invalid_list); | 1729 | LIST_HEAD(invalid_list); |
1674 | |||
1675 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; | ||
1676 | used_pages = max(0, used_pages); | ||
1677 | |||
1678 | /* | 1730 | /* |
1679 | * If we set the number of mmu pages to be smaller be than the | 1731 | * If we set the number of mmu pages to be smaller be than the |
1680 | * number of actived pages , we must to free some mmu pages before we | 1732 | * number of actived pages , we must to free some mmu pages before we |
1681 | * change the value | 1733 | * change the value |
1682 | */ | 1734 | */ |
1683 | 1735 | ||
1684 | if (used_pages > kvm_nr_mmu_pages) { | 1736 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { |
1685 | while (used_pages > kvm_nr_mmu_pages && | 1737 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && |
1686 | !list_empty(&kvm->arch.active_mmu_pages)) { | 1738 | !list_empty(&kvm->arch.active_mmu_pages)) { |
1687 | struct kvm_mmu_page *page; | 1739 | struct kvm_mmu_page *page; |
1688 | 1740 | ||
1689 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1741 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1690 | struct kvm_mmu_page, link); | 1742 | struct kvm_mmu_page, link); |
1691 | used_pages -= kvm_mmu_prepare_zap_page(kvm, page, | 1743 | kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); |
1692 | &invalid_list); | 1744 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
1693 | } | 1745 | } |
1694 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 1746 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; |
1695 | kvm_nr_mmu_pages = used_pages; | ||
1696 | kvm->arch.n_free_mmu_pages = 0; | ||
1697 | } | 1747 | } |
1698 | else | ||
1699 | kvm->arch.n_free_mmu_pages += kvm_nr_mmu_pages | ||
1700 | - kvm->arch.n_alloc_mmu_pages; | ||
1701 | 1748 | ||
1702 | kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages; | 1749 | kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; |
1703 | } | 1750 | } |
1704 | 1751 | ||
1705 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 1752 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
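
With n_used_mmu_pages/n_max_mmu_pages the resize above reduces to "zap from the tail of the active list until usage fits, then record the goal as the new maximum", and "available" becomes a derived quantity.  The helper kvm_mmu_available_pages() used further down in this diff is not shown here; the body below is my assumption of its shape, and the loop is only a model of the zap-until-it-fits rule.

#include <stdio.h>

static unsigned int kvm_mmu_available_pages_model(unsigned int n_max,
                                                  unsigned int n_used)
{
        return n_max > n_used ? n_max - n_used : 0;
}

static unsigned int change_mmu_pages_model(unsigned int n_used,
                                           unsigned int goal)
{
        while (n_used > goal)
                n_used--;       /* one kvm_mmu_prepare_zap_page() + commit */
        return n_used;          /* recorded as n_max_mmu_pages */
}

int main(void)
{
        unsigned int used = change_mmu_pages_model(10, 6);

        printf("used=%u available=%u\n", used,
               kvm_mmu_available_pages_model(6, used));
        return 0;
}
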
@@ -1709,11 +1756,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
1709 | LIST_HEAD(invalid_list); | 1756 | LIST_HEAD(invalid_list); |
1710 | int r; | 1757 | int r; |
1711 | 1758 | ||
1712 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | 1759 | pgprintk("%s: looking for gfn %llx\n", __func__, gfn); |
1713 | r = 0; | 1760 | r = 0; |
1714 | 1761 | ||
1715 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 1762 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1716 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1763 | pgprintk("%s: gfn %llx role %x\n", __func__, gfn, |
1717 | sp->role.word); | 1764 | sp->role.word); |
1718 | r = 1; | 1765 | r = 1; |
1719 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | 1766 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
@@ -1729,7 +1776,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1729 | LIST_HEAD(invalid_list); | 1776 | LIST_HEAD(invalid_list); |
1730 | 1777 | ||
1731 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 1778 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1732 | pgprintk("%s: zap %lx %x\n", | 1779 | pgprintk("%s: zap %llx %x\n", |
1733 | __func__, gfn, sp->role.word); | 1780 | __func__, gfn, sp->role.word); |
1734 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | 1781 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
1735 | } | 1782 | } |
@@ -1925,7 +1972,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1925 | * whether the guest actually used the pte (in order to detect | 1972 | * whether the guest actually used the pte (in order to detect |
1926 | * demand paging). | 1973 | * demand paging). |
1927 | */ | 1974 | */ |
1928 | spte = shadow_base_present_pte | shadow_dirty_mask; | 1975 | spte = shadow_base_present_pte; |
1929 | if (!speculative) | 1976 | if (!speculative) |
1930 | spte |= shadow_accessed_mask; | 1977 | spte |= shadow_accessed_mask; |
1931 | if (!dirty) | 1978 | if (!dirty) |
@@ -1948,8 +1995,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1948 | spte |= (u64)pfn << PAGE_SHIFT; | 1995 | spte |= (u64)pfn << PAGE_SHIFT; |
1949 | 1996 | ||
1950 | if ((pte_access & ACC_WRITE_MASK) | 1997 | if ((pte_access & ACC_WRITE_MASK) |
1951 | || (!tdp_enabled && write_fault && !is_write_protection(vcpu) | 1998 | || (!vcpu->arch.mmu.direct_map && write_fault |
1952 | && !user_fault)) { | 1999 | && !is_write_protection(vcpu) && !user_fault)) { |
1953 | 2000 | ||
1954 | if (level > PT_PAGE_TABLE_LEVEL && | 2001 | if (level > PT_PAGE_TABLE_LEVEL && |
1955 | has_wrprotected_page(vcpu->kvm, gfn, level)) { | 2002 | has_wrprotected_page(vcpu->kvm, gfn, level)) { |
@@ -1960,7 +2007,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1960 | 2007 | ||
1961 | spte |= PT_WRITABLE_MASK; | 2008 | spte |= PT_WRITABLE_MASK; |
1962 | 2009 | ||
1963 | if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK)) | 2010 | if (!vcpu->arch.mmu.direct_map |
2011 | && !(pte_access & ACC_WRITE_MASK)) | ||
1964 | spte &= ~PT_USER_MASK; | 2012 | spte &= ~PT_USER_MASK; |
1965 | 2013 | ||
1966 | /* | 2014 | /* |
@@ -1973,7 +2021,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1973 | goto set_pte; | 2021 | goto set_pte; |
1974 | 2022 | ||
1975 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { | 2023 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
1976 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 2024 | pgprintk("%s: found shadow page for %llx, marking ro\n", |
1977 | __func__, gfn); | 2025 | __func__, gfn); |
1978 | ret = 1; | 2026 | ret = 1; |
1979 | pte_access &= ~ACC_WRITE_MASK; | 2027 | pte_access &= ~ACC_WRITE_MASK; |
@@ -1986,8 +2034,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1986 | mark_page_dirty(vcpu->kvm, gfn); | 2034 | mark_page_dirty(vcpu->kvm, gfn); |
1987 | 2035 | ||
1988 | set_pte: | 2036 | set_pte: |
1989 | if (is_writable_pte(*sptep) && !is_writable_pte(spte)) | ||
1990 | kvm_set_pfn_dirty(pfn); | ||
1991 | update_spte(sptep, spte); | 2037 | update_spte(sptep, spte); |
1992 | done: | 2038 | done: |
1993 | return ret; | 2039 | return ret; |
@@ -2004,7 +2050,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2004 | int rmap_count; | 2050 | int rmap_count; |
2005 | 2051 | ||
2006 | pgprintk("%s: spte %llx access %x write_fault %d" | 2052 | pgprintk("%s: spte %llx access %x write_fault %d" |
2007 | " user_fault %d gfn %lx\n", | 2053 | " user_fault %d gfn %llx\n", |
2008 | __func__, *sptep, pt_access, | 2054 | __func__, *sptep, pt_access, |
2009 | write_fault, user_fault, gfn); | 2055 | write_fault, user_fault, gfn); |
2010 | 2056 | ||
@@ -2023,7 +2069,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2023 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 2069 | __set_spte(sptep, shadow_trap_nonpresent_pte); |
2024 | kvm_flush_remote_tlbs(vcpu->kvm); | 2070 | kvm_flush_remote_tlbs(vcpu->kvm); |
2025 | } else if (pfn != spte_to_pfn(*sptep)) { | 2071 | } else if (pfn != spte_to_pfn(*sptep)) { |
2026 | pgprintk("hfn old %lx new %lx\n", | 2072 | pgprintk("hfn old %llx new %llx\n", |
2027 | spte_to_pfn(*sptep), pfn); | 2073 | spte_to_pfn(*sptep), pfn); |
2028 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | 2074 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
2029 | kvm_flush_remote_tlbs(vcpu->kvm); | 2075 | kvm_flush_remote_tlbs(vcpu->kvm); |
@@ -2040,7 +2086,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2040 | } | 2086 | } |
2041 | 2087 | ||
2042 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 2088 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
2043 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", | 2089 | pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", |
2044 | is_large_pte(*sptep)? "2MB" : "4kB", | 2090 | is_large_pte(*sptep)? "2MB" : "4kB", |
2045 | *sptep & PT_PRESENT_MASK ?"RW":"R", gfn, | 2091 | *sptep & PT_PRESENT_MASK ?"RW":"R", gfn, |
2046 | *sptep, sptep); | 2092 | *sptep, sptep); |
@@ -2064,6 +2110,105 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
2064 | { | 2110 | { |
2065 | } | 2111 | } |
2066 | 2112 | ||
2113 | static struct kvm_memory_slot * | ||
2114 | pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) | ||
2115 | { | ||
2116 | struct kvm_memory_slot *slot; | ||
2117 | |||
2118 | slot = gfn_to_memslot(vcpu->kvm, gfn); | ||
2119 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || | ||
2120 | (no_dirty_log && slot->dirty_bitmap)) | ||
2121 | slot = NULL; | ||
2122 | |||
2123 | return slot; | ||
2124 | } | ||
2125 | |||
2126 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
2127 | bool no_dirty_log) | ||
2128 | { | ||
2129 | struct kvm_memory_slot *slot; | ||
2130 | unsigned long hva; | ||
2131 | |||
2132 | slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log); | ||
2133 | if (!slot) { | ||
2134 | get_page(bad_page); | ||
2135 | return page_to_pfn(bad_page); | ||
2136 | } | ||
2137 | |||
2138 | hva = gfn_to_hva_memslot(slot, gfn); | ||
2139 | |||
2140 | return hva_to_pfn_atomic(vcpu->kvm, hva); | ||
2141 | } | ||
2142 | |||
2143 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | ||
2144 | struct kvm_mmu_page *sp, | ||
2145 | u64 *start, u64 *end) | ||
2146 | { | ||
2147 | struct page *pages[PTE_PREFETCH_NUM]; | ||
2148 | unsigned access = sp->role.access; | ||
2149 | int i, ret; | ||
2150 | gfn_t gfn; | ||
2151 | |||
2152 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); | ||
2153 | if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK)) | ||
2154 | return -1; | ||
2155 | |||
2156 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); | ||
2157 | if (ret <= 0) | ||
2158 | return -1; | ||
2159 | |||
2160 | for (i = 0; i < ret; i++, gfn++, start++) | ||
2161 | mmu_set_spte(vcpu, start, ACC_ALL, | ||
2162 | access, 0, 0, 1, NULL, | ||
2163 | sp->role.level, gfn, | ||
2164 | page_to_pfn(pages[i]), true, true); | ||
2165 | |||
2166 | return 0; | ||
2167 | } | ||
2168 | |||
2169 | static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, | ||
2170 | struct kvm_mmu_page *sp, u64 *sptep) | ||
2171 | { | ||
2172 | u64 *spte, *start = NULL; | ||
2173 | int i; | ||
2174 | |||
2175 | WARN_ON(!sp->role.direct); | ||
2176 | |||
2177 | i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); | ||
2178 | spte = sp->spt + i; | ||
2179 | |||
2180 | for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { | ||
2181 | if (*spte != shadow_trap_nonpresent_pte || spte == sptep) { | ||
2182 | if (!start) | ||
2183 | continue; | ||
2184 | if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0) | ||
2185 | break; | ||
2186 | start = NULL; | ||
2187 | } else if (!start) | ||
2188 | start = spte; | ||
2189 | } | ||
2190 | } | ||
2191 | |||
2192 | static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) | ||
2193 | { | ||
2194 | struct kvm_mmu_page *sp; | ||
2195 | |||
2196 | /* | ||
2197 | * Since it's no accessed bit on EPT, it's no way to | ||
2198 | * distinguish between actually accessed translations | ||
2199 | * and prefetched, so disable pte prefetch if EPT is | ||
2200 | * enabled. | ||
2201 | */ | ||
2202 | if (!shadow_accessed_mask) | ||
2203 | return; | ||
2204 | |||
2205 | sp = page_header(__pa(sptep)); | ||
2206 | if (sp->role.level > PT_PAGE_TABLE_LEVEL) | ||
2207 | return; | ||
2208 | |||
2209 | __direct_pte_prefetch(vcpu, sp, sptep); | ||
2210 | } | ||
2211 | |||
2067 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | 2212 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, |
2068 | int level, gfn_t gfn, pfn_t pfn) | 2213 | int level, gfn_t gfn, pfn_t pfn) |
2069 | { | 2214 | { |
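
direct_pte_prefetch() above rounds the faulting index down to a PTE_PREFETCH_NUM boundary and maps contiguous runs of still-nonpresent neighbours in batches, leaving a run that reaches the end of the window for a later fault.  A user-space model of the window and run detection; the printf() stands in for direct_pte_prefetch_many() and the values are illustrative.

#include <stdio.h>

#define PTE_PREFETCH_NUM 8
#define NONPRESENT       0ull   /* stand-in for shadow_trap_nonpresent_pte */

static void prefetch_window(unsigned long long *spt, int fault_idx)
{
        int base = fault_idx & ~(PTE_PREFETCH_NUM - 1);
        int i, start = -1;

        for (i = 0; i < PTE_PREFETCH_NUM; i++) {
                int idx = base + i;

                if (spt[idx] != NONPRESENT || idx == fault_idx) {
                        if (start < 0)
                                continue;
                        printf("prefetch sptes [%d, %d)\n", start, idx);
                        start = -1;
                } else if (start < 0)
                        start = idx;
        }
        /* a run that touches the window end is not flushed here, mirroring
         * __direct_pte_prefetch() above: it waits for a later fault */
}

int main(void)
{
        unsigned long long spt[512] = { 0 };

        spt[13] = 1;                    /* the spte mmu_set_spte() just installed */
        prefetch_window(spt, 13);       /* prints "prefetch sptes [8, 13)" */
        return 0;
}
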
@@ -2077,6 +2222,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2077 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 2222 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
2078 | 0, write, 1, &pt_write, | 2223 | 0, write, 1, &pt_write, |
2079 | level, gfn, pfn, false, true); | 2224 | level, gfn, pfn, false, true); |
2225 | direct_pte_prefetch(vcpu, iterator.sptep); | ||
2080 | ++vcpu->stat.pf_fixed; | 2226 | ++vcpu->stat.pf_fixed; |
2081 | break; | 2227 | break; |
2082 | } | 2228 | } |
@@ -2098,28 +2244,31 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2098 | __set_spte(iterator.sptep, | 2244 | __set_spte(iterator.sptep, |
2099 | __pa(sp->spt) | 2245 | __pa(sp->spt) |
2100 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | 2246 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
2101 | | shadow_user_mask | shadow_x_mask); | 2247 | | shadow_user_mask | shadow_x_mask |
2248 | | shadow_accessed_mask); | ||
2102 | } | 2249 | } |
2103 | } | 2250 | } |
2104 | return pt_write; | 2251 | return pt_write; |
2105 | } | 2252 | } |
2106 | 2253 | ||
2107 | static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) | 2254 | static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) |
2108 | { | 2255 | { |
2109 | char buf[1]; | 2256 | siginfo_t info; |
2110 | void __user *hva; | 2257 | |
2111 | int r; | 2258 | info.si_signo = SIGBUS; |
2259 | info.si_errno = 0; | ||
2260 | info.si_code = BUS_MCEERR_AR; | ||
2261 | info.si_addr = (void __user *)address; | ||
2262 | info.si_addr_lsb = PAGE_SHIFT; | ||
2112 | 2263 | ||
2113 | /* Touch the page, so send SIGBUS */ | 2264 | send_sig_info(SIGBUS, &info, tsk); |
2114 | hva = (void __user *)gfn_to_hva(kvm, gfn); | ||
2115 | r = copy_from_user(buf, hva, 1); | ||
2116 | } | 2265 | } |
2117 | 2266 | ||
2118 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | 2267 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) |
2119 | { | 2268 | { |
2120 | kvm_release_pfn_clean(pfn); | 2269 | kvm_release_pfn_clean(pfn); |
2121 | if (is_hwpoison_pfn(pfn)) { | 2270 | if (is_hwpoison_pfn(pfn)) { |
2122 | kvm_send_hwpoison_signal(kvm, gfn); | 2271 | kvm_send_hwpoison_signal(gfn_to_hva(kvm, gfn), current); |
2123 | return 0; | 2272 | return 0; |
2124 | } else if (is_fault_pfn(pfn)) | 2273 | } else if (is_fault_pfn(pfn)) |
2125 | return -EFAULT; | 2274 | return -EFAULT; |
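
Instead of touching the poisoned page through copy_from_user() to provoke a fault, kvm_send_hwpoison_signal() above now builds the SIGBUS itself with BUS_MCEERR_AR and the faulting host virtual address.  The sketch below shows what a VMM process would observe on delivery; raise() is only a stand-in, and a real delivery from KVM carries si_code == BUS_MCEERR_AR rather than SI_USER.

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4         /* kernel ABI value, if libc lacks the define */
#endif

static void sigbus_handler(int sig, siginfo_t *info, void *ctx)
{
        (void)sig; (void)ctx;

        /* printf() is not async-signal-safe; acceptable for a demo only */
        if (info->si_code == BUS_MCEERR_AR)
                printf("memory failure at hva %p\n", info->si_addr);
        else
                printf("SIGBUS, si_code=%d\n", info->si_code);
        _exit(0);
}

int main(void)
{
        struct sigaction sa = { .sa_flags = SA_SIGINFO };

        sa.sa_sigaction = sigbus_handler;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGBUS, &sa, NULL);

        raise(SIGBUS);          /* stand-in for the KVM-delivered signal */
        return 0;
}
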
@@ -2179,7 +2328,9 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2179 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2328 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2180 | return; | 2329 | return; |
2181 | spin_lock(&vcpu->kvm->mmu_lock); | 2330 | spin_lock(&vcpu->kvm->mmu_lock); |
2182 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2331 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && |
2332 | (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || | ||
2333 | vcpu->arch.mmu.direct_map)) { | ||
2183 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2334 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2184 | 2335 | ||
2185 | sp = page_header(root); | 2336 | sp = page_header(root); |
@@ -2222,80 +2373,158 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) | |||
2222 | return ret; | 2373 | return ret; |
2223 | } | 2374 | } |
2224 | 2375 | ||
2225 | static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | 2376 | static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) |
2226 | { | 2377 | { |
2227 | int i; | ||
2228 | gfn_t root_gfn; | ||
2229 | struct kvm_mmu_page *sp; | 2378 | struct kvm_mmu_page *sp; |
2230 | int direct = 0; | 2379 | unsigned i; |
2231 | u64 pdptr; | ||
2232 | |||
2233 | root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; | ||
2234 | 2380 | ||
2235 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2381 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
2382 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2383 | kvm_mmu_free_some_pages(vcpu); | ||
2384 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, | ||
2385 | 1, ACC_ALL, NULL); | ||
2386 | ++sp->root_count; | ||
2387 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2388 | vcpu->arch.mmu.root_hpa = __pa(sp->spt); | ||
2389 | } else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) { | ||
2390 | for (i = 0; i < 4; ++i) { | ||
2391 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | ||
2392 | |||
2393 | ASSERT(!VALID_PAGE(root)); | ||
2394 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2395 | kvm_mmu_free_some_pages(vcpu); | ||
2396 | sp = kvm_mmu_get_page(vcpu, i << 30, i << 30, | ||
2397 | PT32_ROOT_LEVEL, 1, ACC_ALL, | ||
2398 | NULL); | ||
2399 | root = __pa(sp->spt); | ||
2400 | ++sp->root_count; | ||
2401 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2402 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | ||
2403 | } | ||
2404 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | ||
2405 | } else | ||
2406 | BUG(); | ||
2407 | |||
2408 | return 0; | ||
2409 | } | ||
2410 | |||
2411 | static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | ||
2412 | { | ||
2413 | struct kvm_mmu_page *sp; | ||
2414 | u64 pdptr, pm_mask; | ||
2415 | gfn_t root_gfn; | ||
2416 | int i; | ||
2417 | |||
2418 | root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT; | ||
2419 | |||
2420 | if (mmu_check_root(vcpu, root_gfn)) | ||
2421 | return 1; | ||
2422 | |||
2423 | /* | ||
2424 | * Do we shadow a long mode page table? If so we need to | ||
2425 | * write-protect the guests page table root. | ||
2426 | */ | ||
2427 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | ||
2236 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2428 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2237 | 2429 | ||
2238 | ASSERT(!VALID_PAGE(root)); | 2430 | ASSERT(!VALID_PAGE(root)); |
2239 | if (mmu_check_root(vcpu, root_gfn)) | 2431 | |
2240 | return 1; | ||
2241 | if (tdp_enabled) { | ||
2242 | direct = 1; | ||
2243 | root_gfn = 0; | ||
2244 | } | ||
2245 | spin_lock(&vcpu->kvm->mmu_lock); | 2432 | spin_lock(&vcpu->kvm->mmu_lock); |
2246 | kvm_mmu_free_some_pages(vcpu); | 2433 | kvm_mmu_free_some_pages(vcpu); |
2247 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2434 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, |
2248 | PT64_ROOT_LEVEL, direct, | 2435 | 0, ACC_ALL, NULL); |
2249 | ACC_ALL, NULL); | ||
2250 | root = __pa(sp->spt); | 2436 | root = __pa(sp->spt); |
2251 | ++sp->root_count; | 2437 | ++sp->root_count; |
2252 | spin_unlock(&vcpu->kvm->mmu_lock); | 2438 | spin_unlock(&vcpu->kvm->mmu_lock); |
2253 | vcpu->arch.mmu.root_hpa = root; | 2439 | vcpu->arch.mmu.root_hpa = root; |
2254 | return 0; | 2440 | return 0; |
2255 | } | 2441 | } |
2256 | direct = !is_paging(vcpu); | 2442 | |
2443 | /* | ||
2444 | * We shadow a 32 bit page table. This may be a legacy 2-level | ||
2445 | * or a PAE 3-level page table. In either case we need to be aware that | ||
2446 | * the shadow page table may be a PAE or a long mode page table. | ||
2447 | */ | ||
2448 | pm_mask = PT_PRESENT_MASK; | ||
2449 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) | ||
2450 | pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; | ||
2451 | |||
2257 | for (i = 0; i < 4; ++i) { | 2452 | for (i = 0; i < 4; ++i) { |
2258 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2453 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2259 | 2454 | ||
2260 | ASSERT(!VALID_PAGE(root)); | 2455 | ASSERT(!VALID_PAGE(root)); |
2261 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { | 2456 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { |
2262 | pdptr = kvm_pdptr_read(vcpu, i); | 2457 | pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i); |
2263 | if (!is_present_gpte(pdptr)) { | 2458 | if (!is_present_gpte(pdptr)) { |
2264 | vcpu->arch.mmu.pae_root[i] = 0; | 2459 | vcpu->arch.mmu.pae_root[i] = 0; |
2265 | continue; | 2460 | continue; |
2266 | } | 2461 | } |
2267 | root_gfn = pdptr >> PAGE_SHIFT; | 2462 | root_gfn = pdptr >> PAGE_SHIFT; |
2268 | } else if (vcpu->arch.mmu.root_level == 0) | 2463 | if (mmu_check_root(vcpu, root_gfn)) |
2269 | root_gfn = 0; | 2464 | return 1; |
2270 | if (mmu_check_root(vcpu, root_gfn)) | ||
2271 | return 1; | ||
2272 | if (tdp_enabled) { | ||
2273 | direct = 1; | ||
2274 | root_gfn = i << 30; | ||
2275 | } | 2465 | } |
2276 | spin_lock(&vcpu->kvm->mmu_lock); | 2466 | spin_lock(&vcpu->kvm->mmu_lock); |
2277 | kvm_mmu_free_some_pages(vcpu); | 2467 | kvm_mmu_free_some_pages(vcpu); |
2278 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2468 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
2279 | PT32_ROOT_LEVEL, direct, | 2469 | PT32_ROOT_LEVEL, 0, |
2280 | ACC_ALL, NULL); | 2470 | ACC_ALL, NULL); |
2281 | root = __pa(sp->spt); | 2471 | root = __pa(sp->spt); |
2282 | ++sp->root_count; | 2472 | ++sp->root_count; |
2283 | spin_unlock(&vcpu->kvm->mmu_lock); | 2473 | spin_unlock(&vcpu->kvm->mmu_lock); |
2284 | 2474 | ||
2285 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 2475 | vcpu->arch.mmu.pae_root[i] = root | pm_mask; |
2286 | } | 2476 | } |
2287 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 2477 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
2478 | |||
2479 | /* | ||
2480 | * If we shadow a 32 bit page table with a long mode page | ||
2481 | * table we enter this path. | ||
2482 | */ | ||
2483 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | ||
2484 | if (vcpu->arch.mmu.lm_root == NULL) { | ||
2485 | /* | ||
2486 | * The additional page necessary for this is only | ||
2487 | * allocated on demand. | ||
2488 | */ | ||
2489 | |||
2490 | u64 *lm_root; | ||
2491 | |||
2492 | lm_root = (void*)get_zeroed_page(GFP_KERNEL); | ||
2493 | if (lm_root == NULL) | ||
2494 | return 1; | ||
2495 | |||
2496 | lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask; | ||
2497 | |||
2498 | vcpu->arch.mmu.lm_root = lm_root; | ||
2499 | } | ||
2500 | |||
2501 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root); | ||
2502 | } | ||
2503 | |||
2288 | return 0; | 2504 | return 0; |
2289 | } | 2505 | } |
2290 | 2506 | ||
2507 | static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | ||
2508 | { | ||
2509 | if (vcpu->arch.mmu.direct_map) | ||
2510 | return mmu_alloc_direct_roots(vcpu); | ||
2511 | else | ||
2512 | return mmu_alloc_shadow_roots(vcpu); | ||
2513 | } | ||
2514 | |||
2291 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) | 2515 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) |
2292 | { | 2516 | { |
2293 | int i; | 2517 | int i; |
2294 | struct kvm_mmu_page *sp; | 2518 | struct kvm_mmu_page *sp; |
2295 | 2519 | ||
2520 | if (vcpu->arch.mmu.direct_map) | ||
2521 | return; | ||
2522 | |||
2296 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2523 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2297 | return; | 2524 | return; |
2298 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2525 | |
2526 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | ||
2527 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | ||
2299 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2528 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2300 | sp = page_header(root); | 2529 | sp = page_header(root); |
2301 | mmu_sync_children(vcpu, sp); | 2530 | mmu_sync_children(vcpu, sp); |
@@ -2310,6 +2539,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2310 | mmu_sync_children(vcpu, sp); | 2539 | mmu_sync_children(vcpu, sp); |
2311 | } | 2540 | } |
2312 | } | 2541 | } |
2542 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | ||
2313 | } | 2543 | } |
2314 | 2544 | ||
2315 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2545 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
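
mmu_alloc_shadow_roots() above now also covers a 32-bit or PAE guest shadowed by a long-mode page table: the four pae_root slots become ordinary lower-level entries that need full permission bits (pm_mask), and a single lm_root page, allocated on demand, points at pae_root and serves as the top-level root.  A small sketch of the pm_mask composition; the flag bit positions follow the x86 PTE format, and the harness is illustrative.

#include <stdio.h>

#define PT_PRESENT_MASK  (1ull << 0)
#define PT_WRITABLE_MASK (1ull << 1)
#define PT_USER_MASK     (1ull << 2)
#define PT_ACCESSED_MASK (1ull << 5)

int main(void)
{
        int shadow_root_level = 4;      /* long-mode shadow (PT64_ROOT_LEVEL) */
        unsigned long long pm_mask = PT_PRESENT_MASK;

        /* When the pae_root slots sit below a real 4-level shadow root they
         * are ordinary page-directory-pointer-style entries and need the
         * full set of permission bits, not just Present. */
        if (shadow_root_level == 4)
                pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;

        printf("pae_root entry flags: %#llx\n", pm_mask);
        printf("lm_root[0] = __pa(pae_root) | %#llx\n", pm_mask);
        return 0;
}
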
@@ -2327,6 +2557,14 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
2327 | return vaddr; | 2557 | return vaddr; |
2328 | } | 2558 | } |
2329 | 2559 | ||
2560 | static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, | ||
2561 | u32 access, u32 *error) | ||
2562 | { | ||
2563 | if (error) | ||
2564 | *error = 0; | ||
2565 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); | ||
2566 | } | ||
2567 | |||
2330 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | 2568 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, |
2331 | u32 error_code) | 2569 | u32 error_code) |
2332 | { | 2570 | { |
@@ -2393,10 +2631,9 @@ static void nonpaging_free(struct kvm_vcpu *vcpu) | |||
2393 | mmu_free_roots(vcpu); | 2631 | mmu_free_roots(vcpu); |
2394 | } | 2632 | } |
2395 | 2633 | ||
2396 | static int nonpaging_init_context(struct kvm_vcpu *vcpu) | 2634 | static int nonpaging_init_context(struct kvm_vcpu *vcpu, |
2635 | struct kvm_mmu *context) | ||
2397 | { | 2636 | { |
2398 | struct kvm_mmu *context = &vcpu->arch.mmu; | ||
2399 | |||
2400 | context->new_cr3 = nonpaging_new_cr3; | 2637 | context->new_cr3 = nonpaging_new_cr3; |
2401 | context->page_fault = nonpaging_page_fault; | 2638 | context->page_fault = nonpaging_page_fault; |
2402 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 2639 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
@@ -2407,6 +2644,8 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
2407 | context->root_level = 0; | 2644 | context->root_level = 0; |
2408 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2645 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2409 | context->root_hpa = INVALID_PAGE; | 2646 | context->root_hpa = INVALID_PAGE; |
2647 | context->direct_map = true; | ||
2648 | context->nx = false; | ||
2410 | return 0; | 2649 | return 0; |
2411 | } | 2650 | } |
2412 | 2651 | ||
@@ -2422,11 +2661,14 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) | |||
2422 | mmu_free_roots(vcpu); | 2661 | mmu_free_roots(vcpu); |
2423 | } | 2662 | } |
2424 | 2663 | ||
2425 | static void inject_page_fault(struct kvm_vcpu *vcpu, | 2664 | static unsigned long get_cr3(struct kvm_vcpu *vcpu) |
2426 | u64 addr, | 2665 | { |
2427 | u32 err_code) | 2666 | return vcpu->arch.cr3; |
2667 | } | ||
2668 | |||
2669 | static void inject_page_fault(struct kvm_vcpu *vcpu) | ||
2428 | { | 2670 | { |
2429 | kvm_inject_page_fault(vcpu, addr, err_code); | 2671 | vcpu->arch.mmu.inject_page_fault(vcpu); |
2430 | } | 2672 | } |
2431 | 2673 | ||
2432 | static void paging_free(struct kvm_vcpu *vcpu) | 2674 | static void paging_free(struct kvm_vcpu *vcpu) |
@@ -2434,12 +2676,12 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
2434 | nonpaging_free(vcpu); | 2676 | nonpaging_free(vcpu); |
2435 | } | 2677 | } |
2436 | 2678 | ||
2437 | static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) | 2679 | static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) |
2438 | { | 2680 | { |
2439 | int bit7; | 2681 | int bit7; |
2440 | 2682 | ||
2441 | bit7 = (gpte >> 7) & 1; | 2683 | bit7 = (gpte >> 7) & 1; |
2442 | return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; | 2684 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; |
2443 | } | 2685 | } |
2444 | 2686 | ||
2445 | #define PTTYPE 64 | 2687 | #define PTTYPE 64 |
@@ -2450,13 +2692,14 @@ static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) | |||
2450 | #include "paging_tmpl.h" | 2692 | #include "paging_tmpl.h" |
2451 | #undef PTTYPE | 2693 | #undef PTTYPE |
2452 | 2694 | ||
2453 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | 2695 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, |
2696 | struct kvm_mmu *context, | ||
2697 | int level) | ||
2454 | { | 2698 | { |
2455 | struct kvm_mmu *context = &vcpu->arch.mmu; | ||
2456 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 2699 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
2457 | u64 exb_bit_rsvd = 0; | 2700 | u64 exb_bit_rsvd = 0; |
2458 | 2701 | ||
2459 | if (!is_nx(vcpu)) | 2702 | if (!context->nx) |
2460 | exb_bit_rsvd = rsvd_bits(63, 63); | 2703 | exb_bit_rsvd = rsvd_bits(63, 63); |
2461 | switch (level) { | 2704 | switch (level) { |
2462 | case PT32_ROOT_LEVEL: | 2705 | case PT32_ROOT_LEVEL: |
@@ -2511,9 +2754,13 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2511 | } | 2754 | } |
2512 | } | 2755 | } |
2513 | 2756 | ||
2514 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | 2757 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, |
2758 | struct kvm_mmu *context, | ||
2759 | int level) | ||
2515 | { | 2760 | { |
2516 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2761 | context->nx = is_nx(vcpu); |
2762 | |||
2763 | reset_rsvds_bits_mask(vcpu, context, level); | ||
2517 | 2764 | ||
2518 | ASSERT(is_pae(vcpu)); | 2765 | ASSERT(is_pae(vcpu)); |
2519 | context->new_cr3 = paging_new_cr3; | 2766 | context->new_cr3 = paging_new_cr3; |
@@ -2526,20 +2773,23 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | |||
2526 | context->root_level = level; | 2773 | context->root_level = level; |
2527 | context->shadow_root_level = level; | 2774 | context->shadow_root_level = level; |
2528 | context->root_hpa = INVALID_PAGE; | 2775 | context->root_hpa = INVALID_PAGE; |
2776 | context->direct_map = false; | ||
2529 | return 0; | 2777 | return 0; |
2530 | } | 2778 | } |
2531 | 2779 | ||
2532 | static int paging64_init_context(struct kvm_vcpu *vcpu) | 2780 | static int paging64_init_context(struct kvm_vcpu *vcpu, |
2781 | struct kvm_mmu *context) | ||
2533 | { | 2782 | { |
2534 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | 2783 | return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); |
2535 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); | ||
2536 | } | 2784 | } |
2537 | 2785 | ||
2538 | static int paging32_init_context(struct kvm_vcpu *vcpu) | 2786 | static int paging32_init_context(struct kvm_vcpu *vcpu, |
2787 | struct kvm_mmu *context) | ||
2539 | { | 2788 | { |
2540 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2789 | context->nx = false; |
2790 | |||
2791 | reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); | ||
2541 | 2792 | ||
2542 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | ||
2543 | context->new_cr3 = paging_new_cr3; | 2793 | context->new_cr3 = paging_new_cr3; |
2544 | context->page_fault = paging32_page_fault; | 2794 | context->page_fault = paging32_page_fault; |
2545 | context->gva_to_gpa = paging32_gva_to_gpa; | 2795 | context->gva_to_gpa = paging32_gva_to_gpa; |
@@ -2550,18 +2800,19 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
2550 | context->root_level = PT32_ROOT_LEVEL; | 2800 | context->root_level = PT32_ROOT_LEVEL; |
2551 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2801 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2552 | context->root_hpa = INVALID_PAGE; | 2802 | context->root_hpa = INVALID_PAGE; |
2803 | context->direct_map = false; | ||
2553 | return 0; | 2804 | return 0; |
2554 | } | 2805 | } |
2555 | 2806 | ||
2556 | static int paging32E_init_context(struct kvm_vcpu *vcpu) | 2807 | static int paging32E_init_context(struct kvm_vcpu *vcpu, |
2808 | struct kvm_mmu *context) | ||
2557 | { | 2809 | { |
2558 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | 2810 | return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); |
2559 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); | ||
2560 | } | 2811 | } |
2561 | 2812 | ||
2562 | static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | 2813 | static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) |
2563 | { | 2814 | { |
2564 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2815 | struct kvm_mmu *context = vcpu->arch.walk_mmu; |
2565 | 2816 | ||
2566 | context->new_cr3 = nonpaging_new_cr3; | 2817 | context->new_cr3 = nonpaging_new_cr3; |
2567 | context->page_fault = tdp_page_fault; | 2818 | context->page_fault = tdp_page_fault; |
@@ -2571,20 +2822,29 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2571 | context->invlpg = nonpaging_invlpg; | 2822 | context->invlpg = nonpaging_invlpg; |
2572 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 2823 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
2573 | context->root_hpa = INVALID_PAGE; | 2824 | context->root_hpa = INVALID_PAGE; |
2825 | context->direct_map = true; | ||
2826 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; | ||
2827 | context->get_cr3 = get_cr3; | ||
2828 | context->inject_page_fault = kvm_inject_page_fault; | ||
2829 | context->nx = is_nx(vcpu); | ||
2574 | 2830 | ||
2575 | if (!is_paging(vcpu)) { | 2831 | if (!is_paging(vcpu)) { |
2832 | context->nx = false; | ||
2576 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 2833 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
2577 | context->root_level = 0; | 2834 | context->root_level = 0; |
2578 | } else if (is_long_mode(vcpu)) { | 2835 | } else if (is_long_mode(vcpu)) { |
2579 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | 2836 | context->nx = is_nx(vcpu); |
2837 | reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL); | ||
2580 | context->gva_to_gpa = paging64_gva_to_gpa; | 2838 | context->gva_to_gpa = paging64_gva_to_gpa; |
2581 | context->root_level = PT64_ROOT_LEVEL; | 2839 | context->root_level = PT64_ROOT_LEVEL; |
2582 | } else if (is_pae(vcpu)) { | 2840 | } else if (is_pae(vcpu)) { |
2583 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | 2841 | context->nx = is_nx(vcpu); |
2842 | reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL); | ||
2584 | context->gva_to_gpa = paging64_gva_to_gpa; | 2843 | context->gva_to_gpa = paging64_gva_to_gpa; |
2585 | context->root_level = PT32E_ROOT_LEVEL; | 2844 | context->root_level = PT32E_ROOT_LEVEL; |
2586 | } else { | 2845 | } else { |
2587 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | 2846 | context->nx = false; |
2847 | reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); | ||
2588 | context->gva_to_gpa = paging32_gva_to_gpa; | 2848 | context->gva_to_gpa = paging32_gva_to_gpa; |
2589 | context->root_level = PT32_ROOT_LEVEL; | 2849 | context->root_level = PT32_ROOT_LEVEL; |
2590 | } | 2850 | } |
@@ -2592,33 +2852,83 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2592 | return 0; | 2852 | return 0; |
2593 | } | 2853 | } |
2594 | 2854 | ||
2595 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | 2855 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) |
2596 | { | 2856 | { |
2597 | int r; | 2857 | int r; |
2598 | |||
2599 | ASSERT(vcpu); | 2858 | ASSERT(vcpu); |
2600 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 2859 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
2601 | 2860 | ||
2602 | if (!is_paging(vcpu)) | 2861 | if (!is_paging(vcpu)) |
2603 | r = nonpaging_init_context(vcpu); | 2862 | r = nonpaging_init_context(vcpu, context); |
2604 | else if (is_long_mode(vcpu)) | 2863 | else if (is_long_mode(vcpu)) |
2605 | r = paging64_init_context(vcpu); | 2864 | r = paging64_init_context(vcpu, context); |
2606 | else if (is_pae(vcpu)) | 2865 | else if (is_pae(vcpu)) |
2607 | r = paging32E_init_context(vcpu); | 2866 | r = paging32E_init_context(vcpu, context); |
2608 | else | 2867 | else |
2609 | r = paging32_init_context(vcpu); | 2868 | r = paging32_init_context(vcpu, context); |
2610 | 2869 | ||
2611 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); | 2870 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
2612 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | 2871 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); |
2613 | 2872 | ||
2614 | return r; | 2873 | return r; |
2615 | } | 2874 | } |
2875 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); | ||
2876 | |||
2877 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | ||
2878 | { | ||
2879 | int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); | ||
2880 | |||
2881 | vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; | ||
2882 | vcpu->arch.walk_mmu->get_cr3 = get_cr3; | ||
2883 | vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; | ||
2884 | |||
2885 | return r; | ||
2886 | } | ||
2887 | |||
2888 | static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | ||
2889 | { | ||
2890 | struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; | ||
2891 | |||
2892 | g_context->get_cr3 = get_cr3; | ||
2893 | g_context->inject_page_fault = kvm_inject_page_fault; | ||
2894 | |||
2895 | /* | ||
2896 | * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The | ||
2897 | * translation of l2_gpa to l1_gpa addresses is done using the | ||
2898 | * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa | ||
2899 | * functions between mmu and nested_mmu are swapped. | ||
2900 | */ | ||
2901 | if (!is_paging(vcpu)) { | ||
2902 | g_context->nx = false; | ||
2903 | g_context->root_level = 0; | ||
2904 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; | ||
2905 | } else if (is_long_mode(vcpu)) { | ||
2906 | g_context->nx = is_nx(vcpu); | ||
2907 | reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL); | ||
2908 | g_context->root_level = PT64_ROOT_LEVEL; | ||
2909 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | ||
2910 | } else if (is_pae(vcpu)) { | ||
2911 | g_context->nx = is_nx(vcpu); | ||
2912 | reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL); | ||
2913 | g_context->root_level = PT32E_ROOT_LEVEL; | ||
2914 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | ||
2915 | } else { | ||
2916 | g_context->nx = false; | ||
2917 | reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL); | ||
2918 | g_context->root_level = PT32_ROOT_LEVEL; | ||
2919 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | ||
2920 | } | ||
2921 | |||
2922 | return 0; | ||
2923 | } | ||
2616 | 2924 | ||
2617 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | 2925 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) |
2618 | { | 2926 | { |
2619 | vcpu->arch.update_pte.pfn = bad_pfn; | 2927 | vcpu->arch.update_pte.pfn = bad_pfn; |
2620 | 2928 | ||
2621 | if (tdp_enabled) | 2929 | if (mmu_is_nested(vcpu)) |
2930 | return init_kvm_nested_mmu(vcpu); | ||
2931 | else if (tdp_enabled) | ||
2622 | return init_kvm_tdp_mmu(vcpu); | 2932 | return init_kvm_tdp_mmu(vcpu); |
2623 | else | 2933 | else |
2624 | return init_kvm_softmmu(vcpu); | 2934 | return init_kvm_softmmu(vcpu); |
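
init_kvm_nested_mmu() above splits translation between two contexts: arch.mmu maps L2 guest-virtual to L1-physical through the nested/TDP tables, while nested_mmu.gva_to_gpa walks the L2 page tables and then runs the result through translate_gpa().  A toy model of composing the two stages; the addresses and offsets are made up, only the composition matters.

#include <stdio.h>

typedef unsigned long long gpa_t;
typedef unsigned long long gva_t;

/* L2 guest page tables: l2_gva -> l2_gpa (the pagingNN_gva_to_gpa_nested walk) */
static gpa_t l2_walk(gva_t l2_gva)        { return l2_gva + 0x1000; }

/* L1's nested page tables: l2_gpa -> l1_gpa (translate_gpa in the patch) */
static gpa_t translate_gpa(gpa_t l2_gpa)  { return l2_gpa + 0x100000; }

static gpa_t nested_gva_to_gpa(gva_t l2_gva)
{
        gpa_t l2_gpa = l2_walk(l2_gva);         /* first stage  */
        return translate_gpa(l2_gpa);           /* second stage */
}

int main(void)
{
        printf("l2 gva %#llx -> l1 gpa %#llx\n",
               0x2000ull, nested_gva_to_gpa(0x2000));
        return 0;
}
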
@@ -2653,7 +2963,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
2653 | if (r) | 2963 | if (r) |
2654 | goto out; | 2964 | goto out; |
2655 | /* set_cr3() should ensure TLB has been flushed */ | 2965 | /* set_cr3() should ensure TLB has been flushed */ |
2656 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 2966 | vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
2657 | out: | 2967 | out: |
2658 | return r; | 2968 | return r; |
2659 | } | 2969 | } |
@@ -2663,6 +2973,7 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu) | |||
2663 | { | 2973 | { |
2664 | mmu_free_roots(vcpu); | 2974 | mmu_free_roots(vcpu); |
2665 | } | 2975 | } |
2976 | EXPORT_SYMBOL_GPL(kvm_mmu_unload); | ||
2666 | 2977 | ||
2667 | static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | 2978 | static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, |
2668 | struct kvm_mmu_page *sp, | 2979 | struct kvm_mmu_page *sp, |
@@ -2695,7 +3006,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
2695 | return; | 3006 | return; |
2696 | } | 3007 | } |
2697 | 3008 | ||
2698 | if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) | 3009 | if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) |
2699 | return; | 3010 | return; |
2700 | 3011 | ||
2701 | ++vcpu->kvm->stat.mmu_pte_updated; | 3012 | ++vcpu->kvm->stat.mmu_pte_updated; |
@@ -2837,7 +3148,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2837 | kvm_mmu_access_page(vcpu, gfn); | 3148 | kvm_mmu_access_page(vcpu, gfn); |
2838 | kvm_mmu_free_some_pages(vcpu); | 3149 | kvm_mmu_free_some_pages(vcpu); |
2839 | ++vcpu->kvm->stat.mmu_pte_write; | 3150 | ++vcpu->kvm->stat.mmu_pte_write; |
2840 | kvm_mmu_audit(vcpu, "pre pte write"); | 3151 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
2841 | if (guest_initiated) { | 3152 | if (guest_initiated) { |
2842 | if (gfn == vcpu->arch.last_pt_write_gfn | 3153 | if (gfn == vcpu->arch.last_pt_write_gfn |
2843 | && !last_updated_pte_accessed(vcpu)) { | 3154 | && !last_updated_pte_accessed(vcpu)) { |
@@ -2910,7 +3221,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2910 | } | 3221 | } |
2911 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | 3222 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); |
2912 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3223 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
2913 | kvm_mmu_audit(vcpu, "post pte write"); | 3224 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
2914 | spin_unlock(&vcpu->kvm->mmu_lock); | 3225 | spin_unlock(&vcpu->kvm->mmu_lock); |
2915 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | 3226 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { |
2916 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); | 3227 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); |
@@ -2923,7 +3234,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
2923 | gpa_t gpa; | 3234 | gpa_t gpa; |
2924 | int r; | 3235 | int r; |
2925 | 3236 | ||
2926 | if (tdp_enabled) | 3237 | if (vcpu->arch.mmu.direct_map) |
2927 | return 0; | 3238 | return 0; |
2928 | 3239 | ||
2929 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | 3240 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
@@ -2937,21 +3248,18 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | |||
2937 | 3248 | ||
2938 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 3249 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
2939 | { | 3250 | { |
2940 | int free_pages; | ||
2941 | LIST_HEAD(invalid_list); | 3251 | LIST_HEAD(invalid_list); |
2942 | 3252 | ||
2943 | free_pages = vcpu->kvm->arch.n_free_mmu_pages; | 3253 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES && |
2944 | while (free_pages < KVM_REFILL_PAGES && | ||
2945 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 3254 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { |
2946 | struct kvm_mmu_page *sp; | 3255 | struct kvm_mmu_page *sp; |
2947 | 3256 | ||
2948 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | 3257 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, |
2949 | struct kvm_mmu_page, link); | 3258 | struct kvm_mmu_page, link); |
2950 | free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 3259 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
2951 | &invalid_list); | 3260 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
2952 | ++vcpu->kvm->stat.mmu_recycled; | 3261 | ++vcpu->kvm->stat.mmu_recycled; |
2953 | } | 3262 | } |
2954 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2955 | } | 3263 | } |
2956 | 3264 | ||
2957 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 3265 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) |
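__kvm_mmu_free_some_pages() no longer carries a local free_pages count seeded from n_free_mmu_pages; it re-asks kvm_mmu_available_pages() on every loop iteration and commits the zap list inside the loop rather than once at the end. The helper itself is not part of this hunk; under the new n_used_mmu_pages accounting it is presumably a small inline along these lines (the n_max_mmu_pages field name is assumed from the rest of the series, not shown in this diff):

/* Assumed shape of kvm_mmu_available_pages(): headroom between the
 * per-VM shadow-page budget and the pages currently in use. */
#include <linux/kvm_host.h>

static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
		return kvm->arch.n_max_mmu_pages -
		       kvm->arch.n_used_mmu_pages;
	return 0;
}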
@@ -3013,6 +3321,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp); | |||
3013 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 3321 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
3014 | { | 3322 | { |
3015 | free_page((unsigned long)vcpu->arch.mmu.pae_root); | 3323 | free_page((unsigned long)vcpu->arch.mmu.pae_root); |
3324 | if (vcpu->arch.mmu.lm_root != NULL) | ||
3325 | free_page((unsigned long)vcpu->arch.mmu.lm_root); | ||
3016 | } | 3326 | } |
3017 | 3327 | ||
3018 | static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | 3328 | static int alloc_mmu_pages(struct kvm_vcpu *vcpu) |
@@ -3054,15 +3364,6 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) | |||
3054 | return init_kvm_mmu(vcpu); | 3364 | return init_kvm_mmu(vcpu); |
3055 | } | 3365 | } |
3056 | 3366 | ||
3057 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | ||
3058 | { | ||
3059 | ASSERT(vcpu); | ||
3060 | |||
3061 | destroy_kvm_mmu(vcpu); | ||
3062 | free_mmu_pages(vcpu); | ||
3063 | mmu_free_memory_caches(vcpu); | ||
3064 | } | ||
3065 | |||
3066 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | 3367 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) |
3067 | { | 3368 | { |
3068 | struct kvm_mmu_page *sp; | 3369 | struct kvm_mmu_page *sp; |
@@ -3112,23 +3413,22 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3112 | { | 3413 | { |
3113 | struct kvm *kvm; | 3414 | struct kvm *kvm; |
3114 | struct kvm *kvm_freed = NULL; | 3415 | struct kvm *kvm_freed = NULL; |
3115 | int cache_count = 0; | 3416 | |
3417 | if (nr_to_scan == 0) | ||
3418 | goto out; | ||
3116 | 3419 | ||
3117 | spin_lock(&kvm_lock); | 3420 | spin_lock(&kvm_lock); |
3118 | 3421 | ||
3119 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3422 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3120 | int npages, idx, freed_pages; | 3423 | int idx, freed_pages; |
3121 | LIST_HEAD(invalid_list); | 3424 | LIST_HEAD(invalid_list); |
3122 | 3425 | ||
3123 | idx = srcu_read_lock(&kvm->srcu); | 3426 | idx = srcu_read_lock(&kvm->srcu); |
3124 | spin_lock(&kvm->mmu_lock); | 3427 | spin_lock(&kvm->mmu_lock); |
3125 | npages = kvm->arch.n_alloc_mmu_pages - | 3428 | if (!kvm_freed && nr_to_scan > 0 && |
3126 | kvm->arch.n_free_mmu_pages; | 3429 | kvm->arch.n_used_mmu_pages > 0) { |
3127 | cache_count += npages; | ||
3128 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | ||
3129 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, | 3430 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
3130 | &invalid_list); | 3431 | &invalid_list); |
3131 | cache_count -= freed_pages; | ||
3132 | kvm_freed = kvm; | 3432 | kvm_freed = kvm; |
3133 | } | 3433 | } |
3134 | nr_to_scan--; | 3434 | nr_to_scan--; |
@@ -3142,7 +3442,8 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3142 | 3442 | ||
3143 | spin_unlock(&kvm_lock); | 3443 | spin_unlock(&kvm_lock); |
3144 | 3444 | ||
3145 | return cache_count; | 3445 | out: |
3446 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); | ||
3146 | } | 3447 | } |
3147 | 3448 | ||
3148 | static struct shrinker mmu_shrinker = { | 3449 | static struct shrinker mmu_shrinker = { |
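With the per-VM cache_count bookkeeping gone, mmu_shrink() now answers the shrinker core from the global kvm_total_used_mmu_pages percpu counter, and the new nr_to_scan == 0 fast path reflects the callback contract of this kernel generation: a zero scan count is purely a query for how many objects could be reclaimed. A minimal shrinker following the same three-argument callback signature might look like the sketch below; the my_* names are illustrative, and the shrinker API has changed in later kernels.

#include <linux/mm.h>
#include <linux/percpu_counter.h>

static struct percpu_counter my_cached_objects;	/* objects we could free */

/* hypothetical helper: drop one cached object and account for it */
static void my_free_one_object(void)
{
	percpu_counter_dec(&my_cached_objects);
}

static int my_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
	while (nr_to_scan-- > 0 &&
	       percpu_counter_read_positive(&my_cached_objects) > 0)
		my_free_one_object();

	/* nr_to_scan == 0 callers only want the freeable-object estimate */
	return percpu_counter_read_positive(&my_cached_objects);
}

static struct shrinker my_shrinker = {
	.shrink	= my_shrink,
	.seeks	= DEFAULT_SEEKS,
};

/* register_shrinker(&my_shrinker) at init,
 * unregister_shrinker(&my_shrinker) at exit. */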
@@ -3163,6 +3464,7 @@ static void mmu_destroy_caches(void) | |||
3163 | void kvm_mmu_module_exit(void) | 3464 | void kvm_mmu_module_exit(void) |
3164 | { | 3465 | { |
3165 | mmu_destroy_caches(); | 3466 | mmu_destroy_caches(); |
3467 | percpu_counter_destroy(&kvm_total_used_mmu_pages); | ||
3166 | unregister_shrinker(&mmu_shrinker); | 3468 | unregister_shrinker(&mmu_shrinker); |
3167 | } | 3469 | } |
3168 | 3470 | ||
@@ -3185,6 +3487,9 @@ int kvm_mmu_module_init(void) | |||
3185 | if (!mmu_page_header_cache) | 3487 | if (!mmu_page_header_cache) |
3186 | goto nomem; | 3488 | goto nomem; |
3187 | 3489 | ||
3490 | if (percpu_counter_init(&kvm_total_used_mmu_pages, 0)) | ||
3491 | goto nomem; | ||
3492 | |||
3188 | register_shrinker(&mmu_shrinker); | 3493 | register_shrinker(&mmu_shrinker); |
3189 | 3494 | ||
3190 | return 0; | 3495 | return 0; |
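kvm_total_used_mmu_pages is a percpu_counter, so the module-init hunk has to handle its allocation failing and the module-exit hunk has to free the per-CPU storage again. In isolation the lifecycle looks roughly like this; the example_* names are invented, and note that percpu_counter_init() grew an extra gfp_t argument in later kernels:

/* Standalone sketch of the percpu_counter lifecycle (invented names). */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu_counter.h>

static struct percpu_counter example_pages;

static void example_account(long delta)
{
	/* cheap per-CPU update; the global count is only approximately current */
	percpu_counter_add(&example_pages, delta);
}

static int __init example_init(void)
{
	/* allocates per-CPU storage, hence the error check */
	if (percpu_counter_init(&example_pages, 0))
		return -ENOMEM;
	example_account(1);
	return 0;
}

static void __exit example_exit(void)
{
	percpu_counter_destroy(&example_pages);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");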
@@ -3355,271 +3660,18 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | |||
3355 | } | 3660 | } |
3356 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); | 3661 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); |
3357 | 3662 | ||
3358 | #ifdef AUDIT | 3663 | #ifdef CONFIG_KVM_MMU_AUDIT |
3359 | 3664 | #include "mmu_audit.c" | |
3360 | static const char *audit_msg; | 3665 | #else |
3361 | 3666 | static void mmu_audit_disable(void) { } | |
3362 | static gva_t canonicalize(gva_t gva) | ||
3363 | { | ||
3364 | #ifdef CONFIG_X86_64 | ||
3365 | gva = (long long)(gva << 16) >> 16; | ||
3366 | #endif | 3667 | #endif |
3367 | return gva; | ||
3368 | } | ||
3369 | |||
3370 | |||
3371 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); | ||
3372 | |||
3373 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | ||
3374 | inspect_spte_fn fn) | ||
3375 | { | ||
3376 | int i; | ||
3377 | |||
3378 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | ||
3379 | u64 ent = sp->spt[i]; | ||
3380 | |||
3381 | if (is_shadow_present_pte(ent)) { | ||
3382 | if (!is_last_spte(ent, sp->role.level)) { | ||
3383 | struct kvm_mmu_page *child; | ||
3384 | child = page_header(ent & PT64_BASE_ADDR_MASK); | ||
3385 | __mmu_spte_walk(kvm, child, fn); | ||
3386 | } else | ||
3387 | fn(kvm, &sp->spt[i]); | ||
3388 | } | ||
3389 | } | ||
3390 | } | ||
3391 | |||
3392 | static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) | ||
3393 | { | ||
3394 | int i; | ||
3395 | struct kvm_mmu_page *sp; | ||
3396 | |||
3397 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
3398 | return; | ||
3399 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | ||
3400 | hpa_t root = vcpu->arch.mmu.root_hpa; | ||
3401 | sp = page_header(root); | ||
3402 | __mmu_spte_walk(vcpu->kvm, sp, fn); | ||
3403 | return; | ||
3404 | } | ||
3405 | for (i = 0; i < 4; ++i) { | ||
3406 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | ||
3407 | |||
3408 | if (root && VALID_PAGE(root)) { | ||
3409 | root &= PT64_BASE_ADDR_MASK; | ||
3410 | sp = page_header(root); | ||
3411 | __mmu_spte_walk(vcpu->kvm, sp, fn); | ||
3412 | } | ||
3413 | } | ||
3414 | return; | ||
3415 | } | ||
3416 | |||
3417 | static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | ||
3418 | gva_t va, int level) | ||
3419 | { | ||
3420 | u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK); | ||
3421 | int i; | ||
3422 | gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1)); | ||
3423 | |||
3424 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { | ||
3425 | u64 ent = pt[i]; | ||
3426 | |||
3427 | if (ent == shadow_trap_nonpresent_pte) | ||
3428 | continue; | ||
3429 | |||
3430 | va = canonicalize(va); | ||
3431 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) | ||
3432 | audit_mappings_page(vcpu, ent, va, level - 1); | ||
3433 | else { | ||
3434 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL); | ||
3435 | gfn_t gfn = gpa >> PAGE_SHIFT; | ||
3436 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
3437 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; | ||
3438 | 3668 | ||
3439 | if (is_error_pfn(pfn)) { | 3669 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) |
3440 | kvm_release_pfn_clean(pfn); | ||
3441 | continue; | ||
3442 | } | ||
3443 | |||
3444 | if (is_shadow_present_pte(ent) | ||
3445 | && (ent & PT64_BASE_ADDR_MASK) != hpa) | ||
3446 | printk(KERN_ERR "xx audit error: (%s) levels %d" | ||
3447 | " gva %lx gpa %llx hpa %llx ent %llx %d\n", | ||
3448 | audit_msg, vcpu->arch.mmu.root_level, | ||
3449 | va, gpa, hpa, ent, | ||
3450 | is_shadow_present_pte(ent)); | ||
3451 | else if (ent == shadow_notrap_nonpresent_pte | ||
3452 | && !is_error_hpa(hpa)) | ||
3453 | printk(KERN_ERR "audit: (%s) notrap shadow," | ||
3454 | " valid guest gva %lx\n", audit_msg, va); | ||
3455 | kvm_release_pfn_clean(pfn); | ||
3456 | |||
3457 | } | ||
3458 | } | ||
3459 | } | ||
3460 | |||
3461 | static void audit_mappings(struct kvm_vcpu *vcpu) | ||
3462 | { | ||
3463 | unsigned i; | ||
3464 | |||
3465 | if (vcpu->arch.mmu.root_level == 4) | ||
3466 | audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4); | ||
3467 | else | ||
3468 | for (i = 0; i < 4; ++i) | ||
3469 | if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK) | ||
3470 | audit_mappings_page(vcpu, | ||
3471 | vcpu->arch.mmu.pae_root[i], | ||
3472 | i << 30, | ||
3473 | 2); | ||
3474 | } | ||
3475 | |||
3476 | static int count_rmaps(struct kvm_vcpu *vcpu) | ||
3477 | { | ||
3478 | struct kvm *kvm = vcpu->kvm; | ||
3479 | struct kvm_memslots *slots; | ||
3480 | int nmaps = 0; | ||
3481 | int i, j, k, idx; | ||
3482 | |||
3483 | idx = srcu_read_lock(&kvm->srcu); | ||
3484 | slots = kvm_memslots(kvm); | ||
3485 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | ||
3486 | struct kvm_memory_slot *m = &slots->memslots[i]; | ||
3487 | struct kvm_rmap_desc *d; | ||
3488 | |||
3489 | for (j = 0; j < m->npages; ++j) { | ||
3490 | unsigned long *rmapp = &m->rmap[j]; | ||
3491 | |||
3492 | if (!*rmapp) | ||
3493 | continue; | ||
3494 | if (!(*rmapp & 1)) { | ||
3495 | ++nmaps; | ||
3496 | continue; | ||
3497 | } | ||
3498 | d = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | ||
3499 | while (d) { | ||
3500 | for (k = 0; k < RMAP_EXT; ++k) | ||
3501 | if (d->sptes[k]) | ||
3502 | ++nmaps; | ||
3503 | else | ||
3504 | break; | ||
3505 | d = d->more; | ||
3506 | } | ||
3507 | } | ||
3508 | } | ||
3509 | srcu_read_unlock(&kvm->srcu, idx); | ||
3510 | return nmaps; | ||
3511 | } | ||
3512 | |||
3513 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | ||
3514 | { | ||
3515 | unsigned long *rmapp; | ||
3516 | struct kvm_mmu_page *rev_sp; | ||
3517 | gfn_t gfn; | ||
3518 | |||
3519 | if (is_writable_pte(*sptep)) { | ||
3520 | rev_sp = page_header(__pa(sptep)); | ||
3521 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); | ||
3522 | |||
3523 | if (!gfn_to_memslot(kvm, gfn)) { | ||
3524 | if (!printk_ratelimit()) | ||
3525 | return; | ||
3526 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | ||
3527 | audit_msg, gfn); | ||
3528 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | ||
3529 | audit_msg, (long int)(sptep - rev_sp->spt), | ||
3530 | rev_sp->gfn); | ||
3531 | dump_stack(); | ||
3532 | return; | ||
3533 | } | ||
3534 | |||
3535 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); | ||
3536 | if (!*rmapp) { | ||
3537 | if (!printk_ratelimit()) | ||
3538 | return; | ||
3539 | printk(KERN_ERR "%s: no rmap for writable spte %llx\n", | ||
3540 | audit_msg, *sptep); | ||
3541 | dump_stack(); | ||
3542 | } | ||
3543 | } | ||
3544 | |||
3545 | } | ||
3546 | |||
3547 | void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu) | ||
3548 | { | ||
3549 | mmu_spte_walk(vcpu, inspect_spte_has_rmap); | ||
3550 | } | ||
3551 | |||
3552 | static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | ||
3553 | { | ||
3554 | struct kvm_mmu_page *sp; | ||
3555 | int i; | ||
3556 | |||
3557 | list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { | ||
3558 | u64 *pt = sp->spt; | ||
3559 | |||
3560 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) | ||
3561 | continue; | ||
3562 | |||
3563 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | ||
3564 | u64 ent = pt[i]; | ||
3565 | |||
3566 | if (!(ent & PT_PRESENT_MASK)) | ||
3567 | continue; | ||
3568 | if (!is_writable_pte(ent)) | ||
3569 | continue; | ||
3570 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); | ||
3571 | } | ||
3572 | } | ||
3573 | return; | ||
3574 | } | ||
3575 | |||
3576 | static void audit_rmap(struct kvm_vcpu *vcpu) | ||
3577 | { | ||
3578 | check_writable_mappings_rmap(vcpu); | ||
3579 | count_rmaps(vcpu); | ||
3580 | } | ||
3581 | |||
3582 | static void audit_write_protection(struct kvm_vcpu *vcpu) | ||
3583 | { | ||
3584 | struct kvm_mmu_page *sp; | ||
3585 | struct kvm_memory_slot *slot; | ||
3586 | unsigned long *rmapp; | ||
3587 | u64 *spte; | ||
3588 | gfn_t gfn; | ||
3589 | |||
3590 | list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { | ||
3591 | if (sp->role.direct) | ||
3592 | continue; | ||
3593 | if (sp->unsync) | ||
3594 | continue; | ||
3595 | |||
3596 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); | ||
3597 | rmapp = &slot->rmap[gfn - slot->base_gfn]; | ||
3598 | |||
3599 | spte = rmap_next(vcpu->kvm, rmapp, NULL); | ||
3600 | while (spte) { | ||
3601 | if (is_writable_pte(*spte)) | ||
3602 | printk(KERN_ERR "%s: (%s) shadow page has " | ||
3603 | "writable mappings: gfn %lx role %x\n", | ||
3604 | __func__, audit_msg, sp->gfn, | ||
3605 | sp->role.word); | ||
3606 | spte = rmap_next(vcpu->kvm, rmapp, spte); | ||
3607 | } | ||
3608 | } | ||
3609 | } | ||
3610 | |||
3611 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) | ||
3612 | { | 3670 | { |
3613 | int olddbg = dbg; | 3671 | ASSERT(vcpu); |
3614 | 3672 | ||
3615 | dbg = 0; | 3673 | destroy_kvm_mmu(vcpu); |
3616 | audit_msg = msg; | 3674 | free_mmu_pages(vcpu); |
3617 | audit_rmap(vcpu); | 3675 | mmu_free_memory_caches(vcpu); |
3618 | audit_write_protection(vcpu); | 3676 | mmu_audit_disable(); |
3619 | if (strcmp("pre pte write", audit_msg) != 0) | ||
3620 | audit_mappings(vcpu); | ||
3621 | audit_writable_sptes_have_rmaps(vcpu); | ||
3622 | dbg = olddbg; | ||
3623 | } | 3677 | } |
3624 | |||
3625 | #endif | ||
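Net effect of this final hunk: the hand-rolled #ifdef AUDIT page-table walker, rmap counting and write-protection checks are dropped from mmu.c, the auditing now lives in mmu_audit.c and is compiled in only under CONFIG_KVM_MMU_AUDIT, and kvm_mmu_destroy() (re-added here after its earlier removal) finishes by calling mmu_audit_disable(), which collapses to an empty stub when the option is off. The include-or-stub pattern, in isolation, is simply the following; the CONFIG symbol and function names below are illustrative, not the real mmu_audit.c interface:

/* Illustrative include-or-stub pattern (invented names). */
#ifdef CONFIG_MY_SUBSYS_AUDIT
#include "my_subsys_audit.c"		/* real my_audit_disable() and friends */
#else
static void my_audit_disable(void) { }	/* no-op keeps callers #ifdef-free */
#endif

void my_subsys_teardown(void)
{
	/* ...normal teardown work... */
	my_audit_disable();
}

Keeping the stub next to the conditional include means every caller stays unconditional, which is exactly how kvm_mmu_destroy() can call mmu_audit_disable() regardless of the config setting.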