diff options
author | Avi Kivity <avi@qumranet.com> | 2007-03-23 03:55:25 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2007-05-03 03:52:27 -0400 |
commit | d28c6cfbbc5e2d4fccfe6d733995ed5971ca87f6 (patch) | |
tree | 0da31407f97a92c81d189b18608e54ad2064ebad /drivers/kvm | |
parent | 916ce2360fadc71d924e02403b31280112a31280 (diff) |
KVM: MMU: Fix hugepage pdes mapping same physical address with different access
The kvm mmu keeps a shadow page for hugepage pdes; if several such pdes map
the same physical address, they share the same shadow page. This is a fairly
common case (kernel mappings on i386 nonpae Linux, for example).
However, if the two pdes map the same memory but with different permissions, kvm
will happily reuse the cached shadow page. If an access through the more
permissive pde occurs after an access through the stricter pde, an endless
page-fault loop results and the guest makes no progress.
Fix by making the access permissions part of the cache lookup key.
The fix allows Xen pae to boot on kvm and run guest domains.
Thanks to Jeremy Fitzhardinge for reporting the bug and testing the fix.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm')
-rw-r--r-- | drivers/kvm/kvm.h | 2 | ||||
-rw-r--r-- | drivers/kvm/mmu.c | 8 | ||||
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 7 |
3 files changed, 13 insertions, 4 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 7361c45d70c9..f5e343cb06b0 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h | |||
@@ -109,6 +109,7 @@ struct kvm_pte_chain { | |||
109 | * bits 4:7 - page table level for this shadow (1-4) | 109 | * bits 4:7 - page table level for this shadow (1-4) |
110 | * bits 8:9 - page table quadrant for 2-level guests | 110 | * bits 8:9 - page table quadrant for 2-level guests |
111 | * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) | 111 | * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) |
112 | * bits 17:18 - "access" - the user and writable bits of a huge page pde | ||
112 | */ | 113 | */ |
113 | union kvm_mmu_page_role { | 114 | union kvm_mmu_page_role { |
114 | unsigned word; | 115 | unsigned word; |
@@ -118,6 +119,7 @@ union kvm_mmu_page_role { | |||
118 | unsigned quadrant : 2; | 119 | unsigned quadrant : 2; |
119 | unsigned pad_for_nice_hex_output : 6; | 120 | unsigned pad_for_nice_hex_output : 6; |
120 | unsigned metaphysical : 1; | 121 | unsigned metaphysical : 1; |
122 | unsigned hugepage_access : 2; | ||
121 | }; | 123 | }; |
122 | }; | 124 | }; |
123 | 125 | ||
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 2930d7cc7c06..c738fb1cea30 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c | |||
@@ -568,6 +568,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
568 | gva_t gaddr, | 568 | gva_t gaddr, |
569 | unsigned level, | 569 | unsigned level, |
570 | int metaphysical, | 570 | int metaphysical, |
571 | unsigned hugepage_access, | ||
571 | u64 *parent_pte) | 572 | u64 *parent_pte) |
572 | { | 573 | { |
573 | union kvm_mmu_page_role role; | 574 | union kvm_mmu_page_role role; |
@@ -581,6 +582,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
581 | role.glevels = vcpu->mmu.root_level; | 582 | role.glevels = vcpu->mmu.root_level; |
582 | role.level = level; | 583 | role.level = level; |
583 | role.metaphysical = metaphysical; | 584 | role.metaphysical = metaphysical; |
585 | role.hugepage_access = hugepage_access; | ||
584 | if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { | 586 | if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { |
585 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 587 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
586 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 588 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
@@ -780,7 +782,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | |||
780 | >> PAGE_SHIFT; | 782 | >> PAGE_SHIFT; |
781 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | 783 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, |
782 | v, level - 1, | 784 | v, level - 1, |
783 | 1, &table[index]); | 785 | 1, 0, &table[index]); |
784 | if (!new_table) { | 786 | if (!new_table) { |
785 | pgprintk("nonpaging_map: ENOMEM\n"); | 787 | pgprintk("nonpaging_map: ENOMEM\n"); |
786 | return -ENOMEM; | 788 | return -ENOMEM; |
@@ -835,7 +837,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
835 | 837 | ||
836 | ASSERT(!VALID_PAGE(root)); | 838 | ASSERT(!VALID_PAGE(root)); |
837 | page = kvm_mmu_get_page(vcpu, root_gfn, 0, | 839 | page = kvm_mmu_get_page(vcpu, root_gfn, 0, |
838 | PT64_ROOT_LEVEL, 0, NULL); | 840 | PT64_ROOT_LEVEL, 0, 0, NULL); |
839 | root = page->page_hpa; | 841 | root = page->page_hpa; |
840 | ++page->root_count; | 842 | ++page->root_count; |
841 | vcpu->mmu.root_hpa = root; | 843 | vcpu->mmu.root_hpa = root; |
@@ -852,7 +854,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
852 | root_gfn = 0; | 854 | root_gfn = 0; |
853 | page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 855 | page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
854 | PT32_ROOT_LEVEL, !is_paging(vcpu), | 856 | PT32_ROOT_LEVEL, !is_paging(vcpu), |
855 | NULL); | 857 | 0, NULL); |
856 | root = page->page_hpa; | 858 | root = page->page_hpa; |
857 | ++page->root_count; | 859 | ++page->root_count; |
858 | vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; | 860 | vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; |
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 17bd4400c92b..b94010dad809 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h | |||
@@ -247,6 +247,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
247 | u64 shadow_pte; | 247 | u64 shadow_pte; |
248 | int metaphysical; | 248 | int metaphysical; |
249 | gfn_t table_gfn; | 249 | gfn_t table_gfn; |
250 | unsigned hugepage_access = 0; | ||
250 | 251 | ||
251 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { | 252 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { |
252 | if (level == PT_PAGE_TABLE_LEVEL) | 253 | if (level == PT_PAGE_TABLE_LEVEL) |
@@ -276,6 +277,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
276 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 277 | if (level - 1 == PT_PAGE_TABLE_LEVEL |
277 | && walker->level == PT_DIRECTORY_LEVEL) { | 278 | && walker->level == PT_DIRECTORY_LEVEL) { |
278 | metaphysical = 1; | 279 | metaphysical = 1; |
280 | hugepage_access = *guest_ent; | ||
281 | hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; | ||
282 | hugepage_access >>= PT_WRITABLE_SHIFT; | ||
279 | table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) | 283 | table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) |
280 | >> PAGE_SHIFT; | 284 | >> PAGE_SHIFT; |
281 | } else { | 285 | } else { |
@@ -283,7 +287,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
283 | table_gfn = walker->table_gfn[level - 2]; | 287 | table_gfn = walker->table_gfn[level - 2]; |
284 | } | 288 | } |
285 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | 289 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, |
286 | metaphysical, shadow_ent); | 290 | metaphysical, hugepage_access, |
291 | shadow_ent); | ||
287 | shadow_addr = shadow_page->page_hpa; | 292 | shadow_addr = shadow_page->page_hpa; |
288 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | 293 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK |
289 | | PT_WRITABLE_MASK | PT_USER_MASK; | 294 | | PT_WRITABLE_MASK | PT_USER_MASK; |