diff options
author | Avi Kivity <avi@qumranet.com> | 2008-02-26 15:12:10 -0500 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-03-04 08:19:49 -0500 |
commit | f7d9c7b7b902f9f532738d47593d9679b0b182d9 (patch) | |
tree | 61af54605ead13b71f664a0ce4776720d10a3ef1 | |
parent | 8c35f237fb5664d30aa90448c3d6cea0cbb43f35 (diff) |
KVM: MMU: Fix race when instantiating a shadow pte
For improved concurrency, the guest walk is performed concurrently with other
vcpus. This means that we need to revalidate the guest ptes once we have
write-protected the guest page tables, at which point they can no longer be
modified.
The current code attempts to avoid this check if the shadow page table is not
new, on the assumption that if it has existed before, the guest could not have
modified the pte without the shadow lock. However the assumption is incorrect,
as the racing vcpu could have modified the pte, then instantiated the shadow
page, before our vcpu regains control:
vcpu0 vcpu1
fault
walk pte
modify pte
fault in same pagetable
instantiate shadow page
lookup shadow page
conclude it is old
instantiate spte based on stale guest pte
We could do something clever with generation counters, but a test run by
Marcelo suggests this is unnecessary and we can just do the revalidation
unconditionally. The pte will be in the processor cache and the check can
be quite fast.
Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r-- | arch/x86/kvm/mmu.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 5 |
2 files changed, 6 insertions, 11 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 194ece6974e6..d8172aabc660 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -681,8 +681,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
681 | unsigned level, | 681 | unsigned level, |
682 | int metaphysical, | 682 | int metaphysical, |
683 | unsigned access, | 683 | unsigned access, |
684 | u64 *parent_pte, | 684 | u64 *parent_pte) |
685 | bool *new_page) | ||
686 | { | 685 | { |
687 | union kvm_mmu_page_role role; | 686 | union kvm_mmu_page_role role; |
688 | unsigned index; | 687 | unsigned index; |
@@ -722,8 +721,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
722 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | 721 | vcpu->arch.mmu.prefetch_page(vcpu, sp); |
723 | if (!metaphysical) | 722 | if (!metaphysical) |
724 | rmap_write_protect(vcpu->kvm, gfn); | 723 | rmap_write_protect(vcpu->kvm, gfn); |
725 | if (new_page) | ||
726 | *new_page = 1; | ||
727 | return sp; | 724 | return sp; |
728 | } | 725 | } |
729 | 726 | ||
@@ -1006,8 +1003,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, | |||
1006 | >> PAGE_SHIFT; | 1003 | >> PAGE_SHIFT; |
1007 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | 1004 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, |
1008 | v, level - 1, | 1005 | v, level - 1, |
1009 | 1, ACC_ALL, &table[index], | 1006 | 1, ACC_ALL, &table[index]); |
1010 | NULL); | ||
1011 | if (!new_table) { | 1007 | if (!new_table) { |
1012 | pgprintk("nonpaging_map: ENOMEM\n"); | 1008 | pgprintk("nonpaging_map: ENOMEM\n"); |
1013 | kvm_release_page_clean(page); | 1009 | kvm_release_page_clean(page); |
@@ -1100,7 +1096,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1100 | 1096 | ||
1101 | ASSERT(!VALID_PAGE(root)); | 1097 | ASSERT(!VALID_PAGE(root)); |
1102 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 1098 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
1103 | PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL); | 1099 | PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); |
1104 | root = __pa(sp->spt); | 1100 | root = __pa(sp->spt); |
1105 | ++sp->root_count; | 1101 | ++sp->root_count; |
1106 | vcpu->arch.mmu.root_hpa = root; | 1102 | vcpu->arch.mmu.root_hpa = root; |
@@ -1121,7 +1117,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1121 | root_gfn = 0; | 1117 | root_gfn = 0; |
1122 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 1118 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
1123 | PT32_ROOT_LEVEL, !is_paging(vcpu), | 1119 | PT32_ROOT_LEVEL, !is_paging(vcpu), |
1124 | ACC_ALL, NULL, NULL); | 1120 | ACC_ALL, NULL); |
1125 | root = __pa(sp->spt); | 1121 | root = __pa(sp->spt); |
1126 | ++sp->root_count; | 1122 | ++sp->root_count; |
1127 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 1123 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 5588fa594b61..ecc0856268c4 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -300,7 +300,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
300 | u64 shadow_pte; | 300 | u64 shadow_pte; |
301 | int metaphysical; | 301 | int metaphysical; |
302 | gfn_t table_gfn; | 302 | gfn_t table_gfn; |
303 | bool new_page = 0; | ||
304 | 303 | ||
305 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | 304 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; |
306 | if (level == PT_PAGE_TABLE_LEVEL) | 305 | if (level == PT_PAGE_TABLE_LEVEL) |
@@ -322,8 +321,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
322 | } | 321 | } |
323 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | 322 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, |
324 | metaphysical, access, | 323 | metaphysical, access, |
325 | shadow_ent, &new_page); | 324 | shadow_ent); |
326 | if (new_page && !metaphysical) { | 325 | if (!metaphysical) { |
327 | int r; | 326 | int r; |
328 | pt_element_t curr_pte; | 327 | pt_element_t curr_pte; |
329 | r = kvm_read_guest_atomic(vcpu->kvm, | 328 | r = kvm_read_guest_atomic(vcpu->kvm, |