author     Marcelo Tosatti <marcelo@kvack.org>  2007-12-11 19:12:27 -0500
committer  Avi Kivity <avi@qumranet.com>        2008-01-30 10:53:21 -0500
commit     7819026eefee53eaaac3fdce1a2f157c7ea943fe
tree       e5ee690406a8ebe381ce5d712f010a5a0c706c4c
parent     1d075434149c38d457c30d1f11d9c39210b0bb79
KVM: MMU: Fix SMP shadow instantiation race
There is a race where VCPU0 is shadowing a pagetable entry while VCPU1
is updating it, which results in a stale shadow copy.
Fix that by comparing the contents of the cached guest pte with the
current guest pte after write-protecting the guest pagetable.
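
Purely as an illustration of that recheck pattern (not part of the patch below): cache the guest pte seen during the walk, write-protect the guest pagetable, then re-read the pte and discard the freshly instantiated shadow if it changed. The helper read_guest_pte() and the standalone types are hypothetical stand-ins for the kernel's kvm_read_guest() path.

/*
 * Illustrative sketch only -- not kernel code and not part of this patch.
 * read_guest_pte() is a hypothetical stand-in for kvm_read_guest().
 */
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t pt_element_t;
typedef uint64_t gpa_t;

/* Hypothetical helper: read the current guest pte at a guest-physical address. */
extern pt_element_t read_guest_pte(gpa_t pte_gpa);

/*
 * Returns true if the shadow page just instantiated from cached_pte is still
 * valid; false if another VCPU changed the guest pte while it was shadowed.
 */
static bool shadow_instantiation_valid(gpa_t pte_gpa, pt_element_t cached_pte)
{
        /*
         * By this point the guest pagetable has been write-protected, so the
         * pte can no longer change under us; any update that raced with the
         * walk is therefore visible to this re-read.
         */
        pt_element_t curr_pte = read_guest_pte(pte_gpa);

        return curr_pte == cached_pte;
}
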
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
 drivers/kvm/mmu.c         | 12
 drivers/kvm/paging_tmpl.h | 29
 2 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index ba71e8d66761..92ac0d1106b4 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -681,7 +681,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                                              unsigned level,
                                              int metaphysical,
                                              unsigned access,
-                                             u64 *parent_pte)
+                                             u64 *parent_pte,
+                                             bool *new_page)
 {
         union kvm_mmu_page_role role;
         unsigned index;
@@ -720,6 +721,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
         vcpu->mmu.prefetch_page(vcpu, sp);
         if (!metaphysical)
                 rmap_write_protect(vcpu->kvm, gfn);
+        if (new_page)
+                *new_page = 1;
         return sp;
 }
 
@@ -993,7 +996,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
                                 >> PAGE_SHIFT;
                         new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
                                                      v, level - 1,
-                                                     1, ACC_ALL, &table[index]);
+                                                     1, ACC_ALL, &table[index],
+                                                     NULL);
                         if (!new_table) {
                                 pgprintk("nonpaging_map: ENOMEM\n");
                                 return -ENOMEM;
@@ -1059,7 +1063,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
                 ASSERT(!VALID_PAGE(root));
                 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-                                      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
+                                      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
                 root = __pa(sp->spt);
                 ++sp->root_count;
                 vcpu->mmu.root_hpa = root;
@@ -1080,7 +1084,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
                         root_gfn = 0;
                 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
                                       PT32_ROOT_LEVEL, !is_paging(vcpu),
-                                      ACC_ALL, NULL);
+                                      ACC_ALL, NULL, NULL);
                 root = __pa(sp->spt);
                 ++sp->root_count;
                 vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 3ab3fb635e16..fb19596c9589 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -65,7 +65,8 @@
 struct guest_walker {
         int level;
         gfn_t table_gfn[PT_MAX_FULL_LEVELS];
-        pt_element_t pte;
+        pt_element_t ptes[PT_MAX_FULL_LEVELS];
+        gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
         unsigned pt_access;
         unsigned pte_access;
         gfn_t gfn;
@@ -150,6 +151,7 @@ walk:
                 pte_gpa = gfn_to_gpa(table_gfn);
                 pte_gpa += index * sizeof(pt_element_t);
                 walker->table_gfn[walker->level - 1] = table_gfn;
+                walker->pte_gpa[walker->level - 1] = pte_gpa;
                 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
                          walker->level - 1, table_gfn);
 
@@ -180,6 +182,8 @@ walk:
 
                 pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
 
+                walker->ptes[walker->level - 1] = pte;
+
                 if (walker->level == PT_PAGE_TABLE_LEVEL) {
                         walker->gfn = gpte_to_gfn(pte);
                         break;
@@ -209,9 +213,9 @@ walk:
                         goto walk;
                 pte |= PT_DIRTY_MASK;
                 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+                walker->ptes[walker->level - 1] = pte;
         }
 
-        walker->pte = pte;
         walker->pt_access = pt_access;
         walker->pte_access = pte_access;
         pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
@@ -268,7 +272,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         u64 *shadow_ent;
         unsigned access = walker->pt_access;
 
-        if (!is_present_pte(walker->pte))
+        if (!is_present_pte(walker->ptes[walker->level - 1]))
                 return NULL;
 
         shadow_addr = vcpu->mmu.root_hpa;
@@ -285,6 +289,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                 u64 shadow_pte;
                 int metaphysical;
                 gfn_t table_gfn;
+                bool new_page = 0;
 
                 shadow_ent = ((u64 *)__va(shadow_addr)) + index;
                 if (is_shadow_present_pte(*shadow_ent)) {
@@ -300,16 +305,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                 if (level - 1 == PT_PAGE_TABLE_LEVEL
                     && walker->level == PT_DIRECTORY_LEVEL) {
                         metaphysical = 1;
-                        if (!is_dirty_pte(walker->pte))
+                        if (!is_dirty_pte(walker->ptes[level - 1]))
                                 access &= ~ACC_WRITE_MASK;
-                        table_gfn = gpte_to_gfn(walker->pte);
+                        table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
                 } else {
                         metaphysical = 0;
                         table_gfn = walker->table_gfn[level - 2];
                 }
                 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
                                                metaphysical, access,
-                                               shadow_ent);
+                                               shadow_ent, &new_page);
+                if (new_page && !metaphysical) {
+                        pt_element_t curr_pte;
+                        kvm_read_guest(vcpu->kvm, walker->pte_gpa[level - 2],
+                                       &curr_pte, sizeof(curr_pte));
+                        if (curr_pte != walker->ptes[level - 2])
+                                return NULL;
+                }
                 shadow_addr = __pa(shadow_page->spt);
                 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
                         | PT_WRITABLE_MASK | PT_USER_MASK;
@@ -317,7 +329,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         }
 
         mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
-                     user_fault, write_fault, walker->pte & PT_DIRTY_MASK,
+                     user_fault, write_fault,
+                     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
                      ptwrite, walker->gfn);
 
         return shadow_ent;
@@ -382,7 +395,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
         /*
          * mmio: emulate if accessible, otherwise its a guest fault.
          */
-        if (is_io_pte(*shadow_pte))
+        if (shadow_pte && is_io_pte(*shadow_pte))
                 return 1;
 
         ++vcpu->stat.pf_fixed;