author     Marcelo Tosatti <marcelo@kvack.org>   2007-12-11 19:12:27 -0500
committer  Avi Kivity <avi@qumranet.com>          2008-01-30 10:53:21 -0500
commit     7819026eefee53eaaac3fdce1a2f157c7ea943fe (patch)
tree       e5ee690406a8ebe381ce5d712f010a5a0c706c4c /drivers/kvm/paging_tmpl.h
parent     1d075434149c38d457c30d1f11d9c39210b0bb79 (diff)

KVM: MMU: Fix SMP shadow instantiation race
There is a race where VCPU0 is shadowing a pagetable entry while VCPU1
is updating it, which results in a stale shadow copy.

Fix that by comparing the contents of the cached guest pte with the
current guest pte after write-protecting the guest pagetable.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm/paging_tmpl.h')

    -rw-r--r--  drivers/kvm/paging_tmpl.h | 29 +++++++++++++++++++++-------
    1 file changed, 21 insertions(+), 8 deletions(-)
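
The fix is a textbook optimistic-concurrency pattern: cache what was read
during the walk, perform the operation that closes the race window (here,
instantiating the shadow page, which write-protects the guest page table),
then re-read the guest pte and compare. Below is a minimal standalone sketch
of the check, with a hypothetical read_guest_pte() helper standing in for
the kernel's kvm_read_guest(); it illustrates the idea and is not the
kernel code itself.

```c
#include <stdint.h>

typedef uint64_t pt_element_t;
typedef uint64_t gpa_t;

/* Hypothetical stand-in for kvm_read_guest(): fetch the current pte
 * from guest memory at guest-physical address 'gpa'. */
extern void read_guest_pte(gpa_t gpa, pt_element_t *out);

/*
 * 'cached' was read before the guest page table was write-protected.
 * Once protection is in place the guest can no longer modify the entry
 * without faulting, so a read-back taken now is authoritative: a
 * mismatch means another VCPU updated the entry in the window, and the
 * cached copy is stale.
 */
static int cached_pte_still_valid(pt_element_t cached, gpa_t pte_gpa)
{
        pt_element_t curr;

        read_guest_pte(pte_gpa, &curr);
        return curr == cached;
}
```

In the patch, kvm_mmu_get_page() gains a new_page out-parameter so the
caller knows whether a shadow page was freshly instantiated (and the guest
page table therefore freshly write-protected); only then is the recheck
needed.
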
```diff
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 3ab3fb635e1..fb19596c958 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -65,7 +65,8 @@
 struct guest_walker {
         int level;
         gfn_t table_gfn[PT_MAX_FULL_LEVELS];
-        pt_element_t pte;
+        pt_element_t ptes[PT_MAX_FULL_LEVELS];
+        gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
         unsigned pt_access;
         unsigned pte_access;
         gfn_t gfn;
```
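
The recheck in FNAME(fetch) runs one paging level at a time, so the walker
must remember both the guest pte it saw at every level and where that pte
lives, not just the leaf entry. A standalone sketch of the bookkeeping, with
simplified, hypothetical names (the kernel's guest_walker carries more
state than this):

```c
#include <stdint.h>

#define MAX_LEVELS 4                 /* plays the role of PT_MAX_FULL_LEVELS */

typedef uint64_t pt_element_t;
typedef uint64_t gpa_t;

struct walk_cache {
        int level;                       /* current level, 1-based (1 = leaf) */
        pt_element_t ptes[MAX_LEVELS];   /* guest pte value seen at each level */
        gpa_t pte_gpa[MAX_LEVELS];       /* guest-physical address of that pte */
};

/* Record one level of the walk: the value *and* its location, so a
 * later consumer can re-read the entry and detect a racing update. */
static void cache_level(struct walk_cache *w, pt_element_t pte, gpa_t gpa)
{
        w->ptes[w->level - 1] = pte;
        w->pte_gpa[w->level - 1] = gpa;
}
```

The walker hunks below do exactly this: they store pte_gpa and pte at each
level, and refresh the cached copy when the walk itself sets the dirty bit.
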
```diff
@@ -150,6 +151,7 @@ walk:
                 pte_gpa = gfn_to_gpa(table_gfn);
                 pte_gpa += index * sizeof(pt_element_t);
                 walker->table_gfn[walker->level - 1] = table_gfn;
+                walker->pte_gpa[walker->level - 1] = pte_gpa;
                 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
                          walker->level - 1, table_gfn);
 
@@ -180,6 +182,8 @@ walk:
 
                 pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
 
+                walker->ptes[walker->level - 1] = pte;
+
                 if (walker->level == PT_PAGE_TABLE_LEVEL) {
                         walker->gfn = gpte_to_gfn(pte);
                         break;
@@ -209,9 +213,9 @@ walk:
                         goto walk;
                 pte |= PT_DIRTY_MASK;
                 kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+                walker->ptes[walker->level - 1] = pte;
         }
 
-        walker->pte = pte;
         walker->pt_access = pt_access;
         walker->pte_access = pte_access;
         pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
@@ -268,7 +272,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         u64 *shadow_ent;
         unsigned access = walker->pt_access;
 
-        if (!is_present_pte(walker->pte))
+        if (!is_present_pte(walker->ptes[walker->level - 1]))
                 return NULL;
 
         shadow_addr = vcpu->mmu.root_hpa;
@@ -285,6 +289,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                 u64 shadow_pte;
                 int metaphysical;
                 gfn_t table_gfn;
+                bool new_page = 0;
 
                 shadow_ent = ((u64 *)__va(shadow_addr)) + index;
                 if (is_shadow_present_pte(*shadow_ent)) {
@@ -300,16 +305,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                 if (level - 1 == PT_PAGE_TABLE_LEVEL
                     && walker->level == PT_DIRECTORY_LEVEL) {
                         metaphysical = 1;
-                        if (!is_dirty_pte(walker->pte))
+                        if (!is_dirty_pte(walker->ptes[level - 1]))
                                 access &= ~ACC_WRITE_MASK;
-                        table_gfn = gpte_to_gfn(walker->pte);
+                        table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
                 } else {
                         metaphysical = 0;
                         table_gfn = walker->table_gfn[level - 2];
                 }
                 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
                                                metaphysical, access,
-                                               shadow_ent);
+                                               shadow_ent, &new_page);
+                if (new_page && !metaphysical) {
+                        pt_element_t curr_pte;
+                        kvm_read_guest(vcpu->kvm, walker->pte_gpa[level - 2],
+                                       &curr_pte, sizeof(curr_pte));
+                        if (curr_pte != walker->ptes[level - 2])
+                                return NULL;
+                }
                 shadow_addr = __pa(shadow_page->spt);
                 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
                         | PT_WRITABLE_MASK | PT_USER_MASK;
@@ -317,7 +329,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         }
 
         mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
-                     user_fault, write_fault, walker->pte & PT_DIRTY_MASK,
+                     user_fault, write_fault,
+                     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
                      ptwrite, walker->gfn);
 
         return shadow_ent;
@@ -382,7 +395,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
         /*
          * mmio: emulate if accessible, otherwise its a guest fault.
          */
-        if (is_io_pte(*shadow_pte))
+        if (shadow_pte && is_io_pte(*shadow_pte))
                 return 1;
 
         ++vcpu->stat.pf_fixed;
```
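
Because FNAME(fetch) can now bail out with NULL when the recheck fails, the
final hunk makes FNAME(page_fault) check the pointer before dereferencing
it for the mmio test. In effect, a detected race simply aborts shadow
instantiation for this fault: the guest retries the access, faults again,
and the fresh walk sees the current guest pte.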