diff options
-rw-r--r-- | drivers/kvm/mmu.c | 11 | ||||
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 168 |
2 files changed, 68 insertions, 111 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 2079d69f186a..3cdbf687df25 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -731,6 +731,17 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
731 | return r; | 731 | return r; |
732 | } | 732 | } |
733 | 733 | ||
734 | static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
735 | { | ||
736 | struct kvm_mmu_page *page; | ||
737 | |||
738 | while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { | ||
739 | pgprintk("%s: zap %lx %x\n", | ||
740 | __FUNCTION__, gfn, page->role.word); | ||
741 | kvm_mmu_zap_page(vcpu, page); | ||
742 | } | ||
743 | } | ||
744 | |||
734 | static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) | 745 | static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) |
735 | { | 746 | { |
736 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); | 747 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); |
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 869582befaf1..c06720385551 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -197,11 +197,26 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, | |||
197 | gpa_t gaddr, | 197 | gpa_t gaddr, |
198 | pt_element_t *gpte, | 198 | pt_element_t *gpte, |
199 | u64 access_bits, | 199 | u64 access_bits, |
200 | int user_fault, | ||
200 | int write_fault, | 201 | int write_fault, |
202 | int *ptwrite, | ||
203 | struct guest_walker *walker, | ||
201 | gfn_t gfn) | 204 | gfn_t gfn) |
202 | { | 205 | { |
203 | hpa_t paddr; | 206 | hpa_t paddr; |
204 | int dirty = *gpte & PT_DIRTY_MASK; | 207 | int dirty = *gpte & PT_DIRTY_MASK; |
208 | int was_rmapped = is_rmap_pte(*shadow_pte); | ||
209 | |||
210 | pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" | ||
211 | " user_fault %d gfn %lx\n", | ||
212 | __FUNCTION__, *shadow_pte, (u64)*gpte, access_bits, | ||
213 | write_fault, user_fault, gfn); | ||
214 | |||
215 | if (write_fault && !dirty) { | ||
216 | *gpte |= PT_DIRTY_MASK; | ||
217 | dirty = 1; | ||
218 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||
219 | } | ||
205 | 220 | ||
206 | *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; | 221 | *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; |
207 | if (!dirty) | 222 | if (!dirty) |
@@ -209,7 +224,9 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, | |||
209 | 224 | ||
210 | paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); | 225 | paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); |
211 | 226 | ||
212 | *shadow_pte |= access_bits; | 227 | *shadow_pte |= PT_PRESENT_MASK; |
228 | if (access_bits & PT_USER_MASK) | ||
229 | *shadow_pte |= PT_USER_MASK; | ||
213 | 230 | ||
214 | if (is_error_hpa(paddr)) { | 231 | if (is_error_hpa(paddr)) { |
215 | *shadow_pte |= gaddr; | 232 | *shadow_pte |= gaddr; |
@@ -231,37 +248,50 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, | |||
231 | access_bits &= ~PT_WRITABLE_MASK; | 248 | access_bits &= ~PT_WRITABLE_MASK; |
232 | } | 249 | } |
233 | 250 | ||
234 | if (access_bits & PT_WRITABLE_MASK) { | 251 | if ((access_bits & PT_WRITABLE_MASK) |
252 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | ||
235 | struct kvm_mmu_page *shadow; | 253 | struct kvm_mmu_page *shadow; |
236 | 254 | ||
255 | *shadow_pte |= PT_WRITABLE_MASK; | ||
256 | if (user_fault) { | ||
257 | mmu_unshadow(vcpu, gfn); | ||
258 | goto unshadowed; | ||
259 | } | ||
260 | |||
237 | shadow = kvm_mmu_lookup_page(vcpu, gfn); | 261 | shadow = kvm_mmu_lookup_page(vcpu, gfn); |
238 | if (shadow) { | 262 | if (shadow) { |
239 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 263 | pgprintk("%s: found shadow page for %lx, marking ro\n", |
240 | __FUNCTION__, gfn); | 264 | __FUNCTION__, gfn); |
241 | access_bits &= ~PT_WRITABLE_MASK; | 265 | access_bits &= ~PT_WRITABLE_MASK; |
242 | if (is_writeble_pte(*shadow_pte)) { | 266 | if (is_writeble_pte(*shadow_pte)) { |
243 | *shadow_pte &= ~PT_WRITABLE_MASK; | 267 | *shadow_pte &= ~PT_WRITABLE_MASK; |
244 | kvm_arch_ops->tlb_flush(vcpu); | 268 | kvm_arch_ops->tlb_flush(vcpu); |
245 | } | 269 | } |
270 | if (write_fault) | ||
271 | *ptwrite = 1; | ||
246 | } | 272 | } |
247 | } | 273 | } |
248 | 274 | ||
275 | unshadowed: | ||
276 | |||
249 | if (access_bits & PT_WRITABLE_MASK) | 277 | if (access_bits & PT_WRITABLE_MASK) |
250 | mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); | 278 | mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); |
251 | 279 | ||
252 | page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); | 280 | page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); |
253 | rmap_add(vcpu, shadow_pte); | 281 | if (!was_rmapped) |
282 | rmap_add(vcpu, shadow_pte); | ||
254 | } | 283 | } |
255 | 284 | ||
256 | static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, | 285 | static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, |
257 | u64 *shadow_pte, u64 access_bits, | 286 | u64 *shadow_pte, u64 access_bits, |
258 | int write_fault, gfn_t gfn) | 287 | int user_fault, int write_fault, int *ptwrite, |
288 | struct guest_walker *walker, gfn_t gfn) | ||
259 | { | 289 | { |
260 | ASSERT(*shadow_pte == 0); | ||
261 | access_bits &= *gpte; | 290 | access_bits &= *gpte; |
262 | *shadow_pte = (*gpte & PT_PTE_COPY_MASK); | 291 | *shadow_pte |= (*gpte & PT_PTE_COPY_MASK); |
263 | FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, | 292 | FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, |
264 | gpte, access_bits, write_fault, gfn); | 293 | gpte, access_bits, user_fault, write_fault, |
294 | ptwrite, walker, gfn); | ||
265 | } | 295 | } |
266 | 296 | ||
267 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | 297 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, |
@@ -276,31 +306,34 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
276 | return; | 306 | return; |
277 | pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); | 307 | pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); |
278 | FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, | 308 | FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, |
309 | 0, NULL, NULL, | ||
279 | (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); | 310 | (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); |
280 | } | 311 | } |
281 | 312 | ||
282 | static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, | 313 | static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, |
283 | u64 *shadow_pte, u64 access_bits, int write_fault, | 314 | u64 *shadow_pte, u64 access_bits, |
284 | gfn_t gfn) | 315 | int user_fault, int write_fault, int *ptwrite, |
316 | struct guest_walker *walker, gfn_t gfn) | ||
285 | { | 317 | { |
286 | gpa_t gaddr; | 318 | gpa_t gaddr; |
287 | 319 | ||
288 | ASSERT(*shadow_pte == 0); | ||
289 | access_bits &= *gpde; | 320 | access_bits &= *gpde; |
290 | gaddr = (gpa_t)gfn << PAGE_SHIFT; | 321 | gaddr = (gpa_t)gfn << PAGE_SHIFT; |
291 | if (PTTYPE == 32 && is_cpuid_PSE36()) | 322 | if (PTTYPE == 32 && is_cpuid_PSE36()) |
292 | gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << | 323 | gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << |
293 | (32 - PT32_DIR_PSE36_SHIFT); | 324 | (32 - PT32_DIR_PSE36_SHIFT); |
294 | *shadow_pte = *gpde & PT_PTE_COPY_MASK; | 325 | *shadow_pte |= *gpde & PT_PTE_COPY_MASK; |
295 | FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, | 326 | FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, |
296 | gpde, access_bits, write_fault, gfn); | 327 | gpde, access_bits, user_fault, write_fault, |
328 | ptwrite, walker, gfn); | ||
297 | } | 329 | } |
298 | 330 | ||
299 | /* | 331 | /* |
300 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 332 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
301 | */ | 333 | */ |
302 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 334 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
303 | struct guest_walker *walker, int write_fault) | 335 | struct guest_walker *walker, |
336 | int user_fault, int write_fault, int *ptwrite) | ||
304 | { | 337 | { |
305 | hpa_t shadow_addr; | 338 | hpa_t shadow_addr; |
306 | int level; | 339 | int level; |
@@ -330,7 +363,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
330 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | 363 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; |
331 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { | 364 | if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { |
332 | if (level == PT_PAGE_TABLE_LEVEL) | 365 | if (level == PT_PAGE_TABLE_LEVEL) |
333 | return shadow_ent; | 366 | break; |
334 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; | 367 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; |
335 | prev_shadow_ent = shadow_ent; | 368 | prev_shadow_ent = shadow_ent; |
336 | continue; | 369 | continue; |
@@ -365,95 +398,18 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
365 | if (prev_shadow_ent) | 398 | if (prev_shadow_ent) |
366 | *prev_shadow_ent |= PT_SHADOW_PS_MARK; | 399 | *prev_shadow_ent |= PT_SHADOW_PS_MARK; |
367 | FNAME(set_pde)(vcpu, guest_ent, shadow_ent, | 400 | FNAME(set_pde)(vcpu, guest_ent, shadow_ent, |
368 | walker->inherited_ar, write_fault, walker->gfn); | 401 | walker->inherited_ar, user_fault, write_fault, |
402 | ptwrite, walker, walker->gfn); | ||
369 | } else { | 403 | } else { |
370 | ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); | 404 | ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); |
371 | FNAME(set_pte)(vcpu, guest_ent, shadow_ent, | 405 | FNAME(set_pte)(vcpu, guest_ent, shadow_ent, |
372 | walker->inherited_ar, | 406 | walker->inherited_ar, user_fault, write_fault, |
373 | write_fault, walker->gfn); | 407 | ptwrite, walker, walker->gfn); |
374 | } | 408 | } |
375 | return shadow_ent; | 409 | return shadow_ent; |
376 | } | 410 | } |
377 | 411 | ||
378 | /* | 412 | /* |
379 | * The guest faulted for write. We need to | ||
380 | * | ||
381 | * - check write permissions | ||
382 | * - update the guest pte dirty bit | ||
383 | * - update our own dirty page tracking structures | ||
384 | */ | ||
385 | static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, | ||
386 | u64 *shadow_ent, | ||
387 | struct guest_walker *walker, | ||
388 | gva_t addr, | ||
389 | int user, | ||
390 | int *write_pt) | ||
391 | { | ||
392 | pt_element_t *guest_ent; | ||
393 | int writable_shadow; | ||
394 | gfn_t gfn; | ||
395 | struct kvm_mmu_page *page; | ||
396 | |||
397 | if (is_writeble_pte(*shadow_ent)) | ||
398 | return !user || (*shadow_ent & PT_USER_MASK); | ||
399 | |||
400 | writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK; | ||
401 | if (user) { | ||
402 | /* | ||
403 | * User mode access. Fail if it's a kernel page or a read-only | ||
404 | * page. | ||
405 | */ | ||
406 | if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow) | ||
407 | return 0; | ||
408 | ASSERT(*shadow_ent & PT_USER_MASK); | ||
409 | } else | ||
410 | /* | ||
411 | * Kernel mode access. Fail if it's a read-only page and | ||
412 | * supervisor write protection is enabled. | ||
413 | */ | ||
414 | if (!writable_shadow) { | ||
415 | if (is_write_protection(vcpu)) | ||
416 | return 0; | ||
417 | *shadow_ent &= ~PT_USER_MASK; | ||
418 | } | ||
419 | |||
420 | guest_ent = walker->ptep; | ||
421 | |||
422 | if (!is_present_pte(*guest_ent)) { | ||
423 | *shadow_ent = 0; | ||
424 | return 0; | ||
425 | } | ||
426 | |||
427 | gfn = walker->gfn; | ||
428 | |||
429 | if (user) { | ||
430 | /* | ||
431 | * Usermode page faults won't be for page table updates. | ||
432 | */ | ||
433 | while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { | ||
434 | pgprintk("%s: zap %lx %x\n", | ||
435 | __FUNCTION__, gfn, page->role.word); | ||
436 | kvm_mmu_zap_page(vcpu, page); | ||
437 | } | ||
438 | } else if (kvm_mmu_lookup_page(vcpu, gfn)) { | ||
439 | pgprintk("%s: found shadow page for %lx, marking ro\n", | ||
440 | __FUNCTION__, gfn); | ||
441 | mark_page_dirty(vcpu->kvm, gfn); | ||
442 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||
443 | *guest_ent |= PT_DIRTY_MASK; | ||
444 | *write_pt = 1; | ||
445 | return 0; | ||
446 | } | ||
447 | mark_page_dirty(vcpu->kvm, gfn); | ||
448 | *shadow_ent |= PT_WRITABLE_MASK; | ||
449 | FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||
450 | *guest_ent |= PT_DIRTY_MASK; | ||
451 | rmap_add(vcpu, shadow_ent); | ||
452 | |||
453 | return 1; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Page fault handler. There are several causes for a page fault: | 413 | * Page fault handler. There are several causes for a page fault: |
458 | * - there is no shadow pte for the guest pte | 414 | * - there is no shadow pte for the guest pte |
459 | * - write access through a shadow pte marked read only so that we can set | 415 | * - write access through a shadow pte marked read only so that we can set |
@@ -475,7 +431,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
475 | int fetch_fault = error_code & PFERR_FETCH_MASK; | 431 | int fetch_fault = error_code & PFERR_FETCH_MASK; |
476 | struct guest_walker walker; | 432 | struct guest_walker walker; |
477 | u64 *shadow_pte; | 433 | u64 *shadow_pte; |
478 | int fixed; | ||
479 | int write_pt = 0; | 434 | int write_pt = 0; |
480 | int r; | 435 | int r; |
481 | 436 | ||
@@ -503,19 +458,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
503 | return 0; | 458 | return 0; |
504 | } | 459 | } |
505 | 460 | ||
506 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, write_fault); | 461 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
507 | pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, | 462 | &write_pt); |
508 | shadow_pte, *shadow_pte); | 463 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, |
509 | 464 | shadow_pte, *shadow_pte, write_pt); | |
510 | /* | ||
511 | * Update the shadow pte. | ||
512 | */ | ||
513 | if (write_fault) | ||
514 | fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, | ||
515 | user_fault, &write_pt); | ||
516 | |||
517 | pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__, | ||
518 | shadow_pte, *shadow_pte); | ||
519 | 465 | ||
520 | FNAME(release_walker)(&walker); | 466 | FNAME(release_walker)(&walker); |
521 | 467 | ||