author     Linus Torvalds <torvalds@linux-foundation.org>   2012-12-13 18:31:08 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-12-13 18:31:08 -0500
commit     66cdd0ceaf65a18996f561b770eedde1d123b019 (patch)
tree       4892eaa422d366fce5d1e866ff1fe0988af95569 /arch/x86/kvm/paging_tmpl.h
parent     896ea17d3da5f44b2625c9cda9874d7dfe447393 (diff)
parent     58b7825bc324da55415034a9f6ca5d716b8fd898 (diff)
Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
"Considerable KVM/PPC work, x86 kvmclock vsyscall support,
IA32_TSC_ADJUST MSR emulation, amongst others."
Fix up trivial conflict in kernel/sched/core.c due to cross-cpu
migration notifier added next to rq migration call-back.
* tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits)
KVM: emulator: fix real mode segment checks in address linearization
VMX: remove unneeded enable_unrestricted_guest check
KVM: VMX: fix DPL during entry to protected mode
x86/kexec: crash_vmclear_local_vmcss needs __rcu
kvm: Fix irqfd resampler list walk
KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump
x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
KVM: MMU: optimize for set_spte
KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
KVM: PPC: bookehv: Add guest computation mode for irq delivery
KVM: PPC: Make EPCR a valid field for booke64 and bookehv
KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
KVM: PPC: e500: Add emulation helper for getting instruction ea
KVM: PPC: bookehv64: Add support for interrupt handling
KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
KVM: PPC: booke: Fix get_tb() compile error on 64-bit
KVM: PPC: e500: Silence bogus GCC warning in tlb code
...
Diffstat (limited to 'arch/x86/kvm/paging_tmpl.h')
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 115
1 file changed, 41 insertions(+), 74 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 714e2c01a6fe..891eb6d93b8b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -305,51 +305,43 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
                                         addr, access);
 }
 
-static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
-                                   struct kvm_mmu_page *sp, u64 *spte,
-                                   pt_element_t gpte)
+static bool
+FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+                    u64 *spte, pt_element_t gpte, bool no_dirty_log)
 {
-       if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
-               goto no_present;
-
-       if (!is_present_gpte(gpte))
-               goto no_present;
-
-       if (!(gpte & PT_ACCESSED_MASK))
-               goto no_present;
-
-       return false;
-
-no_present:
-       drop_spte(vcpu->kvm, spte);
-       return true;
-}
-
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-                             u64 *spte, const void *pte)
-{
-       pt_element_t gpte;
        unsigned pte_access;
+       gfn_t gfn;
        pfn_t pfn;
 
-       gpte = *(const pt_element_t *)pte;
-       if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
-               return;
+       if (prefetch_invalid_gpte(vcpu, sp, spte, gpte))
+               return false;
 
        pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
+
+       gfn = gpte_to_gfn(gpte);
        pte_access = sp->role.access & gpte_access(vcpu, gpte);
        protect_clean_gpte(&pte_access, gpte);
-       pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
-       if (mmu_invalid_pfn(pfn))
-               return;
+       pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+                       no_dirty_log && (pte_access & ACC_WRITE_MASK));
+       if (is_error_pfn(pfn))
+               return false;
 
        /*
-        * we call mmu_set_spte() with host_writable = true because that
-        * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+        * we call mmu_set_spte() with host_writable = true because
+        * pte_prefetch_gfn_to_pfn always gets a writable pfn.
         */
        mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-                    NULL, PT_PAGE_TABLE_LEVEL,
-                    gpte_to_gfn(gpte), pfn, true, true);
+                    NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
+
+       return true;
+}
+
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+                             u64 *spte, const void *pte)
+{
+       pt_element_t gpte = *(const pt_element_t *)pte;
+
+       FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
 }
 
 static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
@@ -395,53 +387,34 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
        spte = sp->spt + i;
 
        for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
-               pt_element_t gpte;
-               unsigned pte_access;
-               gfn_t gfn;
-               pfn_t pfn;
-
                if (spte == sptep)
                        continue;
 
                if (is_shadow_present_pte(*spte))
                        continue;
 
-               gpte = gptep[i];
-
-               if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
-                       continue;
-
-               pte_access = sp->role.access & gpte_access(vcpu, gpte);
-               protect_clean_gpte(&pte_access, gpte);
-               gfn = gpte_to_gfn(gpte);
-               pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
-                                     pte_access & ACC_WRITE_MASK);
-               if (mmu_invalid_pfn(pfn))
+               if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
                        break;
-
-               mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-                            NULL, PT_PAGE_TABLE_LEVEL, gfn,
-                            pfn, true, true);
        }
 }
 
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
+ * If the guest tries to write a write-protected page, we need to
+ * emulate this operation, return 1 to indicate this case.
  */
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                         struct guest_walker *gw,
                         int user_fault, int write_fault, int hlevel,
-                        int *emulate, pfn_t pfn, bool map_writable,
-                        bool prefault)
+                        pfn_t pfn, bool map_writable, bool prefault)
 {
-       unsigned access = gw->pt_access;
        struct kvm_mmu_page *sp = NULL;
-       int top_level;
-       unsigned direct_access;
        struct kvm_shadow_walk_iterator it;
+       unsigned direct_access, access = gw->pt_access;
+       int top_level, emulate = 0;
 
        if (!is_present_gpte(gw->ptes[gw->level - 1]))
-               return NULL;
+               return 0;
 
        direct_access = gw->pte_access;
 
@@ -505,17 +478,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
        clear_sp_write_flooding_count(it.sptep);
        mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
-                    user_fault, write_fault, emulate, it.level,
+                    user_fault, write_fault, &emulate, it.level,
                     gw->gfn, pfn, prefault, map_writable);
        FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
-       return it.sptep;
+       return emulate;
 
 out_gpte_changed:
        if (sp)
                kvm_mmu_put_page(sp, it.sptep);
        kvm_release_pfn_clean(pfn);
-       return NULL;
+       return 0;
 }
 
 /*
@@ -538,8 +511,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
        int write_fault = error_code & PFERR_WRITE_MASK;
        int user_fault = error_code & PFERR_USER_MASK;
        struct guest_walker walker;
-       u64 *sptep;
-       int emulate = 0;
        int r;
        pfn_t pfn;
        int level = PT_PAGE_TABLE_LEVEL;
@@ -594,24 +565,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
                return r;
 
        spin_lock(&vcpu->kvm->mmu_lock);
-       if (mmu_notifier_retry(vcpu, mmu_seq))
+       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                goto out_unlock;
 
        kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
        kvm_mmu_free_some_pages(vcpu);
        if (!force_pt_level)
                transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
-       sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-                            level, &emulate, pfn, map_writable, prefault);
-       (void)sptep;
-       pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
-                sptep, *sptep, emulate);
-
+       r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+                        level, pfn, map_writable, prefault);
        ++vcpu->stat.pf_fixed;
        kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
        spin_unlock(&vcpu->kvm->mmu_lock);
 
-       return emulate;
+       return r;
 
 out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
@@ -757,7 +724,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                                      sizeof(pt_element_t)))
                        return -EINVAL;
 
-               if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+               if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) {
                        vcpu->kvm->tlbs_dirty++;
                        continue;
                }
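
Net effect of the changes shown above: the guest-PTE validity check, pfn lookup and shadow-PTE installation that FNAME(update_pte) and FNAME(pte_prefetch) used to duplicate are consolidated into a single FNAME(prefetch_gpte) helper that returns false when the entry cannot be mapped, and FNAME(fetch)/FNAME(page_fault) now report the "emulate" decision through the return value rather than an int *emulate out-parameter. The standalone C sketch below only illustrates that refactoring pattern; all names (demo_*), the struct and the simplified checks are invented for this example and are not KVM code.

/*
 * Illustrative sketch only, not kernel code: a single helper validates and
 * maps an entry (returning false so callers stop), and the fault path
 * returns its emulate decision instead of writing an out-parameter.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_gpte {
        bool present;
        bool accessed;
        unsigned long gfn;
};

/* Roughly analogous to FNAME(prefetch_gpte): validate, then install. */
static bool demo_prefetch_gpte(const struct demo_gpte *gpte)
{
        if (!gpte->present || !gpte->accessed)
                return false;           /* caller skips or stops prefetching */
        printf("map gfn %lu\n", gpte->gfn);
        return true;
}

/* Roughly analogous to FNAME(fetch) changing from u64 * to int: the
 * emulate flag is the return value, not an out-parameter. */
static int demo_fetch(const struct demo_gpte *gpte, bool write_protected,
                      bool write_fault)
{
        int emulate = 0;

        if (!demo_prefetch_gpte(gpte))
                return 0;
        if (write_fault && write_protected)
                emulate = 1;            /* write hit a write-protected page */
        return emulate;
}

int main(void)
{
        struct demo_gpte g = { .present = true, .accessed = true, .gfn = 42 };

        printf("emulate = %d\n", demo_fetch(&g, true, true));
        return 0;
}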