Diffstat (limited to 'arch/powerpc/kvm/book3s_64_mmu_hv.c')
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 290 ++++++++++++++++++++++++++++++++----
 1 file changed, 266 insertions(+), 24 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d31519b8637..83761dd8a924 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -281,8 +281,9 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 }
 
 /*
- * We come here on a H_ENTER call from the guest when
- * we don't have the requested page pinned already.
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
  */
 long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel)
@@ -292,6 +293,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct kvm_memory_slot *memslot;
 	long ret;
 
+	if (kvm->arch.using_mmu_notifiers)
+		goto do_insert;
+
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
 		return H_PARAMETER;
@@ -309,9 +313,12 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		return H_PARAMETER;
 	}
 
-	preempt_disable();
+ do_insert:
+	/* Protect linux PTE lookup from page table destruction */
+	rcu_read_lock_sched();	/* this disables preemption too */
+	vcpu->arch.pgdir = current->mm->pgd;
 	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
-	preempt_enable();
+	rcu_read_unlock_sched();
 	if (ret == H_TOO_HARD) {
 		/* this can't happen */
 		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -487,12 +494,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				unsigned long ea, unsigned long dsisr)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long *hptep, hpte[3];
-	unsigned long psize;
-	unsigned long gfn;
+	unsigned long *hptep, hpte[3], r;
+	unsigned long mmu_seq, psize, pte_size;
+	unsigned long gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
 	struct revmap_entry *rev;
-	long index;
+	struct page *page, *pages[1];
+	long index, ret, npages;
+	unsigned long is_io;
+	struct vm_area_struct *vma;
 
 	/*
 	 * Real-mode code has already searched the HPT and found the
@@ -510,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		cpu_relax();
 	hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
 	hpte[1] = hptep[1];
-	hpte[2] = rev->guest_rpte;
+	hpte[2] = r = rev->guest_rpte;
 	asm volatile("lwsync" : : : "memory");
 	hptep[0] = hpte[0];
 	preempt_enable();
@@ -520,8 +531,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		return RESUME_GUEST;
 
 	/* Translate the logical address and get the page */
-	psize = hpte_page_size(hpte[0], hpte[1]);
-	gfn = hpte_rpn(hpte[2], psize);
+	psize = hpte_page_size(hpte[0], r);
+	gfn = hpte_rpn(r, psize);
 	memslot = gfn_to_memslot(kvm, gfn);
 
 	/* No memslot means it's an emulated MMIO region */
@@ -531,8 +542,228 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 					      dsisr & DSISR_ISSTORE);
 	}
 
-	/* should never get here otherwise */
-	return -EFAULT;
+	if (!kvm->arch.using_mmu_notifiers)
+		return -EFAULT;		/* should never get here */
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	is_io = 0;
+	pfn = 0;
+	page = NULL;
+	pte_size = PAGE_SIZE;
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	npages = get_user_pages_fast(hva, 1, 1, pages);
+	if (npages < 1) {
+		/* Check if it's an I/O mapping */
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, hva);
+		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
+		    (vma->vm_flags & VM_PFNMAP)) {
+			pfn = vma->vm_pgoff +
+				((hva - vma->vm_start) >> PAGE_SHIFT);
+			pte_size = psize;
+			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+		}
+		up_read(&current->mm->mmap_sem);
+		if (!pfn)
+			return -EFAULT;
+	} else {
+		page = pages[0];
+		if (PageHuge(page)) {
+			page = compound_head(page);
+			pte_size <<= compound_order(page);
+		}
+		pfn = page_to_pfn(page);
+	}
+
+	ret = -EFAULT;
+	if (psize > pte_size)
+		goto out_put;
+
+	/* Check WIMG vs. the actual page we're accessing */
+	if (!hpte_cache_flags_ok(r, is_io)) {
+		if (is_io)
+			return -EFAULT;
+		/*
+		 * Allow guest to map emulated device memory as
+		 * uncacheable, but actually make it cacheable.
+		 */
+		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+	}
+
+	/* Set the HPTE to point to pfn */
+	r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+	ret = RESUME_GUEST;
+	preempt_disable();
+	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+		cpu_relax();
+	if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
+	    rev->guest_rpte != hpte[2])
+		/* HPTE has been changed under us; let the guest retry */
+		goto out_unlock;
+	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+	rmap = &memslot->rmap[gfn - memslot->base_gfn];
+	lock_rmap(rmap);
+
+	/* Check if we might have been invalidated; let the guest retry if so */
+	ret = RESUME_GUEST;
+	if (mmu_notifier_retry(vcpu, mmu_seq)) {
+		unlock_rmap(rmap);
+		goto out_unlock;
+	}
+	kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+
+	hptep[1] = r;
+	eieio();
+	hptep[0] = hpte[0];
+	asm volatile("ptesync" : : : "memory");
+	preempt_enable();
+	if (page)
+		SetPageDirty(page);
+
+ out_put:
+	if (page)
+		put_page(page);
+	return ret;
+
+ out_unlock:
+	hptep[0] &= ~HPTE_V_HVLOCK;
+	preempt_enable();
+	goto out_put;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long gfn))
+{
+	int ret;
+	int retval = 0;
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+
+			ret = handler(kvm, &memslot->rmap[gfn_offset],
+				      memslot->base_gfn + gfn_offset);
+			retval |= ret;
+		}
+	}
+
+	return retval;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			   unsigned long gfn)
+{
+	struct revmap_entry *rev = kvm->arch.revmap;
+	unsigned long h, i, j;
+	unsigned long *hptep;
+	unsigned long ptel, psize;
+
+	for (;;) {
+		while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+			cpu_relax();
+		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+			__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+			break;
+		}
+
+		/*
+		 * To avoid an ABBA deadlock with the HPTE lock bit,
+		 * we have to unlock the rmap chain before locking the HPTE.
+		 * Thus we remove the first entry, unlock the rmap chain,
+		 * lock the HPTE and then check that it is for the
+		 * page we're unmapping before changing it to non-present.
+		 */
+		i = *rmapp & KVMPPC_RMAP_INDEX;
+		j = rev[i].forw;
+		if (j == i) {
+			/* chain is now empty */
+			j = 0;
+		} else {
+			/* remove i from chain */
+			h = rev[i].back;
+			rev[h].forw = j;
+			rev[j].back = h;
+			rev[i].forw = rev[i].back = i;
+			j |= KVMPPC_RMAP_PRESENT;
+		}
+		smp_wmb();
+		*rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);
+
+		/* Now lock, check and modify the HPTE */
+		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+			cpu_relax();
+		ptel = rev[i].guest_rpte;
+		psize = hpte_page_size(hptep[0], ptel);
+		if ((hptep[0] & HPTE_V_VALID) &&
+		    hpte_rpn(ptel, psize) == gfn) {
+			kvmppc_invalidate_hpte(kvm, hptep, i);
+			hptep[0] |= HPTE_V_ABSENT;
+		}
+		hptep[0] &= ~HPTE_V_HVLOCK;
+	}
+	return 0;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	return 0;
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			 unsigned long gfn)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	if (!(*rmapp & KVMPPC_RMAP_REFERENCED))
+		return 0;
+	kvm_unmap_rmapp(kvm, rmapp, gfn);
+	while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+		cpu_relax();
+	__clear_bit(KVMPPC_RMAP_REF_BIT, rmapp);
+	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+	return 1;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			      unsigned long gfn)
+{
+	return !!(*rmapp & KVMPPC_RMAP_REFERENCED);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return;
+	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
 }
 
 void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
@@ -540,31 +771,42 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 {
 	struct kvm_memory_slot *memslot;
 	unsigned long gfn = gpa >> PAGE_SHIFT;
-	struct page *page;
-	unsigned long psize, offset;
+	struct page *page, *pages[1];
+	int npages;
+	unsigned long hva, psize, offset;
 	unsigned long pa;
 	unsigned long *physp;
 
 	memslot = gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return NULL;
-	physp = kvm->arch.slot_phys[memslot->id];
-	if (!physp)
-		return NULL;
-	physp += gfn - memslot->base_gfn;
-	pa = *physp;
-	if (!pa) {
-		if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
+	if (!kvm->arch.using_mmu_notifiers) {
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (!physp)
 			return NULL;
+		physp += gfn - memslot->base_gfn;
 		pa = *physp;
+		if (!pa) {
+			if (kvmppc_get_guest_page(kvm, gfn, memslot,
+						  PAGE_SIZE) < 0)
+				return NULL;
+			pa = *physp;
+		}
+		page = pfn_to_page(pa >> PAGE_SHIFT);
+	} else {
+		hva = gfn_to_hva_memslot(memslot, gfn);
+		npages = get_user_pages_fast(hva, 1, 1, pages);
+		if (npages < 1)
+			return NULL;
+		page = pages[0];
 	}
-	page = pfn_to_page(pa >> PAGE_SHIFT);
 	psize = PAGE_SIZE;
 	if (PageHuge(page)) {
 		page = compound_head(page);
 		psize <<= compound_order(page);
 	}
-	get_page(page);
+	if (!kvm->arch.using_mmu_notifiers)
+		get_page(page);
 	offset = gpa & (psize - 1);
 	if (nb_ret)
 		*nb_ret = psize - offset;