Diffstat (limited to 'arch/powerpc/kvm/book3s_64_mmu_hv.c')
 -rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 290 ++++++++++++++++++++++---
 1 file changed, 266 insertions(+), 24 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d31519b8637..83761dd8a924 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -281,8 +281,9 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 }

 /*
- * We come here on a H_ENTER call from the guest when
- * we don't have the requested page pinned already.
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
  */
 long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                        long pte_index, unsigned long pteh, unsigned long ptel)
@@ -292,6 +293,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
        struct kvm_memory_slot *memslot;
        long ret;

+       if (kvm->arch.using_mmu_notifiers)
+               goto do_insert;
+
        psize = hpte_page_size(pteh, ptel);
        if (!psize)
                return H_PARAMETER;
@@ -309,9 +313,12 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                return H_PARAMETER;
        }

-       preempt_disable();
+ do_insert:
+       /* Protect linux PTE lookup from page table destruction */
+       rcu_read_lock_sched();  /* this disables preemption too */
+       vcpu->arch.pgdir = current->mm->pgd;
        ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
-       preempt_enable();
+       rcu_read_unlock_sched();
        if (ret == H_TOO_HARD) {
                /* this can't happen */
                pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -487,12 +494,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned long ea, unsigned long dsisr)
 {
        struct kvm *kvm = vcpu->kvm;
-       unsigned long *hptep, hpte[3];
-       unsigned long psize;
-       unsigned long gfn;
+       unsigned long *hptep, hpte[3], r;
+       unsigned long mmu_seq, psize, pte_size;
+       unsigned long gfn, hva, pfn;
        struct kvm_memory_slot *memslot;
+       unsigned long *rmap;
        struct revmap_entry *rev;
-       long index;
+       struct page *page, *pages[1];
+       long index, ret, npages;
+       unsigned long is_io;
+       struct vm_area_struct *vma;

        /*
         * Real-mode code has already searched the HPT and found the
@@ -510,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                cpu_relax();
        hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
        hpte[1] = hptep[1];
-       hpte[2] = rev->guest_rpte;
+       hpte[2] = r = rev->guest_rpte;
        asm volatile("lwsync" : : : "memory");
        hptep[0] = hpte[0];
        preempt_enable();
@@ -520,8 +531,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return RESUME_GUEST;

        /* Translate the logical address and get the page */
-       psize = hpte_page_size(hpte[0], hpte[1]);
-       gfn = hpte_rpn(hpte[2], psize);
+       psize = hpte_page_size(hpte[0], r);
+       gfn = hpte_rpn(r, psize);
        memslot = gfn_to_memslot(kvm, gfn);

        /* No memslot means it's an emulated MMIO region */
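In the two hunks above the handler starts caching rev->guest_rpte in r, then recovers the page size and guest real page number from the HPTE words via hpte_page_size() and hpte_rpn(). A deliberately simplified, hypothetical stand-in for that decoding (the real helpers in arch/powerpc/include/asm/kvm_book3s_64.h cover the full base/actual page-size encodings; only the 4K case is modelled here and 16M is assumed for the large-page case):

#include <stdio.h>

#define HPTE_V_LARGE  (1UL << 2)            /* large-page bit in the V word */
#define HPTE_R_RPN    0x3ffffffffffff000UL  /* real-page-number field mask */

/* Toy decoder: 4K unless the large bit is set (real code also decodes
 * which large size applies from low RPN bits; we just assume 16M). */
static unsigned long hpte_page_size(unsigned long v, unsigned long r)
{
        (void)r;
        return (v & HPTE_V_LARGE) ? (1UL << 24) : 4096;
}

/* Guest real page number: the physical-address bits above the page size. */
static unsigned long hpte_rpn(unsigned long r, unsigned long psize)
{
        return (r & HPTE_R_RPN) / psize;
}

int main(void)
{
        unsigned long v = 0, r = 0x12345000UL;   /* a 4K mapping */
        unsigned long psize = hpte_page_size(v, r);

        printf("psize=%lu rpn=0x%lx\n", psize, hpte_rpn(r, psize));
        return 0;
}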
@@ -531,8 +542,228 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                              dsisr & DSISR_ISSTORE);
        }

-       /* should never get here otherwise */
-       return -EFAULT;
+       if (!kvm->arch.using_mmu_notifiers)
+               return -EFAULT;         /* should never get here */
+
+       /* used to check for invalidations in progress */
+       mmu_seq = kvm->mmu_notifier_seq;
+       smp_rmb();
+
+       is_io = 0;
+       pfn = 0;
+       page = NULL;
+       pte_size = PAGE_SIZE;
+       hva = gfn_to_hva_memslot(memslot, gfn);
+       npages = get_user_pages_fast(hva, 1, 1, pages);
+       if (npages < 1) {
+               /* Check if it's an I/O mapping */
+               down_read(&current->mm->mmap_sem);
+               vma = find_vma(current->mm, hva);
+               if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
+                   (vma->vm_flags & VM_PFNMAP)) {
+                       pfn = vma->vm_pgoff +
+                               ((hva - vma->vm_start) >> PAGE_SHIFT);
+                       pte_size = psize;
+                       is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+               }
+               up_read(&current->mm->mmap_sem);
+               if (!pfn)
+                       return -EFAULT;
+       } else {
+               page = pages[0];
+               if (PageHuge(page)) {
+                       page = compound_head(page);
+                       pte_size <<= compound_order(page);
+               }
+               pfn = page_to_pfn(page);
+       }
+
+       ret = -EFAULT;
+       if (psize > pte_size)
+               goto out_put;
+
+       /* Check WIMG vs. the actual page we're accessing */
+       if (!hpte_cache_flags_ok(r, is_io)) {
+               if (is_io)
+                       return -EFAULT;
+               /*
+                * Allow guest to map emulated device memory as
+                * uncacheable, but actually make it cacheable.
+                */
+               r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+       }
+
+       /* Set the HPTE to point to pfn */
+       r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+       ret = RESUME_GUEST;
+       preempt_disable();
+       while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+               cpu_relax();
+       if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
+           rev->guest_rpte != hpte[2])
+               /* HPTE has been changed under us; let the guest retry */
+               goto out_unlock;
+       hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+       rmap = &memslot->rmap[gfn - memslot->base_gfn];
+       lock_rmap(rmap);
+
+       /* Check if we might have been invalidated; let the guest retry if so */
+       ret = RESUME_GUEST;
+       if (mmu_notifier_retry(vcpu, mmu_seq)) {
+               unlock_rmap(rmap);
+               goto out_unlock;
+       }
+       kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+
+       hptep[1] = r;
+       eieio();
+       hptep[0] = hpte[0];
+       asm volatile("ptesync" : : : "memory");
+       preempt_enable();
+       if (page)
+               SetPageDirty(page);
+
+ out_put:
+       if (page)
+               put_page(page);
+       return ret;
+
+ out_unlock:
+       hptep[0] &= ~HPTE_V_HVLOCK;
+       preempt_enable();
+       goto out_put;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+                         int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                                        unsigned long gfn))
+{
+       int ret;
+       int retval = 0;
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots) {
+               unsigned long start = memslot->userspace_addr;
+               unsigned long end;
+
+               end = start + (memslot->npages << PAGE_SHIFT);
+               if (hva >= start && hva < end) {
+                       gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+
+                       ret = handler(kvm, &memslot->rmap[gfn_offset],
+                                     memslot->base_gfn + gfn_offset);
+                       retval |= ret;
+               }
+       }
+
+       return retval;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+                          unsigned long gfn)
+{
+       struct revmap_entry *rev = kvm->arch.revmap;
+       unsigned long h, i, j;
+       unsigned long *hptep;
+       unsigned long ptel, psize;
+
+       for (;;) {
+               while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+                       cpu_relax();
+               if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+                       __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+                       break;
+               }
+
+               /*
+                * To avoid an ABBA deadlock with the HPTE lock bit,
+                * we have to unlock the rmap chain before locking the HPTE.
+                * Thus we remove the first entry, unlock the rmap chain,
+                * lock the HPTE and then check that it is for the
+                * page we're unmapping before changing it to non-present.
+                */
+               i = *rmapp & KVMPPC_RMAP_INDEX;
+               j = rev[i].forw;
+               if (j == i) {
+                       /* chain is now empty */
+                       j = 0;
+               } else {
+                       /* remove i from chain */
+                       h = rev[i].back;
+                       rev[h].forw = j;
+                       rev[j].back = h;
+                       rev[i].forw = rev[i].back = i;
+                       j |= KVMPPC_RMAP_PRESENT;
+               }
+               smp_wmb();
+               *rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);
+
+               /* Now lock, check and modify the HPTE */
+               hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+               while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+                       cpu_relax();
+               ptel = rev[i].guest_rpte;
+               psize = hpte_page_size(hptep[0], ptel);
+               if ((hptep[0] & HPTE_V_VALID) &&
+                   hpte_rpn(ptel, psize) == gfn) {
+                       kvmppc_invalidate_hpte(kvm, hptep, i);
+                       hptep[0] |= HPTE_V_ABSENT;
+               }
+               hptep[0] &= ~HPTE_V_HVLOCK;
+       }
+       return 0;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+       if (kvm->arch.using_mmu_notifiers)
+               kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+       return 0;
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+                        unsigned long gfn)
+{
+       if (!kvm->arch.using_mmu_notifiers)
+               return 0;
+       if (!(*rmapp & KVMPPC_RMAP_REFERENCED))
+               return 0;
+       kvm_unmap_rmapp(kvm, rmapp, gfn);
+       while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+               cpu_relax();
+       __clear_bit(KVMPPC_RMAP_REF_BIT, rmapp);
+       __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+       return 1;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+       if (!kvm->arch.using_mmu_notifiers)
+               return 0;
+       return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+                             unsigned long gfn)
+{
+       return !!(*rmapp & KVMPPC_RMAP_REFERENCED);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+       if (!kvm->arch.using_mmu_notifiers)
+               return 0;
+       return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+       if (!kvm->arch.using_mmu_notifiers)
+               return;
+       kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
 }

 void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
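The fault handler added above follows the standard KVM invalidation-race protocol: sample kvm->mmu_notifier_seq (paired with smp_rmb()) before the unlocked get_user_pages_fast(), then, once the HPTE and rmap locks are held, call mmu_notifier_retry() and back out if an invalidation ran in between. A standalone sketch of that sample-then-recheck shape, using a C11 atomic for the sequence counter and a pthread mutex for the HPTE lock; every name here is illustrative, not from the kernel:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong mmu_seq;    /* ~ kvm->mmu_notifier_seq */
static pthread_mutex_t hpte_lock = PTHREAD_MUTEX_INITIALIZER;

/* Commit a translation found by an earlier, unlocked lookup. seq_seen is
 * the sequence value sampled before that lookup began. */
static int install_mapping(unsigned long seq_seen)
{
        pthread_mutex_lock(&hpte_lock);
        if (atomic_load(&mmu_seq) != seq_seen) {   /* ~ mmu_notifier_retry() */
                pthread_mutex_unlock(&hpte_lock);
                return -1;                         /* raced: caller retries */
        }
        /* ... safe to write the HPTE here: no invalidation intervened ... */
        pthread_mutex_unlock(&hpte_lock);
        return 0;
}

/* The "notifier" side: bump the sequence under the same lock. */
static void invalidate(void)
{
        pthread_mutex_lock(&hpte_lock);
        atomic_fetch_add(&mmu_seq, 1);
        /* ... tear down existing translations ... */
        pthread_mutex_unlock(&hpte_lock);
}

int main(void)
{
        unsigned long seq = atomic_load(&mmu_seq);
        /* ... imagine the unlocked page lookup happening here ... */
        invalidate();                              /* invalidation races in */
        printf("first attempt:  %d\n", install_mapping(seq));  /* -1 */

        seq = atomic_load(&mmu_seq);
        printf("second attempt: %d\n", install_mapping(seq));  /*  0 */
        return 0;
}

The kernel version additionally orders the sequence read against the lookup with smp_rmb(); in this sketch the mutex provides the equivalent ordering.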
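kvm_handle_hva() above is a small dispatcher: it scans every memslot, tests whether the host virtual address falls inside that slot's userspace mapping, and converts it into an index into the slot's rmap array plus a guest frame number. The address arithmetic in isolation, using a hypothetical cut-down memslot type (runnable as-is):

#include <stdio.h>

#define PAGE_SHIFT 12

struct kvm_memslot_stub {
        unsigned long userspace_addr;  /* hva where the slot is mapped */
        unsigned long base_gfn;        /* first guest frame of the slot */
        unsigned long npages;
};

/* Return the gfn backing hva, or -1 if no slot covers it. */
static long hva_to_gfn(struct kvm_memslot_stub *slots, int n, unsigned long hva)
{
        for (int i = 0; i < n; i++) {
                unsigned long start = slots[i].userspace_addr;
                unsigned long end = start + (slots[i].npages << PAGE_SHIFT);

                if (hva >= start && hva < end)
                        return slots[i].base_gfn + ((hva - start) >> PAGE_SHIFT);
        }
        return -1;
}

int main(void)
{
        struct kvm_memslot_stub slots[] = {
                { .userspace_addr = 0x7f0000000000, .base_gfn = 0, .npages = 256 },
        };

        printf("gfn = %ld\n", hva_to_gfn(slots, 1, 0x7f0000003000)); /* 3 */
        return 0;
}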
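The comment in kvm_unmap_rmapp() explains the ABBA-avoidance: because the HPTE must be locked with the rmap chain already unlocked, the function pops one entry at a time from the chain, which is a circular doubly-linked list threaded through the forw/back index fields of struct revmap_entry. Here is just that unlink step as a runnable sketch, with the locking and the HPTE re-check stripped out:

#include <stdio.h>

struct revmap_entry { unsigned long forw, back; };

/* Remove entry i from its circular doubly-linked chain; return the new
 * head index, or -1 if the chain became empty. Mirrors the index-based
 * forw/back links used by struct revmap_entry in the patch. */
static long remove_head(struct revmap_entry *rev, unsigned long i)
{
        unsigned long j = rev[i].forw;
        unsigned long h;

        if (j == i)
                return -1;                 /* i was the only entry */
        h = rev[i].back;                   /* unlink i: neighbours skip it */
        rev[h].forw = j;
        rev[j].back = h;
        rev[i].forw = rev[i].back = i;     /* i now links only to itself */
        return j;
}

int main(void)
{
        struct revmap_entry rev[3] = {
                { .forw = 1, .back = 2 },  /* chain 0 -> 1 -> 2 -> 0 */
                { .forw = 2, .back = 0 },
                { .forw = 0, .back = 1 },
        };

        for (long head = 0; head != -1; head = remove_head(rev, head))
                printf("removing entry %ld\n", head);
        return 0;
}

Each removed entry is left linking to itself, matching rev[i].forw = rev[i].back = i in the patch, so a later re-insert starts from a self-consistent node.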
@@ -540,31 +771,42 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 {
        struct kvm_memory_slot *memslot;
        unsigned long gfn = gpa >> PAGE_SHIFT;
-       struct page *page;
-       unsigned long psize, offset;
+       struct page *page, *pages[1];
+       int npages;
+       unsigned long hva, psize, offset;
        unsigned long pa;
        unsigned long *physp;

        memslot = gfn_to_memslot(kvm, gfn);
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                return NULL;
-       physp = kvm->arch.slot_phys[memslot->id];
-       if (!physp)
-               return NULL;
-       physp += gfn - memslot->base_gfn;
-       pa = *physp;
-       if (!pa) {
-               if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
+       if (!kvm->arch.using_mmu_notifiers) {
+               physp = kvm->arch.slot_phys[memslot->id];
+               if (!physp)
                        return NULL;
+               physp += gfn - memslot->base_gfn;
                pa = *physp;
+               if (!pa) {
+                       if (kvmppc_get_guest_page(kvm, gfn, memslot,
+                                                 PAGE_SIZE) < 0)
+                               return NULL;
+                       pa = *physp;
+               }
+               page = pfn_to_page(pa >> PAGE_SHIFT);
+       } else {
+               hva = gfn_to_hva_memslot(memslot, gfn);
+               npages = get_user_pages_fast(hva, 1, 1, pages);
+               if (npages < 1)
+                       return NULL;
+               page = pages[0];
        }
-       page = pfn_to_page(pa >> PAGE_SHIFT);
        psize = PAGE_SIZE;
        if (PageHuge(page)) {
                page = compound_head(page);
                psize <<= compound_order(page);
        }
-       get_page(page);
+       if (!kvm->arch.using_mmu_notifiers)
+               get_page(page);
        offset = gpa & (psize - 1);
        if (nb_ret)
                *nb_ret = psize - offset;
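Whichever path kvmppc_pin_guest_page() takes, it finishes by telling the caller how many bytes are usable from gpa to the end of the backing page, which may be a huge page after the compound_head() adjustment. The closing arithmetic on its own (the values in the comments are just worked examples):

#include <stdio.h>

/* Bytes available from gpa to the end of its (possibly huge) page. */
static unsigned long bytes_left_in_page(unsigned long gpa, unsigned long psize)
{
        unsigned long offset = gpa & (psize - 1);  /* offset within page */
        return psize - offset;                     /* what *nb_ret reports */
}

int main(void)
{
        /* same guest physical address, 4K page vs 16M huge page backing */
        printf("%lu\n", bytes_left_in_page(0x1003500UL, 4096));      /* 2816 */
        printf("%lu\n", bytes_left_in_page(0x1003500UL, 1UL << 24)); /* 16763648 */
        return 0;
}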