 arch/powerpc/include/asm/kvm_book3s.h    |   4
 arch/powerpc/include/asm/kvm_book3s_64.h |  31
 arch/powerpc/include/asm/kvm_host.h      |  16
 arch/powerpc/include/asm/reg.h           |   3
 arch/powerpc/kvm/Kconfig                 |   1
 arch/powerpc/kvm/book3s_64_mmu_hv.c      | 290
 arch/powerpc/kvm/book3s_hv.c             |  25
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      | 140
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  49
 arch/powerpc/kvm/powerpc.c               |   3
 arch/powerpc/mm/hugetlbpage.c            |   2
 11 files changed, 499 insertions(+), 65 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 3a9e51f43397..9240cebf8bad 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -143,6 +143,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+			unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+			unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
			unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 79dc37fb86b5..c21e46da4a3b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -136,6 +136,37 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
 }
 
+/*
+ * Lock and read a linux PTE.  If it's present and writable, atomically
+ * set dirty and referenced bits and return the PTE, otherwise return 0.
+ */
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
+{
+	pte_t pte, tmp;
+
+	/* wait until _PAGE_BUSY is clear then set it atomically */
+	__asm__ __volatile__ (
+		"1:	ldarx	%0,0,%3\n"
+		"	andi.	%1,%0,%4\n"
+		"	bne-	1b\n"
+		"	ori	%1,%0,%4\n"
+		"	stdcx.	%1,0,%3\n"
+		"	bne-	1b"
+		: "=&r" (pte), "=&r" (tmp), "=m" (*p)
+		: "r" (p), "i" (_PAGE_BUSY)
+		: "cc");
+
+	if (pte_present(pte)) {
+		pte = pte_mkyoung(pte);
+		if (pte_write(pte))
+			pte = pte_mkdirty(pte);
+	}
+
+	*p = pte;	/* clears _PAGE_BUSY */
+
+	return pte;
+}
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
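[Note: the inline asm above is PowerPC load-reserve/store-conditional. It spins until _PAGE_BUSY is clear, then sets it in the same atomic step, so the subsequent pte_mkyoung/pte_mkdirty update races with nobody. A rough portable rendering of that spin, written with GCC __atomic builtins purely for illustration; PAGE_BUSY here is a stand-in bit, not the kernel's _PAGE_BUSY:]

    #include <stdint.h>
    #include <stdbool.h>

    #define PAGE_BUSY 0x0800UL	/* stand-in bit, not the kernel's _PAGE_BUSY */

    /* Spin until the busy bit is clear, then set it; returns the locked value. */
    static uint64_t lock_pte_busy(uint64_t *p)
    {
    	uint64_t old = __atomic_load_n(p, __ATOMIC_RELAXED);

    	for (;;) {
    		if (old & PAGE_BUSY) {		/* holder active: wait */
    			old = __atomic_load_n(p, __ATOMIC_RELAXED);
    			continue;
    		}
    		/* ldarx/stdcx. equivalent: set PAGE_BUSY iff *p is unchanged */
    		if (__atomic_compare_exchange_n(p, &old, old | PAGE_BUSY,
    						false, __ATOMIC_ACQUIRE,
    						__ATOMIC_RELAXED))
    			return old | PAGE_BUSY;
    	}
    }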
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 937cacaaf236..968f3aa61cd1 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -32,6 +32,7 @@
 #include <linux/atomic.h>
 #include <asm/kvm_asm.h>
 #include <asm/processor.h>
+#include <asm/page.h>
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
@@ -44,6 +45,19 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET	1
 #endif
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+#include <linux/mmu_notifier.h>
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
+struct kvm;
+extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+#endif
+
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
 #define KVM_NR_PAGE_SIZES	1
@@ -212,6 +226,7 @@ struct kvm_arch {
 	struct kvmppc_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
+	int using_mmu_notifiers;
 	struct list_head spapr_tce_tables;
 	spinlock_t slot_phys_lock;
 	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
@@ -460,6 +475,7 @@ struct kvm_vcpu_arch {
 	struct list_head run_list;
 	struct task_struct *run_task;
 	struct kvm_run *kvm_run;
+	pgd_t *pgdir;
 #endif
 };
 
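[Note: defining KVM_ARCH_WANT_MMU_NOTIFIER is what makes the generic KVM core register an mmu_notifier on the guest's mm and route host MM events to the four arch hooks declared above. For orientation, the generic invalidate-page path of this era behaves roughly like the sketch below; this is a simplification (SRCU and range handling elided), not the exact kernel source:]

    /* Simplified sketch of the generic notifier glue in virt/kvm/kvm_main.c. */
    static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
    					     struct mm_struct *mm,
    					     unsigned long address)
    {
    	struct kvm *kvm = container_of(mn, struct kvm, mmu_notifier);
    	int need_tlb_flush;

    	spin_lock(&kvm->mmu_lock);
    	kvm->mmu_notifier_seq++;		/* trips mmu_notifier_retry() */
    	need_tlb_flush = kvm_unmap_hva(kvm, address);	/* arch hook above */
    	if (need_tlb_flush)
    		kvm_flush_remote_tlbs(kvm);
    	spin_unlock(&kvm->mmu_lock);
    }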
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 16efb3151c20..35c9309bf038 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -495,6 +495,9 @@
 #define SPRN_SPRG7	0x117	/* Special Purpose Register General 7 */
 #define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
 #define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
+#define   SRR1_ISI_NOPT		0x40000000 /* ISI: Not found in hash */
+#define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */
+#define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
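[Note: these three bits report, in SRR1, why an instruction storage interrupt was taken; kvmppc_hpte_hv_fault() below ORs them into the status it reflects to the guest. Purely as an illustration of how a handler might classify them (not kernel code):]

    /* Illustrative classification of an ISI from SRR1. */
    static const char *isi_reason(unsigned long srr1)
    {
    	if (srr1 & SRR1_ISI_NOPT)
    		return "no HPTE found for this address";
    	if (srr1 & SRR1_ISI_N_OR_G)
    		return "page is no-execute or guarded";
    	if (srr1 & SRR1_ISI_PROT)
    		return "protection violation";
    	return "other";
    }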
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 78133deb4b64..8f64709ae331 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -69,6 +69,7 @@ config KVM_BOOK3S_64
 config KVM_BOOK3S_64_HV
	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
	depends on KVM_BOOK3S_64
+	select MMU_NOTIFIER
	---help---
	  Support running unmodified book3s_64 guest kernels in
	  virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d31519b8637..83761dd8a924 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -281,8 +281,9 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 }
 
 /*
- * We come here on a H_ENTER call from the guest when
- * we don't have the requested page pinned already.
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
  */
 long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
			long pte_index, unsigned long pteh, unsigned long ptel)
@@ -292,6 +293,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
	struct kvm_memory_slot *memslot;
	long ret;
 
+	if (kvm->arch.using_mmu_notifiers)
+		goto do_insert;
+
	psize = hpte_page_size(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
@@ -309,9 +313,12 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		return H_PARAMETER;
	}
 
-	preempt_disable();
+ do_insert:
+	/* Protect linux PTE lookup from page table destruction */
+	rcu_read_lock_sched();	/* this disables preemption too */
+	vcpu->arch.pgdir = current->mm->pgd;
	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
-	preempt_enable();
+	rcu_read_unlock_sched();
	if (ret == H_TOO_HARD) {
		/* this can't happen */
		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -487,12 +494,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
			unsigned long ea, unsigned long dsisr)
 {
	struct kvm *kvm = vcpu->kvm;
-	unsigned long *hptep, hpte[3];
-	unsigned long psize;
-	unsigned long gfn;
+	unsigned long *hptep, hpte[3], r;
+	unsigned long mmu_seq, psize, pte_size;
+	unsigned long gfn, hva, pfn;
	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
	struct revmap_entry *rev;
-	long index;
+	struct page *page, *pages[1];
+	long index, ret, npages;
+	unsigned long is_io;
+	struct vm_area_struct *vma;
 
	/*
	 * Real-mode code has already searched the HPT and found the
@@ -510,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
		cpu_relax();
	hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
	hpte[1] = hptep[1];
-	hpte[2] = rev->guest_rpte;
+	hpte[2] = r = rev->guest_rpte;
	asm volatile("lwsync" : : : "memory");
	hptep[0] = hpte[0];
	preempt_enable();
@@ -520,8 +531,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
		return RESUME_GUEST;
 
	/* Translate the logical address and get the page */
-	psize = hpte_page_size(hpte[0], hpte[1]);
-	gfn = hpte_rpn(hpte[2], psize);
+	psize = hpte_page_size(hpte[0], r);
+	gfn = hpte_rpn(r, psize);
	memslot = gfn_to_memslot(kvm, gfn);
 
	/* No memslot means it's an emulated MMIO region */
@@ -531,8 +542,228 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
			dsisr & DSISR_ISSTORE);
	}
 
-	/* should never get here otherwise */
-	return -EFAULT;
+	if (!kvm->arch.using_mmu_notifiers)
+		return -EFAULT;		/* should never get here */
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	is_io = 0;
+	pfn = 0;
+	page = NULL;
+	pte_size = PAGE_SIZE;
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	npages = get_user_pages_fast(hva, 1, 1, pages);
+	if (npages < 1) {
+		/* Check if it's an I/O mapping */
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, hva);
+		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
+		    (vma->vm_flags & VM_PFNMAP)) {
+			pfn = vma->vm_pgoff +
+				((hva - vma->vm_start) >> PAGE_SHIFT);
+			pte_size = psize;
+			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+		}
+		up_read(&current->mm->mmap_sem);
+		if (!pfn)
+			return -EFAULT;
+	} else {
+		page = pages[0];
+		if (PageHuge(page)) {
+			page = compound_head(page);
+			pte_size <<= compound_order(page);
+		}
+		pfn = page_to_pfn(page);
+	}
+
+	ret = -EFAULT;
+	if (psize > pte_size)
+		goto out_put;
+
+	/* Check WIMG vs. the actual page we're accessing */
+	if (!hpte_cache_flags_ok(r, is_io)) {
+		if (is_io)
+			return -EFAULT;
+		/*
+		 * Allow guest to map emulated device memory as
+		 * uncacheable, but actually make it cacheable.
+		 */
+		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+	}
+
+	/* Set the HPTE to point to pfn */
+	r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+	ret = RESUME_GUEST;
+	preempt_disable();
+	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+		cpu_relax();
+	if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
+	    rev->guest_rpte != hpte[2])
+		/* HPTE has been changed under us; let the guest retry */
+		goto out_unlock;
+	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+	rmap = &memslot->rmap[gfn - memslot->base_gfn];
+	lock_rmap(rmap);
+
+	/* Check if we might have been invalidated; let the guest retry if so */
+	ret = RESUME_GUEST;
+	if (mmu_notifier_retry(vcpu, mmu_seq)) {
+		unlock_rmap(rmap);
+		goto out_unlock;
+	}
+	kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+
+	hptep[1] = r;
+	eieio();
+	hptep[0] = hpte[0];
+	asm volatile("ptesync" : : : "memory");
+	preempt_enable();
+	if (page)
+		SetPageDirty(page);
+
+ out_put:
+	if (page)
+		put_page(page);
+	return ret;
+
+ out_unlock:
+	hptep[0] &= ~HPTE_V_HVLOCK;
+	preempt_enable();
+	goto out_put;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long gfn))
+{
+	int ret;
+	int retval = 0;
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+
+			ret = handler(kvm, &memslot->rmap[gfn_offset],
+				      memslot->base_gfn + gfn_offset);
+			retval |= ret;
+		}
+	}
+
+	return retval;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			   unsigned long gfn)
+{
+	struct revmap_entry *rev = kvm->arch.revmap;
+	unsigned long h, i, j;
+	unsigned long *hptep;
+	unsigned long ptel, psize;
+
+	for (;;) {
+		while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+			cpu_relax();
+		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+			__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+			break;
+		}
+
+		/*
+		 * To avoid an ABBA deadlock with the HPTE lock bit,
+		 * we have to unlock the rmap chain before locking the HPTE.
+		 * Thus we remove the first entry, unlock the rmap chain,
+		 * lock the HPTE and then check that it is for the
+		 * page we're unmapping before changing it to non-present.
+		 */
+		i = *rmapp & KVMPPC_RMAP_INDEX;
+		j = rev[i].forw;
+		if (j == i) {
+			/* chain is now empty */
+			j = 0;
+		} else {
+			/* remove i from chain */
+			h = rev[i].back;
+			rev[h].forw = j;
+			rev[j].back = h;
+			rev[i].forw = rev[i].back = i;
+			j |= KVMPPC_RMAP_PRESENT;
+		}
+		smp_wmb();
+		*rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);
+
+		/* Now lock, check and modify the HPTE */
+		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+			cpu_relax();
+		ptel = rev[i].guest_rpte;
+		psize = hpte_page_size(hptep[0], ptel);
+		if ((hptep[0] & HPTE_V_VALID) &&
+		    hpte_rpn(ptel, psize) == gfn) {
+			kvmppc_invalidate_hpte(kvm, hptep, i);
+			hptep[0] |= HPTE_V_ABSENT;
+		}
+		hptep[0] &= ~HPTE_V_HVLOCK;
+	}
+	return 0;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	return 0;
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			 unsigned long gfn)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	if (!(*rmapp & KVMPPC_RMAP_REFERENCED))
+		return 0;
+	kvm_unmap_rmapp(kvm, rmapp, gfn);
+	while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+		cpu_relax();
+	__clear_bit(KVMPPC_RMAP_REF_BIT, rmapp);
+	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+	return 1;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			      unsigned long gfn)
+{
+	return !!(*rmapp & KVMPPC_RMAP_REFERENCED);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return;
+	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 }
 
 void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
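[Note: the fault path above uses the standard KVM anti-race idiom: sample kvm->mmu_notifier_seq, do the work that may sleep (get_user_pages_fast() etc.), then re-check under the rmap lock before installing the HPTE. What mmu_notifier_retry() checks looks roughly like this sketch, simplified from the generic KVM header of this era:]

    /*
     * Sketch of the check mmu_notifier_retry() performs; the fault path
     * samples mmu_notifier_seq first, then calls this under the rmap lock
     * and backs out (letting the guest refault) if it returns nonzero.
     */
    static inline int mmu_notifier_retry_sketch(struct kvm_vcpu *vcpu,
    					    unsigned long mmu_seq)
    {
    	if (unlikely(vcpu->kvm->mmu_notifier_count))
    		return 1;	/* an invalidate range is still open */
    	smp_rmb();
    	if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
    		return 1;	/* an invalidation completed since the sample */
    	return 0;
    }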
@@ -540,31 +771,42 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 {
	struct kvm_memory_slot *memslot;
	unsigned long gfn = gpa >> PAGE_SHIFT;
-	struct page *page;
-	unsigned long psize, offset;
+	struct page *page, *pages[1];
+	int npages;
+	unsigned long hva, psize, offset;
	unsigned long pa;
	unsigned long *physp;
 
	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return NULL;
-	physp = kvm->arch.slot_phys[memslot->id];
-	if (!physp)
-		return NULL;
-	physp += gfn - memslot->base_gfn;
-	pa = *physp;
-	if (!pa) {
-		if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
+	if (!kvm->arch.using_mmu_notifiers) {
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (!physp)
			return NULL;
+		physp += gfn - memslot->base_gfn;
		pa = *physp;
+		if (!pa) {
+			if (kvmppc_get_guest_page(kvm, gfn, memslot,
+						  PAGE_SIZE) < 0)
+				return NULL;
+			pa = *physp;
+		}
+		page = pfn_to_page(pa >> PAGE_SHIFT);
+	} else {
+		hva = gfn_to_hva_memslot(memslot, gfn);
+		npages = get_user_pages_fast(hva, 1, 1, pages);
+		if (npages < 1)
+			return NULL;
+		page = pages[0];
	}
-	page = pfn_to_page(pa >> PAGE_SHIFT);
	psize = PAGE_SIZE;
	if (PageHuge(page)) {
		page = compound_head(page);
		psize <<= compound_order(page);
	}
-	get_page(page);
+	if (!kvm->arch.using_mmu_notifiers)
+		get_page(page);
	offset = gpa & (psize - 1);
	if (nb_ret)
		*nb_ret = psize - offset;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 45aabb9a527f..86c4191cb75b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -326,19 +326,19 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
		break;
	}
	/*
-	 * We get this if the guest accesses a page which it thinks
-	 * it has mapped but which is not actually present, because
-	 * it is for an emulated I/O device.
-	 * Any other HDSI interrupt has been handled already.
+	 * We get these next two if the guest accesses a page which it thinks
+	 * it has mapped but which is not actually present, either because
+	 * it is for an emulated I/O device or because the corresponding
+	 * host page has been paged out.  Any other HDSI/HISI interrupts
+	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = kvmppc_book3s_hv_page_fault(run, vcpu,
			vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
-		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
-			vcpu->arch.shregs.msr & 0x58000000);
-		r = RESUME_GUEST;
+		r = kvmppc_book3s_hv_page_fault(run, vcpu,
+			kvmppc_get_pc(vcpu), 0);
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
@@ -867,6 +867,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+	vcpu->arch.pgdir = current->mm->pgd;
 
	do {
		r = kvmppc_run_vcpu(run, vcpu);
@@ -1090,9 +1091,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
	unsigned long *phys;
 
	/* Allocate a slot_phys array */
-	npages = mem->memory_size >> PAGE_SHIFT;
	phys = kvm->arch.slot_phys[mem->slot];
-	if (!phys) {
+	if (!kvm->arch.using_mmu_notifiers && !phys) {
+		npages = mem->memory_size >> PAGE_SHIFT;
		phys = vzalloc(npages * sizeof(unsigned long));
		if (!phys)
			return -ENOMEM;
@@ -1298,6 +1299,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
	}
	kvm->arch.lpcr = lpcr;
 
+	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
	spin_lock_init(&kvm->arch.slot_phys_lock);
	return 0;
 }
@@ -1306,8 +1308,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
	unsigned long i;
 
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		unpin_slot(kvm, i);
+	if (!kvm->arch.using_mmu_notifiers)
+		for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+			unpin_slot(kvm, i);
 
	if (kvm->arch.rma) {
		kvm_release_rma(kvm->arch.rma);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index a5176dc37e7e..81d16ed9767d 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -58,7 +58,7 @@ static void *real_vmalloc_addr(void *x)
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
 */
-static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
			unsigned long *rmap, long pte_index, int realmode)
 {
	struct revmap_entry *head, *tail;
@@ -83,6 +83,7 @@ static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
	smp_wmb();
	*rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
 }
+EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
@@ -118,12 +119,33 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
	unlock_rmap(rmap);
 }
 
+static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+			      unsigned long *pte_sizep)
+{
+	pte_t *ptep;
+	unsigned long ps = *pte_sizep;
+	unsigned int shift;
+
+	ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+	if (!ptep)
+		return __pte(0);
+	if (shift)
+		*pte_sizep = 1ul << shift;
+	else
+		*pte_sizep = PAGE_SIZE;
+	if (ps > *pte_sizep)
+		return __pte(0);
+	if (!pte_present(*ptep))
+		return __pte(0);
+	return kvmppc_read_update_linux_pte(ptep);
+}
+
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		    long pte_index, unsigned long pteh, unsigned long ptel)
 {
	struct kvm *kvm = vcpu->kvm;
	unsigned long i, pa, gpa, gfn, psize;
-	unsigned long slot_fn;
+	unsigned long slot_fn, hva;
	unsigned long *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel = ptel;
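[Note: the *pte_sizep argument of lookup_linux_pte() above is in/out: on entry it carries the minimum backing size the caller requires, and on return the actual page size backing hva (a zero PTE comes back if the backing page is too small or absent). A hypothetical wrapper, for illustration only:]

    /* Hypothetical illustration of the in/out *pte_sizep convention. */
    static unsigned long backing_page_size(struct kvm_vcpu *vcpu,
    				       unsigned long hva,
    				       unsigned long min_size)
    {
    	unsigned long pte_size = min_size;	/* need at least this much */
    	pte_t pte = lookup_linux_pte(vcpu, hva, &pte_size);

    	if (!pte_present(pte))
    		return 0;	/* absent, or backing smaller than min_size */
    	return pte_size;	/* >= min_size, e.g. 16MB for a hugepage */
    }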
@@ -131,6 +153,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
	unsigned long *physp, pte_size;
	unsigned long is_io;
	unsigned long *rmap;
+	pte_t pte;
+	unsigned long mmu_seq;
	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
	psize = hpte_page_size(pteh, ptel);
@@ -138,11 +162,16 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		return H_PARAMETER;
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
 
+	/* used later to detect if we might have been invalidated */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = builtin_gfn_to_memslot(kvm, gfn);
	pa = 0;
+	is_io = ~0ul;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* PPC970 can't do emulated MMIO */
@@ -160,19 +189,31 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->rmap[slot_fn];
 
-	physp = kvm->arch.slot_phys[memslot->id];
-	if (!physp)
-		return H_PARAMETER;
-	physp += slot_fn;
-	if (realmode)
-		physp = real_vmalloc_addr(physp);
-	pa = *physp;
-	if (!pa)
-		return H_TOO_HARD;
-	is_io = pa & (HPTE_R_I | HPTE_R_W);
-	pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
-	pa &= PAGE_MASK;
-
+	if (!kvm->arch.using_mmu_notifiers) {
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (!physp)
+			return H_PARAMETER;
+		physp += slot_fn;
+		if (realmode)
+			physp = real_vmalloc_addr(physp);
+		pa = *physp;
+		if (!pa)
+			return H_TOO_HARD;
+		is_io = pa & (HPTE_R_I | HPTE_R_W);
+		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
+		pa &= PAGE_MASK;
+	} else {
+		/* Translate to host virtual address */
+		hva = gfn_to_hva_memslot(memslot, gfn);
+
+		/* Look up the Linux PTE for the backing page */
+		pte_size = psize;
+		pte = lookup_linux_pte(vcpu, hva, &pte_size);
+		if (pte_present(pte)) {
+			is_io = hpte_cache_bits(pte_val(pte));
+			pa = pte_pfn(pte) << PAGE_SHIFT;
+		}
+	}
	if (pte_size < psize)
		return H_PARAMETER;
	if (pa && pte_size > psize)
@@ -180,10 +221,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
	ptel &= ~(HPTE_R_PP0 - psize);
	ptel |= pa;
-	pteh |= HPTE_V_VALID;
+
+	if (pa)
+		pteh |= HPTE_V_VALID;
+	else
+		pteh |= HPTE_V_ABSENT;
 
	/* Check WIMG */
-	if (!hpte_cache_flags_ok(ptel, is_io)) {
+	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
		if (is_io)
			return H_PARAMETER;
		/*
@@ -194,6 +239,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		ptel |= HPTE_R_M;
	}
 
+	/* Find and lock the HPTEG slot to use */
 do_insert:
	if (pte_index >= HPT_NPTE)
		return H_PARAMETER;
@@ -253,7 +299,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
-		kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, realmode);
+		/* Check for pending invalidations under the rmap chain lock */
+		if (kvm->arch.using_mmu_notifiers &&
+		    mmu_notifier_retry(vcpu, mmu_seq)) {
+			/* inval in progress, write a non-present HPTE */
+			pteh |= HPTE_V_ABSENT;
+			pteh &= ~HPTE_V_VALID;
+			unlock_rmap(rmap);
+		} else {
+			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
+						realmode);
+		}
	}
 
	hpte[1] = ptel;
@@ -516,6 +572,23 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
	return H_SUCCESS;
 }
 
+void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+			unsigned long pte_index)
+{
+	unsigned long rb;
+
+	hptep[0] &= ~HPTE_V_VALID;
+	rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+		cpu_relax();
+	asm volatile("ptesync" : : : "memory");
+	asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+		     : : "r" (rb), "r" (kvm->arch.lpid));
+	asm volatile("ptesync" : : : "memory");
+	kvm->arch.tlbie_lock = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
+
 static int slb_base_page_shift[4] = {
	24,	/* 16M */
	16,	/* 64k */
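[Note: kvmppc_invalidate_hpte() performs the architected global invalidation sequence (ptesync; tlbie; eieio; tlbsync; ptesync) under the per-VM tlbie lock, clearing only HPTE_V_VALID. Callers are expected to hold the HPTE lock bit and decide what to leave behind; sketched here for emphasis, mirroring the pattern kvm_unmap_rmapp() uses earlier in this patch:]

    /* Sketch of the expected calling pattern (mirrors kvm_unmap_rmapp). */
    static void invalidate_one_hpte(struct kvm *kvm, unsigned long *hptep,
    				unsigned long pte_index)
    {
    	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
    		cpu_relax();			/* take the HPTE lock bit */
    	if (hptep[0] & HPTE_V_VALID) {
    		kvmppc_invalidate_hpte(kvm, hptep, pte_index);
    		hptep[0] |= HPTE_V_ABSENT;	/* keep the guest's view */
    	}
    	hptep[0] &= ~HPTE_V_HVLOCK;		/* unlock */
    }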
@@ -605,15 +678,15 @@ EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
 
 /*
 * Called in real mode to check whether an HPTE not found fault
- * is due to accessing an emulated MMIO page.
+ * is due to accessing a paged-out page or an emulated MMIO page.
 * Returns a possibly modified status (DSISR) value if not
 * (i.e. pass the interrupt to the guest),
 * -1 to pass the fault up to host kernel mode code, -2 to do that
- * and also load the instruction word,
+ * and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
 */
 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
-			  unsigned long slb_v, unsigned int status)
+			  unsigned long slb_v, unsigned int status, bool data)
 {
	struct kvm *kvm = vcpu->kvm;
	long int index;
@@ -624,6 +697,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
	unsigned long pp, key;
 
	valid = HPTE_V_VALID | HPTE_V_ABSENT;
+
	index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
	if (index < 0)
		return status;	/* there really was no HPTE */
@@ -645,22 +719,28 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
	/* Check access permissions to the page */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
-	if (status & DSISR_ISSTORE) {
+	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
+	if (!data) {
+		if (gr & (HPTE_R_N | HPTE_R_G))
+			return status | SRR1_ISI_N_OR_G;
+		if (!hpte_read_permission(pp, slb_v & key))
+			return status | SRR1_ISI_PROT;
+	} else if (status & DSISR_ISSTORE) {
		/* check write permission */
		if (!hpte_write_permission(pp, slb_v & key))
-			goto protfault;
+			return status | DSISR_PROTFAULT;
	} else {
		if (!hpte_read_permission(pp, slb_v & key))
-			goto protfault;
+			return status | DSISR_PROTFAULT;
	}
 
	/* Check storage key, if applicable */
-	if (vcpu->arch.shregs.msr & MSR_DR) {
+	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (status & DSISR_ISSTORE)
			perm >>= 1;
		if (perm & 1)
-			return (status & ~DSISR_NOHPTE) | DSISR_KEYFAULT;
+			return status | DSISR_KEYFAULT;
	}
 
@@ -669,11 +749,11 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
	vcpu->arch.pgfault_hpte[0] = v;
	vcpu->arch.pgfault_hpte[1] = r;
 
-	if (vcpu->arch.shregs.msr & MSR_IR)
+	/* Check the storage key to see if it is possibly emulated MMIO */
+	if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
+	    (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
		return -2;	/* MMIO emulation - load instr word */
 
	return -1;	/* send fault up to host kernel mode */
-
- protfault:
-	return (status & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
 }
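[Note: the return-value protocol of kvmppc_hpte_hv_fault() (documented in its comment) is dispatched in assembly at kvmppc_hdsi/kvmppc_hisi. A hypothetical C rendering of that dispatch, purely for orientation; the RETRY_/REFLECT_/EXIT_ names are made up here, not kernel symbols:]

    /* Hypothetical C rendering of the real-mode fault dispatch. */
    enum fault_action { RETRY_GUEST, REFLECT_TO_GUEST, EXIT_TO_HOST,
    		    EXIT_TO_HOST_LOAD_INSTR };

    static enum fault_action dispatch_hv_fault(long r, unsigned long *new_status)
    {
    	if (r == 0)
    		return RETRY_GUEST;		/* HPTE usable; re-execute */
    	if (r == -1)
    		return EXIT_TO_HOST;		/* host page-fault path */
    	if (r == -2)
    		return EXIT_TO_HOST_LOAD_INSTR;	/* MMIO: also fetch the insn */
    	*new_status = r;	/* modified DSISR (data) or SRR1 bits (instr) */
    	return REFLECT_TO_GUEST;		/* deliver DSI/ISI to the guest */
    }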
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index d07b64d5f37e..7d4990665d00 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -621,6 +621,8 @@ BEGIN_FTR_SECTION
	/* If this is a page table miss then see if it's theirs or ours */
	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
	beq	kvmppc_hdsi
+	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+	beq	kvmppc_hisi
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
	/* See if this is a leftover HDEC interrupt */
@@ -1125,6 +1127,7 @@ kvmppc_hdsi:
 
	/* Search the hash table. */
	mr	r3, r9			/* vcpu pointer */
+	li	r7, 1			/* data fault */
	bl	.kvmppc_hpte_hv_fault
	ld	r9, HSTATE_KVM_VCPU(r13)
	ld	r10, VCPU_PC(r9)
@@ -1182,6 +1185,52 @@ kvmppc_hdsi:
	b	nohpte_cont
 
 /*
+ * Similarly for an HISI, reflect it to the guest as an ISI unless
+ * it is an HPTE not found fault for a page that we have paged out.
+ */
+kvmppc_hisi:
+	andis.	r0, r11, SRR1_ISI_NOPT@h
+	beq	1f
+	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
+	beq	3f
+	clrrdi	r0, r10, 28
+	PPC_SLBFEE_DOT(r5, r0)		/* if so, look up SLB */
+	bne	1f			/* if no SLB entry found */
+4:
+	/* Search the hash table. */
+	mr	r3, r9			/* vcpu pointer */
+	mr	r4, r10
+	mr	r6, r11
+	li	r7, 0			/* instruction fault */
+	bl	.kvmppc_hpte_hv_fault
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+	cmpdi	r3, 0			/* retry the instruction */
+	beq	6f
+	cmpdi	r3, -1			/* handle in kernel mode */
+	beq	nohpte_cont
+
+	/* Synthesize an ISI for the guest */
+	mr	r11, r3
+1:	mtspr	SPRN_SRR0, r10
+	mtspr	SPRN_SRR1, r11
+	li	r10, BOOK3S_INTERRUPT_INST_STORAGE
+	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11, r11, 63
+6:	ld	r7, VCPU_CTR(r9)
+	lwz	r8, VCPU_XER(r9)
+	mtctr	r7
+	mtxer	r8
+	mr	r4, r9
+	b	fast_guest_return
+
+3:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
+	ld	r5, KVM_VRMA_SLB_V(r6)
+	b	4b
+
+/*
 * Try to handle an hcall in real mode.
 * Returns to the guest if we handle it, or continues on up to
 * the kernel if we can't (i.e. if we don't have a handler for
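[Note: one non-obvious detail in the synthesized ISI above: `li r11, (MSR_ME << 1) | 1` followed by `rotldi r11, r11, 63` builds MSR_SF | MSR_ME in two instructions. Rotating the constant right one bit (rotate left 63) moves the low 1 into bit 63 (MSR_SF) and MSR_ME << 1 back down to MSR_ME. Shown in C purely as an illustration; the MSR_ME value is an assumption stated in the comment:]

    /* Illustration of the rotldi trick used to synthesize the guest MSR. */
    #include <stdint.h>

    #define MSR_ME 0x1000UL		/* machine check enable (assumed value) */

    static uint64_t synth_guest_msr(void)
    {
    	uint64_t v = (MSR_ME << 1) | 1;	/* li  r11,(MSR_ME<<1)|1 */
    	v = (v << 63) | (v >> 1);	/* rotldi r11,r11,63 (rotate right 1) */
    	return v;			/* == (1UL << 63) | MSR_ME */
    }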
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4f85ac32258a..06e955b5837e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -245,6 +245,9 @@ int kvm_dev_ioctl_check_extension(long ext)
		if (cpu_has_feature(CPU_FTR_ARCH_201))
			r = 2;
		break;
+	case KVM_CAP_SYNC_MMU:
+		r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+		break;
 #endif
	default:
		r = 0;
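[Note: userspace (e.g. QEMU) discovers this with the standard KVM_CHECK_EXTENSION ioctl on /dev/kvm; with this patch the capability reports 1 on POWER7 (ARCH_206) hosts and 0 on PPC970. A minimal probe:]

    /* Userspace probe for the new capability (standard KVM ioctl usage). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
    	int kvm_fd = open("/dev/kvm", O_RDWR);
    	if (kvm_fd < 0)
    		return 1;
    	int r = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU);
    	printf("KVM_CAP_SYNC_MMU: %d\n", r);
    	return 0;
    }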
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a8b3cc7d90fe..f348c3d90404 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
+#include <linux/export.h>
 #include <linux/of_fdt.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
@@ -103,6 +104,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
	*shift = hugepd_shift(*hpdp);
	return hugepte_offset(hpdp, ea, pdshift);
 }
+EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {