author	Paul Mackerras <paulus@samba.org>	2011-12-12 07:38:05 -0500
committer	Avi Kivity <avi@redhat.com>	2012-03-05 07:52:38 -0500
commit	342d3db763f2621ed4546ebf8f6c61cb29d7fbdb (patch)
tree	bc806911dd085905a5a97419fbc4f9fc1eeb142d /arch/powerpc
parent	697d3899dcb4bcd918d060a92db57b794e56b077 (diff)
KVM: PPC: Implement MMU notifiers for Book3S HV guests
This adds the infrastructure to enable us to page out pages underneath
a Book3S HV guest, on processors that support virtualized partition
memory, that is, POWER7.

Instead of pinning all the guest's pages, we now look in the host
userspace Linux page tables to find the mapping for a given guest page.
Then, if the userspace Linux PTE gets invalidated, kvm_unmap_hva() gets
called for that address, and we replace all the guest HPTEs that refer
to that page with absent HPTEs, i.e. ones with the valid bit clear and
the HPTE_V_ABSENT bit set, which will cause an HDSI when the guest tries
to access them.  Finally, the page fault handler is extended to
reinstantiate the guest HPTE when the guest tries to access a page which
has been paged out.

Since we can't intercept the guest DSI and ISI interrupts on PPC970,
we still have to pin all the guest pages on PPC970.  We have a new flag,
kvm->arch.using_mmu_notifiers, that indicates whether we can page guest
pages out.  If it is not set, the MMU notifier callbacks do nothing and
everything operates as before.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
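The host-side flow described above (look up the backing page on demand, then guard
the HPTE insertion against a concurrent invalidation) follows the usual KVM MMU
notifier sequence/retry pattern. Below is a minimal sketch of that pattern only;
map_guest_page is a hypothetical helper used purely for illustration, with error
handling reduced to a bare minimum. The real logic lives in
kvmppc_book3s_hv_page_fault() and kvmppc_h_enter() in the diff that follows.

	/*
	 * Illustrative only: sample kvm->mmu_notifier_seq before translating
	 * the guest page, and re-check it before installing the HPTE so a
	 * racing kvm_unmap_hva() cannot leave a stale translation behind.
	 */
	static int map_guest_page(struct kvm_vcpu *vcpu, unsigned long hva)
	{
		struct kvm *kvm = vcpu->kvm;
		struct page *pages[1];
		unsigned long mmu_seq;

		mmu_seq = kvm->mmu_notifier_seq;	/* sample before the lookup */
		smp_rmb();

		if (get_user_pages_fast(hva, 1, 1, pages) < 1)
			return -EFAULT;		/* backing page not resident */

		/* ... build the HPTE contents from page_to_pfn(pages[0]) ... */

		if (mmu_notifier_retry(vcpu, mmu_seq)) {
			/* an invalidation ran in between; let the guest retry */
			put_page(pages[0]);
			return RESUME_GUEST;
		}

		/* ... install the HPTE and add it to the rmap chain ... */
		put_page(pages[0]);
		return RESUME_GUEST;
	}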
Diffstat (limited to 'arch/powerpc')
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s.h	4
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s_64.h	31
-rw-r--r--	arch/powerpc/include/asm/kvm_host.h	16
-rw-r--r--	arch/powerpc/include/asm/reg.h	3
-rw-r--r--	arch/powerpc/kvm/Kconfig	1
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_hv.c	290
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c	25
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rm_mmu.c	140
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S	49
-rw-r--r--	arch/powerpc/kvm/powerpc.c	3
-rw-r--r--	arch/powerpc/mm/hugetlbpage.c	2
11 files changed, 499 insertions, 65 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 3a9e51f43397..9240cebf8bad 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -143,6 +143,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+			unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+			unsigned long pte_index);
 extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 79dc37fb86b5..c21e46da4a3b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -136,6 +136,37 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
 }
 
+/*
+ * Lock and read a linux PTE.  If it's present and writable, atomically
+ * set dirty and referenced bits and return the PTE, otherwise return 0.
+ */
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
+{
+	pte_t pte, tmp;
+
+	/* wait until _PAGE_BUSY is clear then set it atomically */
+	__asm__ __volatile__ (
+		"1:	ldarx	%0,0,%3\n"
+		"	andi.	%1,%0,%4\n"
+		"	bne-	1b\n"
+		"	ori	%1,%0,%4\n"
+		"	stdcx.	%1,0,%3\n"
+		"	bne-	1b"
+		: "=&r" (pte), "=&r" (tmp), "=m" (*p)
+		: "r" (p), "i" (_PAGE_BUSY)
+		: "cc");
+
+	if (pte_present(pte)) {
+		pte = pte_mkyoung(pte);
+		if (pte_write(pte))
+			pte = pte_mkdirty(pte);
+	}
+
+	*p = pte;	/* clears _PAGE_BUSY */
+
+	return pte;
+}
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 937cacaaf236..968f3aa61cd1 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -32,6 +32,7 @@
 #include <linux/atomic.h>
 #include <asm/kvm_asm.h>
 #include <asm/processor.h>
+#include <asm/page.h>
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
@@ -44,6 +45,19 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET	1
 #endif
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+#include <linux/mmu_notifier.h>
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
+struct kvm;
+extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+#endif
+
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
 #define KVM_NR_PAGE_SIZES	1
@@ -212,6 +226,7 @@ struct kvm_arch {
 	struct kvmppc_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
+	int using_mmu_notifiers;
 	struct list_head spapr_tce_tables;
 	spinlock_t slot_phys_lock;
 	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
@@ -460,6 +475,7 @@ struct kvm_vcpu_arch {
 	struct list_head run_list;
 	struct task_struct *run_task;
 	struct kvm_run *kvm_run;
+	pgd_t *pgdir;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 16efb3151c20..35c9309bf038 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -495,6 +495,9 @@
 #define SPRN_SPRG7	0x117	/* Special Purpose Register General 7 */
 #define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
 #define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
+#define   SRR1_ISI_NOPT		0x40000000 /* ISI: Not found in hash */
+#define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */
+#define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 78133deb4b64..8f64709ae331 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -69,6 +69,7 @@ config KVM_BOOK3S_64
 config KVM_BOOK3S_64_HV
 	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
+	select MMU_NOTIFIER
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
 	  virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d31519b8637..83761dd8a924 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -281,8 +281,9 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 }
 
 /*
- * We come here on a H_ENTER call from the guest when
- * we don't have the requested page pinned already.
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
  */
 long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel)
@@ -292,6 +293,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct kvm_memory_slot *memslot;
 	long ret;
 
+	if (kvm->arch.using_mmu_notifiers)
+		goto do_insert;
+
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
 		return H_PARAMETER;
@@ -309,9 +313,12 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			return H_PARAMETER;
 	}
 
-	preempt_disable();
+ do_insert:
+	/* Protect linux PTE lookup from page table destruction */
+	rcu_read_lock_sched();	/* this disables preemption too */
+	vcpu->arch.pgdir = current->mm->pgd;
 	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
-	preempt_enable();
+	rcu_read_unlock_sched();
 	if (ret == H_TOO_HARD) {
 		/* this can't happen */
 		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -487,12 +494,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			unsigned long ea, unsigned long dsisr)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long *hptep, hpte[3];
-	unsigned long psize;
-	unsigned long gfn;
+	unsigned long *hptep, hpte[3], r;
+	unsigned long mmu_seq, psize, pte_size;
+	unsigned long gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
 	struct revmap_entry *rev;
-	long index;
+	struct page *page, *pages[1];
+	long index, ret, npages;
+	unsigned long is_io;
+	struct vm_area_struct *vma;
 
 	/*
 	 * Real-mode code has already searched the HPT and found the
@@ -510,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		cpu_relax();
 	hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
 	hpte[1] = hptep[1];
-	hpte[2] = rev->guest_rpte;
+	hpte[2] = r = rev->guest_rpte;
 	asm volatile("lwsync" : : : "memory");
 	hptep[0] = hpte[0];
 	preempt_enable();
@@ -520,8 +531,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		return RESUME_GUEST;
 
 	/* Translate the logical address and get the page */
-	psize = hpte_page_size(hpte[0], hpte[1]);
-	gfn = hpte_rpn(hpte[2], psize);
+	psize = hpte_page_size(hpte[0], r);
+	gfn = hpte_rpn(r, psize);
 	memslot = gfn_to_memslot(kvm, gfn);
 
 	/* No memslot means it's an emulated MMIO region */
@@ -531,8 +542,228 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 						  dsisr & DSISR_ISSTORE);
 	}
 
-	/* should never get here otherwise */
-	return -EFAULT;
+	if (!kvm->arch.using_mmu_notifiers)
+		return -EFAULT;		/* should never get here */
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	is_io = 0;
+	pfn = 0;
+	page = NULL;
+	pte_size = PAGE_SIZE;
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	npages = get_user_pages_fast(hva, 1, 1, pages);
+	if (npages < 1) {
+		/* Check if it's an I/O mapping */
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, hva);
+		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
+		    (vma->vm_flags & VM_PFNMAP)) {
+			pfn = vma->vm_pgoff +
+				((hva - vma->vm_start) >> PAGE_SHIFT);
+			pte_size = psize;
+			is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+		}
+		up_read(&current->mm->mmap_sem);
+		if (!pfn)
+			return -EFAULT;
+	} else {
+		page = pages[0];
+		if (PageHuge(page)) {
+			page = compound_head(page);
+			pte_size <<= compound_order(page);
+		}
+		pfn = page_to_pfn(page);
+	}
+
+	ret = -EFAULT;
+	if (psize > pte_size)
+		goto out_put;
+
+	/* Check WIMG vs. the actual page we're accessing */
+	if (!hpte_cache_flags_ok(r, is_io)) {
+		if (is_io)
+			return -EFAULT;
+		/*
+		 * Allow guest to map emulated device memory as
+		 * uncacheable, but actually make it cacheable.
+		 */
+		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+	}
+
+	/* Set the HPTE to point to pfn */
+	r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+	ret = RESUME_GUEST;
+	preempt_disable();
+	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+		cpu_relax();
+	if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
+	    rev->guest_rpte != hpte[2])
+		/* HPTE has been changed under us; let the guest retry */
+		goto out_unlock;
+	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+	rmap = &memslot->rmap[gfn - memslot->base_gfn];
+	lock_rmap(rmap);
+
+	/* Check if we might have been invalidated; let the guest retry if so */
+	ret = RESUME_GUEST;
+	if (mmu_notifier_retry(vcpu, mmu_seq)) {
+		unlock_rmap(rmap);
+		goto out_unlock;
+	}
+	kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+
+	hptep[1] = r;
+	eieio();
+	hptep[0] = hpte[0];
+	asm volatile("ptesync" : : : "memory");
+	preempt_enable();
+	if (page)
+		SetPageDirty(page);
+
+ out_put:
+	if (page)
+		put_page(page);
+	return ret;
+
+ out_unlock:
+	hptep[0] &= ~HPTE_V_HVLOCK;
+	preempt_enable();
+	goto out_put;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long gfn))
+{
+	int ret;
+	int retval = 0;
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+
+			ret = handler(kvm, &memslot->rmap[gfn_offset],
+				      memslot->base_gfn + gfn_offset);
+			retval |= ret;
+		}
+	}
+
+	return retval;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			   unsigned long gfn)
+{
+	struct revmap_entry *rev = kvm->arch.revmap;
+	unsigned long h, i, j;
+	unsigned long *hptep;
+	unsigned long ptel, psize;
+
+	for (;;) {
+		while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+			cpu_relax();
+		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+			__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+			break;
+		}
+
+		/*
+		 * To avoid an ABBA deadlock with the HPTE lock bit,
+		 * we have to unlock the rmap chain before locking the HPTE.
+		 * Thus we remove the first entry, unlock the rmap chain,
+		 * lock the HPTE and then check that it is for the
+		 * page we're unmapping before changing it to non-present.
+		 */
+		i = *rmapp & KVMPPC_RMAP_INDEX;
+		j = rev[i].forw;
+		if (j == i) {
+			/* chain is now empty */
+			j = 0;
+		} else {
+			/* remove i from chain */
+			h = rev[i].back;
+			rev[h].forw = j;
+			rev[j].back = h;
+			rev[i].forw = rev[i].back = i;
+			j |= KVMPPC_RMAP_PRESENT;
+		}
+		smp_wmb();
+		*rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);
+
+		/* Now lock, check and modify the HPTE */
+		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+			cpu_relax();
+		ptel = rev[i].guest_rpte;
+		psize = hpte_page_size(hptep[0], ptel);
+		if ((hptep[0] & HPTE_V_VALID) &&
+		    hpte_rpn(ptel, psize) == gfn) {
+			kvmppc_invalidate_hpte(kvm, hptep, i);
+			hptep[0] |= HPTE_V_ABSENT;
+		}
+		hptep[0] &= ~HPTE_V_HVLOCK;
+	}
+	return 0;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	return 0;
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			 unsigned long gfn)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	if (!(*rmapp & KVMPPC_RMAP_REFERENCED))
+		return 0;
+	kvm_unmap_rmapp(kvm, rmapp, gfn);
+	while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
+		cpu_relax();
+	__clear_bit(KVMPPC_RMAP_REF_BIT, rmapp);
+	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+	return 1;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			      unsigned long gfn)
+{
+	return !!(*rmapp & KVMPPC_RMAP_REFERENCED);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return 0;
+	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	if (!kvm->arch.using_mmu_notifiers)
+		return;
+	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 }
 
 void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
@@ -540,31 +771,42 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 {
 	struct kvm_memory_slot *memslot;
 	unsigned long gfn = gpa >> PAGE_SHIFT;
-	struct page *page;
-	unsigned long psize, offset;
+	struct page *page, *pages[1];
+	int npages;
+	unsigned long hva, psize, offset;
 	unsigned long pa;
 	unsigned long *physp;
 
 	memslot = gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return NULL;
-	physp = kvm->arch.slot_phys[memslot->id];
-	if (!physp)
-		return NULL;
-	physp += gfn - memslot->base_gfn;
-	pa = *physp;
-	if (!pa) {
-		if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
+	if (!kvm->arch.using_mmu_notifiers) {
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (!physp)
 			return NULL;
+		physp += gfn - memslot->base_gfn;
 		pa = *physp;
+		if (!pa) {
+			if (kvmppc_get_guest_page(kvm, gfn, memslot,
+						  PAGE_SIZE) < 0)
+				return NULL;
+			pa = *physp;
+		}
+		page = pfn_to_page(pa >> PAGE_SHIFT);
+	} else {
+		hva = gfn_to_hva_memslot(memslot, gfn);
+		npages = get_user_pages_fast(hva, 1, 1, pages);
+		if (npages < 1)
+			return NULL;
+		page = pages[0];
 	}
-	page = pfn_to_page(pa >> PAGE_SHIFT);
 	psize = PAGE_SIZE;
 	if (PageHuge(page)) {
 		page = compound_head(page);
 		psize <<= compound_order(page);
 	}
-	get_page(page);
+	if (!kvm->arch.using_mmu_notifiers)
+		get_page(page);
 	offset = gpa & (psize - 1);
 	if (nb_ret)
 		*nb_ret = psize - offset;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 45aabb9a527f..86c4191cb75b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -326,19 +326,19 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	}
 	/*
-	 * We get this if the guest accesses a page which it thinks
-	 * it has mapped but which is not actually present, because
-	 * it is for an emulated I/O device.
-	 * Any other HDSI interrupt has been handled already.
+	 * We get these next two if the guest accesses a page which it thinks
+	 * it has mapped but which is not actually present, either because
+	 * it is for an emulated I/O device or because the corresponding
+	 * host page has been paged out.  Any other HDSI/HISI interrupts
+	 * have been handled already.
 	 */
 	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
 		r = kvmppc_book3s_hv_page_fault(run, vcpu,
 			vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
 		break;
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
-		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
-					vcpu->arch.shregs.msr & 0x58000000);
-		r = RESUME_GUEST;
+		r = kvmppc_book3s_hv_page_fault(run, vcpu,
+			kvmppc_get_pc(vcpu), 0);
 		break;
 	/*
 	 * This occurs if the guest executes an illegal instruction.
@@ -867,6 +867,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	flush_altivec_to_thread(current);
 	flush_vsx_to_thread(current);
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+	vcpu->arch.pgdir = current->mm->pgd;
 
 	do {
 		r = kvmppc_run_vcpu(run, vcpu);
@@ -1090,9 +1091,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 	unsigned long *phys;
 
 	/* Allocate a slot_phys array */
-	npages = mem->memory_size >> PAGE_SHIFT;
 	phys = kvm->arch.slot_phys[mem->slot];
-	if (!phys) {
+	if (!kvm->arch.using_mmu_notifiers && !phys) {
+		npages = mem->memory_size >> PAGE_SHIFT;
 		phys = vzalloc(npages * sizeof(unsigned long));
 		if (!phys)
 			return -ENOMEM;
@@ -1298,6 +1299,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	}
 	kvm->arch.lpcr = lpcr;
 
+	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
 	spin_lock_init(&kvm->arch.slot_phys_lock);
 	return 0;
 }
@@ -1306,8 +1308,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
 	unsigned long i;
 
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		unpin_slot(kvm, i);
+	if (!kvm->arch.using_mmu_notifiers)
+		for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+			unpin_slot(kvm, i);
 
 	if (kvm->arch.rma) {
 		kvm_release_rma(kvm->arch.rma);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index a5176dc37e7e..81d16ed9767d 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -58,7 +58,7 @@ static void *real_vmalloc_addr(void *x)
  * Add this HPTE into the chain for the real page.
  * Must be called with the chain locked; it unlocks the chain.
  */
-static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 			     unsigned long *rmap, long pte_index, int realmode)
 {
 	struct revmap_entry *head, *tail;
@@ -83,6 +83,7 @@ static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 	smp_wmb();
 	*rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
 }
+EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
@@ -118,12 +119,33 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
+static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+			      unsigned long *pte_sizep)
+{
+	pte_t *ptep;
+	unsigned long ps = *pte_sizep;
+	unsigned int shift;
+
+	ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+	if (!ptep)
+		return __pte(0);
+	if (shift)
+		*pte_sizep = 1ul << shift;
+	else
+		*pte_sizep = PAGE_SIZE;
+	if (ps > *pte_sizep)
+		return __pte(0);
+	if (!pte_present(*ptep))
+		return __pte(0);
+	return kvmppc_read_update_linux_pte(ptep);
+}
+
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		    long pte_index, unsigned long pteh, unsigned long ptel)
 {
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long i, pa, gpa, gfn, psize;
-	unsigned long slot_fn;
+	unsigned long slot_fn, hva;
 	unsigned long *hpte;
 	struct revmap_entry *rev;
 	unsigned long g_ptel = ptel;
@@ -131,6 +153,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned long *physp, pte_size;
 	unsigned long is_io;
 	unsigned long *rmap;
+	pte_t pte;
+	unsigned long mmu_seq;
 	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
@@ -138,11 +162,16 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		return H_PARAMETER;
 	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
 
+	/* used later to detect if we might have been invalidated */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
 	/* Find the memslot (if any) for this address */
 	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
 	gfn = gpa >> PAGE_SHIFT;
 	memslot = builtin_gfn_to_memslot(kvm, gfn);
 	pa = 0;
+	is_io = ~0ul;
 	rmap = NULL;
 	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
 		/* PPC970 can't do emulated MMIO */
@@ -160,19 +189,31 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	slot_fn = gfn - memslot->base_gfn;
 	rmap = &memslot->rmap[slot_fn];
 
-	physp = kvm->arch.slot_phys[memslot->id];
-	if (!physp)
-		return H_PARAMETER;
-	physp += slot_fn;
-	if (realmode)
-		physp = real_vmalloc_addr(physp);
-	pa = *physp;
-	if (!pa)
-		return H_TOO_HARD;
-	is_io = pa & (HPTE_R_I | HPTE_R_W);
-	pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
-	pa &= PAGE_MASK;
-
+	if (!kvm->arch.using_mmu_notifiers) {
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (!physp)
+			return H_PARAMETER;
+		physp += slot_fn;
+		if (realmode)
+			physp = real_vmalloc_addr(physp);
+		pa = *physp;
+		if (!pa)
+			return H_TOO_HARD;
+		is_io = pa & (HPTE_R_I | HPTE_R_W);
+		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
+		pa &= PAGE_MASK;
+	} else {
+		/* Translate to host virtual address */
+		hva = gfn_to_hva_memslot(memslot, gfn);
+
+		/* Look up the Linux PTE for the backing page */
+		pte_size = psize;
+		pte = lookup_linux_pte(vcpu, hva, &pte_size);
+		if (pte_present(pte)) {
+			is_io = hpte_cache_bits(pte_val(pte));
+			pa = pte_pfn(pte) << PAGE_SHIFT;
+		}
+	}
 	if (pte_size < psize)
 		return H_PARAMETER;
 	if (pa && pte_size > psize)
@@ -180,10 +221,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 	ptel &= ~(HPTE_R_PP0 - psize);
 	ptel |= pa;
-	pteh |= HPTE_V_VALID;
+
+	if (pa)
+		pteh |= HPTE_V_VALID;
+	else
+		pteh |= HPTE_V_ABSENT;
 
 	/* Check WIMG */
-	if (!hpte_cache_flags_ok(ptel, is_io)) {
+	if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
 		if (is_io)
 			return H_PARAMETER;
 		/*
@@ -194,6 +239,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		ptel |= HPTE_R_M;
 	}
 
+	/* Find and lock the HPTEG slot to use */
  do_insert:
 	if (pte_index >= HPT_NPTE)
 		return H_PARAMETER;
@@ -253,7 +299,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		if (realmode)
 			rmap = real_vmalloc_addr(rmap);
 		lock_rmap(rmap);
-		kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, realmode);
+		/* Check for pending invalidations under the rmap chain lock */
+		if (kvm->arch.using_mmu_notifiers &&
+		    mmu_notifier_retry(vcpu, mmu_seq)) {
+			/* inval in progress, write a non-present HPTE */
+			pteh |= HPTE_V_ABSENT;
+			pteh &= ~HPTE_V_VALID;
+			unlock_rmap(rmap);
+		} else {
+			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
+						realmode);
+		}
 	}
 
 	hpte[1] = ptel;
@@ -516,6 +572,23 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	return H_SUCCESS;
 }
 
+void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+			unsigned long pte_index)
+{
+	unsigned long rb;
+
+	hptep[0] &= ~HPTE_V_VALID;
+	rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+		cpu_relax();
+	asm volatile("ptesync" : : : "memory");
+	asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+		     : : "r" (rb), "r" (kvm->arch.lpid));
+	asm volatile("ptesync" : : : "memory");
+	kvm->arch.tlbie_lock = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
+
 static int slb_base_page_shift[4] = {
 	24,	/* 16M */
 	16,	/* 64k */
@@ -605,15 +678,15 @@ EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
 
 /*
  * Called in real mode to check whether an HPTE not found fault
- * is due to accessing an emulated MMIO page.
+ * is due to accessing a paged-out page or an emulated MMIO page.
  * Returns a possibly modified status (DSISR) value if not
  * (i.e. pass the interrupt to the guest),
  * -1 to pass the fault up to host kernel mode code, -2 to do that
- * and also load the instruction word,
+ * and also load the instruction word (for MMIO emulation),
  * or 0 if we should make the guest retry the access.
  */
 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
-			  unsigned long slb_v, unsigned int status)
+			  unsigned long slb_v, unsigned int status, bool data)
 {
 	struct kvm *kvm = vcpu->kvm;
 	long int index;
@@ -624,6 +697,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	unsigned long pp, key;
 
 	valid = HPTE_V_VALID | HPTE_V_ABSENT;
+
 	index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
 	if (index < 0)
 		return status;	/* there really was no HPTE */
@@ -645,22 +719,28 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	/* Check access permissions to the page */
 	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
 	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
-	if (status & DSISR_ISSTORE) {
+	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
+	if (!data) {
+		if (gr & (HPTE_R_N | HPTE_R_G))
+			return status | SRR1_ISI_N_OR_G;
+		if (!hpte_read_permission(pp, slb_v & key))
+			return status | SRR1_ISI_PROT;
+	} else if (status & DSISR_ISSTORE) {
 		/* check write permission */
 		if (!hpte_write_permission(pp, slb_v & key))
-			goto protfault;
+			return status | DSISR_PROTFAULT;
 	} else {
 		if (!hpte_read_permission(pp, slb_v & key))
-			goto protfault;
+			return status | DSISR_PROTFAULT;
 	}
 
 	/* Check storage key, if applicable */
-	if (vcpu->arch.shregs.msr & MSR_DR) {
+	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
 		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
 		if (status & DSISR_ISSTORE)
 			perm >>= 1;
 		if (perm & 1)
-			return (status & ~DSISR_NOHPTE) | DSISR_KEYFAULT;
+			return status | DSISR_KEYFAULT;
 	}
 
 	/* Save HPTE info for virtual-mode handler */
@@ -669,11 +749,11 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 	vcpu->arch.pgfault_hpte[0] = v;
 	vcpu->arch.pgfault_hpte[1] = r;
 
-	if (vcpu->arch.shregs.msr & MSR_IR)
+	/* Check the storage key to see if it is possibly emulated MMIO */
+	if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
+	    (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
 		return -2;	/* MMIO emulation - load instr word */
 
 	return -1;		/* send fault up to host kernel mode */
-
-protfault:
-	return (status & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index d07b64d5f37e..7d4990665d00 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -621,6 +621,8 @@ BEGIN_FTR_SECTION
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	kvmppc_hdsi
+	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+	beq	kvmppc_hisi
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* See if this is a leftover HDEC interrupt */
@@ -1125,6 +1127,7 @@ kvmppc_hdsi:
 
 	/* Search the hash table. */
 	mr	r3, r9			/* vcpu pointer */
+	li	r7, 1			/* data fault */
 	bl	.kvmppc_hpte_hv_fault
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	ld	r10, VCPU_PC(r9)
@@ -1182,6 +1185,52 @@ kvmppc_hdsi:
 	b	nohpte_cont
 
 /*
+ * Similarly for an HISI, reflect it to the guest as an ISI unless
+ * it is an HPTE not found fault for a page that we have paged out.
+ */
+kvmppc_hisi:
+	andis.	r0, r11, SRR1_ISI_NOPT@h
+	beq	1f
+	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
+	beq	3f
+	clrrdi	r0, r10, 28
+	PPC_SLBFEE_DOT(r5, r0)		/* if so, look up SLB */
+	bne	1f			/* if no SLB entry found */
+4:
+	/* Search the hash table. */
+	mr	r3, r9			/* vcpu pointer */
+	mr	r4, r10
+	mr	r6, r11
+	li	r7, 0			/* instruction fault */
+	bl	.kvmppc_hpte_hv_fault
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+	cmpdi	r3, 0			/* retry the instruction */
+	beq	6f
+	cmpdi	r3, -1			/* handle in kernel mode */
+	beq	nohpte_cont
+
+	/* Synthesize an ISI for the guest */
+	mr	r11, r3
+1:	mtspr	SPRN_SRR0, r10
+	mtspr	SPRN_SRR1, r11
+	li	r10, BOOK3S_INTERRUPT_INST_STORAGE
+	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11, r11, 63
+6:	ld	r7, VCPU_CTR(r9)
+	lwz	r8, VCPU_XER(r9)
+	mtctr	r7
+	mtxer	r8
+	mr	r4, r9
+	b	fast_guest_return
+
+3:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
+	ld	r5, KVM_VRMA_SLB_V(r6)
+	b	4b
+
+/*
  * Try to handle an hcall in real mode.
  * Returns to the guest if we handle it, or continues on up to
  * the kernel if we can't (i.e. if we don't have a handler for
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4f85ac32258a..06e955b5837e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -245,6 +245,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 		if (cpu_has_feature(CPU_FTR_ARCH_201))
 			r = 2;
 		break;
+	case KVM_CAP_SYNC_MMU:
+		r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+		break;
 #endif
 	default:
 		r = 0;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a8b3cc7d90fe..f348c3d90404 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
+#include <linux/export.h>
 #include <linux/of_fdt.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
@@ -103,6 +104,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 	*shift = hugepd_shift(*hpdp);
 	return hugepte_offset(hpdp, ea, pdshift);
 }
+EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {