author		Paul Mackerras <paulus@samba.org>	2013-11-16 01:46:02 -0500
committer	Alexander Graf <agraf@suse.de>	2013-11-18 16:36:09 -0500
commit		caaa4c804fae7bb654f7d00b35b8583280a9c52c (patch)
tree		1017d018bedf69d6e3dab27660fcc8ad4ac82f12 /arch
parent		f080480488028bcc25357f85e8ae54ccc3bb7173 (diff)
KVM: PPC: Book3S HV: Fix physical address calculations
This fixes a bug in kvmppc_do_h_enter() where the physical address for a page can be calculated incorrectly if transparent huge pages (THP) are active. Until THP came along, it was true that if we encountered a large (16M) page in kvmppc_do_h_enter(), then the associated memslot must be 16M aligned for both its guest physical address and the userspace address, and the physical address calculations in kvmppc_do_h_enter() assumed that. With THP, that is no longer true.

In the case where we are using MMU notifiers and the page size that we get from the Linux page tables is larger than the page being mapped by the guest, we need to fill in some low-order bits of the physical address. Without THP, these bits would be the same in the guest physical address (gpa) and the host virtual address (hva). With THP, they can be different, and we need to use the bits from hva rather than gpa.

In the case where we are not using MMU notifiers, the host physical address we get from the memslot->arch.slot_phys[] array already includes the low-order bits down to the PAGE_SIZE level, even if we are using large pages. Thus we can simplify the calculation in this case to just add in the remaining bits in the case where PAGE_SIZE is 64k and the guest is mapping a 4k page.

The same bug exists in kvmppc_book3s_hv_page_fault(). The basic fix is to use psize (the page size from the HPTE) rather than pte_size (the page size from the Linux PTE) when updating the HPTE low word in r. That means that pfn needs to be computed to PAGE_SIZE granularity even if the Linux PTE is a huge page PTE. That can be arranged simply by doing the page_to_pfn() before setting page to the head of the compound page. If psize is less than PAGE_SIZE, then we need to make sure we only update the bits from PAGE_SIZE upwards, in order not to lose any sub-page offset bits in r. On the other hand, if psize is greater than PAGE_SIZE, we need to make sure we don't bring in non-zero low order bits in pfn, hence we mask (pfn << PAGE_SHIFT) with ~(psize - 1).

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
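To make the masking described in the last paragraph concrete, here is a small standalone sketch, not kernel code: point_hpte_at_pfn() is a made-up helper name, the 64k PAGE_SIZE is assumed, and only HPTE_R_PP0 mirrors the powerpc header definition of the topmost RPN bit.

/*
 * Illustrative sketch of combining a PAGE_SIZE-granular pfn with the
 * HPTE low word "r" for a given guest page size psize.
 */
#include <stdio.h>

#define PAGE_SHIFT	16
#define PAGE_SIZE	(1UL << PAGE_SHIFT)	/* assume 64k host pages */
#define HPTE_R_PP0	0x8000000000000000UL	/* topmost RPN bit */

static unsigned long point_hpte_at_pfn(unsigned long r, unsigned long pfn,
				       unsigned long psize)
{
	/*
	 * If the guest page (psize) is smaller than a host page, keep the
	 * sub-page offset bits already present in r; only replace bits
	 * from PAGE_SIZE upwards.
	 */
	if (psize < PAGE_SIZE)
		psize = PAGE_SIZE;

	/*
	 * If the guest page is larger than a host page (e.g. 16M backed
	 * by THP), pfn may carry non-zero bits below psize; mask them off
	 * so they do not leak into the HPTE.
	 */
	return (r & ~(HPTE_R_PP0 - psize)) |
	       ((pfn << PAGE_SHIFT) & ~(psize - 1));
}

int main(void)
{
	/* 4k guest page inside a 64k host page: the 0x3000 offset survives */
	printf("0x%lx\n", point_hpte_at_pfn(0x3000, 0x12345, 0x1000));
	/* 16M guest page backed by THP: low-order pfn bits are dropped */
	printf("0x%lx\n", point_hpte_at_pfn(0x0, 0x12345, 0x1000000));
	return 0;
}

With the invented values above, the first call prints 0x123453000 (sub-page bits preserved) and the second prints 0x123000000 (bits below 16M cleared), matching the two cases the commit message distinguishes.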
Diffstat (limited to 'arch')
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_hv.c	12
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rm_mmu.c	4
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f3ff587a8b7d..47bbeaf2d320 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -665,6 +665,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			return -EFAULT;
 	} else {
 		page = pages[0];
+		pfn = page_to_pfn(page);
 		if (PageHuge(page)) {
 			page = compound_head(page);
 			pte_size <<= compound_order(page);
@@ -689,7 +690,6 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			}
 			rcu_read_unlock_sched();
 		}
-		pfn = page_to_pfn(page);
 	}
 
 	ret = -EFAULT;
@@ -707,8 +707,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
 	}
 
-	/* Set the HPTE to point to pfn */
-	r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+	/*
+	 * Set the HPTE to point to pfn.
+	 * Since the pfn is at PAGE_SIZE granularity, make sure we
+	 * don't mask out lower-order bits if psize < PAGE_SIZE.
+	 */
+	if (psize < PAGE_SIZE)
+		psize = PAGE_SIZE;
+	r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1));
 	if (hpte_is_writable(r) && !write_ok)
 		r = hpte_make_readonly(r);
 	ret = RESUME_GUEST;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 9c515440ad1a..fddbf989f37e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -225,6 +225,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		is_io = pa & (HPTE_R_I | HPTE_R_W);
 		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
 		pa &= PAGE_MASK;
+		pa |= gpa & ~PAGE_MASK;
 	} else {
 		/* Translate to host virtual address */
 		hva = __gfn_to_hva_memslot(memslot, gfn);
@@ -238,13 +239,12 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 				ptel = hpte_make_readonly(ptel);
 			is_io = hpte_cache_bits(pte_val(pte));
 			pa = pte_pfn(pte) << PAGE_SHIFT;
+			pa |= hva & (pte_size - 1);
 		}
 	}
 
 	if (pte_size < psize)
 		return H_PARAMETER;
-	if (pa && pte_size > psize)
-		pa |= gpa & (pte_size - 1);
 
 	ptel &= ~(HPTE_R_PP0 - psize);
 	ptel |= pa;
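As a rough numeric illustration of the kvmppc_do_h_enter() hunk above: the low-order bits of the host physical address now come from hva rather than gpa. Everything below is invented for the example (64k host pages assumed) and is not kernel code.

/*
 * Illustrative only: a guest page backed by a 16M THP page on a host
 * with 64k pages; all addresses are made up for the example.
 */
#include <stdio.h>

int main(void)
{
	unsigned long pte_size = 0x1000000;        /* 16M Linux (THP) page */
	unsigned long hva      = 0x7fff2a650000UL; /* host virtual address */
	unsigned long pa       = 0x123000UL << 16; /* pte_pfn(pte) << PAGE_SHIFT */

	/*
	 * With THP, the 16M region can sit at a different offset within
	 * guest physical space than within host virtual space, so the
	 * sub-16M bits must come from hva rather than gpa.
	 */
	pa |= hva & (pte_size - 1);

	printf("pa = 0x%lx\n", pa);	/* prints pa = 0x1230650000 */
	return 0;
}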