author		Radim Krčmář <rkrcmar@redhat.com>	2018-03-29 14:20:13 -0400
committer	Radim Krčmář <rkrcmar@redhat.com>	2018-03-29 14:20:13 -0400
commit		27aa8962817afdf7754e719a8a072584a747aff3 (patch)
tree		fde459201bb59f0934bc64b6005bd764dfac96ff
parent		f497b6c25d0f62ca0a2ef3b5612d8a2e022f54a4 (diff)
parent		31c8b0d0694a1f7e3b46df0d1341a874ecb5e0de (diff)
Merge tag 'kvm-ppc-next-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
KVM PPC update for 4.17

- Improvements for the radix page fault handler for HV KVM on POWER9.
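A note on the common thread in the smaller hunks below: the single-page kvm_unmap_hva MMU-notifier hook is removed, and the kvmppc_ops table, Book3S HV and Book3S PR keep only kvm_unmap_hva_range (e500 retains a private static helper). The sketch below is illustrative only, not code from this merge; unmap_single_hva() is a hypothetical name. It shows why the single-page hook is redundant: a one-page unmap is just a one-page range, which is exactly how the PR code expressed it internally (hva to hva + PAGE_SIZE).

	/*
	 * Illustrative sketch: expressing a single-HVA unmap through the
	 * range hook that survives this series. unmap_single_hva() is
	 * hypothetical; kvm_unmap_hva_range() is the hook that remains.
	 */
	static int unmap_single_hva(struct kvm *kvm, unsigned long hva)
	{
		return kvm_unmap_hva_range(kvm, hva, hva + PAGE_SIZE);
	}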
-rw-r--r--	arch/powerpc/include/asm/kvm_host.h	1
-rw-r--r--	arch/powerpc/include/asm/kvm_ppc.h	1
-rw-r--r--	arch/powerpc/kvm/book3s.c	6
-rw-r--r--	arch/powerpc/kvm/book3s.h	1
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_hv.c	9
-rw-r--r--	arch/powerpc/kvm/book3s_64_mmu_radix.c	384
-rw-r--r--	arch/powerpc/kvm/book3s_64_vio_hv.c	2
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c	18
-rw-r--r--	arch/powerpc/kvm/book3s_hv_rmhandlers.S	10
-rw-r--r--	arch/powerpc/kvm/book3s_pr.c	10
-rw-r--r--	arch/powerpc/kvm/e500_mmu_host.c	2
-rw-r--r--	arch/powerpc/kvm/powerpc.c	4
-rw-r--r--	arch/powerpc/kvm/trace_pr.h	15
13 files changed, 261 insertions, 202 deletions
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1f53b562726f..6b69d7999381 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -60,7 +60,6 @@
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
-extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 7765a800ddae..23cfaef9544e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -295,7 +295,6 @@ struct kvmppc_ops {
 				const struct kvm_userspace_memory_region *mem,
 				const struct kvm_memory_slot *old,
 				const struct kvm_memory_slot *new);
-	int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
 	int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
 			   unsigned long end);
 	int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 234531d1bee1..97d4a112648f 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -819,12 +819,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 	kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_hva);
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index d2b3ec088b8c..4ad5e287b8bc 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -14,7 +14,6 @@
 
 extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 			struct kvm_memory_slot *memslot);
-extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
 			unsigned long end);
 extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ef243fed2f2b..a670fa5fbe50 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -877,15 +877,6 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	return 0;
 }
 
-int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
-{
-	hva_handler_fn handler;
-
-	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
-	kvm_handle_hva(kvm, hva, handler);
-	return 0;
-}
-
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	hva_handler_fn handler;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 0c854816e653..0590f1667607 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 {
 	int psize = MMU_BASE_PSIZE;
 
-	if (pshift >= PMD_SHIFT)
+	if (pshift >= PUD_SHIFT)
+		psize = MMU_PAGE_1G;
+	else if (pshift >= PMD_SHIFT)
 		psize = MMU_PAGE_2M;
 	addr &= ~0xfffUL;
 	addr |= mmu_psize_defs[psize].ap << 5;
@@ -160,6 +162,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	asm volatile("ptesync": : :"memory");
 }
 
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+{
+	unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+
+	asm volatile("ptesync": : :"memory");
+	/* RIC=1 PRS=0 R=1 IS=2 */
+	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
+		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
+	asm volatile("ptesync": : :"memory");
+}
+
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
 				      unsigned long clr, unsigned long set,
 				      unsigned long addr, unsigned int shift)
@@ -195,6 +208,12 @@ static void kvmppc_pte_free(pte_t *ptep)
 	kmem_cache_free(kvm_pte_cache, ptep);
 }
 
+/* Like pmd_huge() and pmd_large(), but works regardless of config options */
+static inline int pmd_is_leaf(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			     unsigned int level, unsigned long mmu_seq)
 {
@@ -214,12 +233,12 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pud = pud_alloc_one(kvm->mm, gpa);
 
 	pmd = NULL;
-	if (pud && pud_present(*pud))
+	if (pud && pud_present(*pud) && !pud_huge(*pud))
 		pmd = pmd_offset(pud, gpa);
-	else
+	else if (level <= 1)
 		new_pmd = pmd_alloc_one(kvm->mm, gpa);
 
-	if (level == 0 && !(pmd && pmd_present(*pmd)))
+	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
 		new_ptep = kvmppc_pte_alloc();
 
 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -237,6 +256,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pud = NULL;
 	}
 	pud = pud_offset(pgd, gpa);
+	if (pud_huge(*pud)) {
+		unsigned long hgpa = gpa & PUD_MASK;
+
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 1GB pte in after we saw a pmd page, try again.
+		 */
+		if (level <= 1 && !new_pmd) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* Valid 1GB page here already, remove it */
+		old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+					      ~0UL, 0, hgpa, PUD_SHIFT);
+		kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
+		if (old & _PAGE_DIRTY) {
+			unsigned long gfn = hgpa >> PAGE_SHIFT;
+			struct kvm_memory_slot *memslot;
+			memslot = gfn_to_memslot(kvm, gfn);
+			if (memslot && memslot->dirty_bitmap)
+				kvmppc_update_dirty_map(memslot,
+							gfn, PUD_SIZE);
+		}
+	}
+	if (level == 2) {
+		if (!pud_none(*pud)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.  new_pmd will be NULL since level == 2.
+			 */
+			new_pmd = pmd_offset(pud, 0);
+			pud_clear(pud);
+			kvmppc_radix_flush_pwc(kvm, gpa);
+		}
+		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+		ret = 0;
+		goto out_unlock;
+	}
 	if (pud_none(*pud)) {
 		if (!new_pmd)
 			goto out_unlock;
@@ -244,40 +307,71 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pmd = NULL;
 	}
 	pmd = pmd_offset(pud, gpa);
-	if (pmd_large(*pmd)) {
-		/* Someone else has instantiated a large page here; retry */
-		ret = -EAGAIN;
-		goto out_unlock;
-	}
-	if (level == 1 && !pmd_none(*pmd)) {
-		/*
-		 * There's a page table page here, but we wanted
-		 * to install a large page.  Tell the caller and let
-		 * it try installing a normal page if it wants.
-		 */
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-	if (level == 0) {
-		if (pmd_none(*pmd)) {
-			if (!new_ptep)
-				goto out_unlock;
-			pmd_populate(kvm->mm, pmd, new_ptep);
-			new_ptep = NULL;
-		}
-		ptep = pte_offset_kernel(pmd, gpa);
-		if (pte_present(*ptep)) {
-			/* PTE was previously valid, so invalidate it */
-			old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
-						      0, gpa, 0);
-			kvmppc_radix_tlbie_page(kvm, gpa, 0);
-			if (old & _PAGE_DIRTY)
-				mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
-		}
-		kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
-	} else {
-		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
-	}
+	if (pmd_is_leaf(*pmd)) {
+		unsigned long lgpa = gpa & PMD_MASK;
+
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 2MB pte in after we saw a pte page, try again.
+		 */
+		if (level == 0 && !new_ptep) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* Valid 2MB page here already, remove it */
+		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+					      ~0UL, 0, lgpa, PMD_SHIFT);
+		kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
+		if (old & _PAGE_DIRTY) {
+			unsigned long gfn = lgpa >> PAGE_SHIFT;
+			struct kvm_memory_slot *memslot;
+			memslot = gfn_to_memslot(kvm, gfn);
+			if (memslot && memslot->dirty_bitmap)
+				kvmppc_update_dirty_map(memslot,
+							gfn, PMD_SIZE);
+		}
+	}
+	if (level == 1) {
+		if (!pmd_none(*pmd)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.  new_ptep will be NULL since level == 1.
+			 */
+			new_ptep = pte_offset_kernel(pmd, 0);
+			pmd_clear(pmd);
+			kvmppc_radix_flush_pwc(kvm, gpa);
+		}
+		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+		ret = 0;
+		goto out_unlock;
+	}
+	if (pmd_none(*pmd)) {
+		if (!new_ptep)
+			goto out_unlock;
+		pmd_populate(kvm->mm, pmd, new_ptep);
+		new_ptep = NULL;
+	}
+	ptep = pte_offset_kernel(pmd, gpa);
+	if (pte_present(*ptep)) {
+		/* Check if someone else set the same thing */
+		if (pte_raw(*ptep) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* PTE was previously valid, so invalidate it */
+		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+					      0, gpa, 0);
+		kvmppc_radix_tlbie_page(kvm, gpa, 0);
+		if (old & _PAGE_DIRTY)
+			mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+	}
+	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
 	ret = 0;
 
  out_unlock:
@@ -298,11 +392,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned long mmu_seq, pte_size;
 	unsigned long gpa, gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
-	struct page *page = NULL, *pages[1];
-	long ret, npages, ok;
-	unsigned int writing;
-	struct vm_area_struct *vma;
-	unsigned long flags;
+	struct page *page = NULL;
+	long ret;
+	bool writing;
+	bool upgrade_write = false;
+	bool *upgrade_p = &upgrade_write;
 	pte_t pte, *ptep;
 	unsigned long pgflags;
 	unsigned int shift, level;
@@ -342,135 +436,137 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			dsisr & DSISR_ISSTORE);
 	}
 
-	/* used to check for invalidations in progress */
-	mmu_seq = kvm->mmu_notifier_seq;
-	smp_rmb();
-
-	writing = (dsisr & DSISR_ISSTORE) != 0;
-	hva = gfn_to_hva_memslot(memslot, gfn);
-	if (dsisr & DSISR_SET_RC) {
-		/*
-		 * Need to set an R or C bit in the 2nd-level tables;
-		 * if the relevant bits aren't already set in the linux
-		 * page tables, fall through to do the gup_fast to
-		 * set them in the linux page tables too.
-		 */
-		ok = 0;
-		pgflags = _PAGE_ACCESSED;
-		if (writing)
-			pgflags |= _PAGE_DIRTY;
-		local_irq_save(flags);
-		ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
-		if (ptep) {
-			pte = READ_ONCE(*ptep);
-			if (pte_present(pte) &&
-			    (pte_val(pte) & pgflags) == pgflags)
-				ok = 1;
-		}
-		local_irq_restore(flags);
-		if (ok) {
-			spin_lock(&kvm->mmu_lock);
-			if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			/*
-			 * We are walking the secondary page table here. We can do this
-			 * without disabling irq.
-			 */
-			ptep = __find_linux_pte(kvm->arch.pgtable,
-						gpa, NULL, &shift);
-			if (ptep && pte_present(*ptep)) {
-				kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
-							gpa, shift);
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			spin_unlock(&kvm->mmu_lock);
-		}
-	}
-
-	ret = -EFAULT;
-	pfn = 0;
-	pte_size = PAGE_SIZE;
-	pgflags = _PAGE_READ | _PAGE_EXEC;
-	level = 0;
-	npages = get_user_pages_fast(hva, 1, writing, pages);
-	if (npages < 1) {
-		/* Check if it's an I/O mapping */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, hva);
-		if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
-		    (vma->vm_flags & VM_PFNMAP)) {
-			pfn = vma->vm_pgoff +
-				((hva - vma->vm_start) >> PAGE_SHIFT);
-			pgflags = pgprot_val(vma->vm_page_prot);
-		}
-		up_read(&current->mm->mmap_sem);
-		if (!pfn)
-			return -EFAULT;
-	} else {
-		page = pages[0];
-		pfn = page_to_pfn(page);
-		if (PageHuge(page)) {
-			page = compound_head(page);
-			pte_size <<= compound_order(page);
-			/* See if we can insert a 2MB large-page PTE here */
-			if (pte_size >= PMD_SIZE &&
-			    (gpa & PMD_MASK & PAGE_MASK) ==
-			    (hva & PMD_MASK & PAGE_MASK)) {
-				level = 1;
-				pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-			}
-		}
-		/* See if we can provide write access */
-		if (writing) {
-			/*
-			 * We assume gup_fast has set dirty on the host PTE.
-			 */
-			pgflags |= _PAGE_WRITE;
-		} else {
-			local_irq_save(flags);
-			ptep = find_current_mm_pte(current->mm->pgd,
-						   hva, NULL, NULL);
-			if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
-				pgflags |= _PAGE_WRITE;
-			local_irq_restore(flags);
-		}
-	}
-
-	/*
-	 * Compute the PTE value that we need to insert.
-	 */
-	pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
-	if (pgflags & _PAGE_WRITE)
-		pgflags |= _PAGE_DIRTY;
-	pte = pfn_pte(pfn, __pgprot(pgflags));
-
-	/* Allocate space in the tree and write the PTE */
-	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
-	if (ret == -EBUSY) {
-		/*
-		 * There's already a PMD where wanted to install a large page;
-		 * for now, fall back to installing a small page.
-		 */
-		level = 0;
-		pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
-		pte = pfn_pte(pfn, __pgprot(pgflags));
-		ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
-	}
-	if (ret == 0 || ret == -EAGAIN)
-		ret = RESUME_GUEST;
-
-	if (page) {
-		/*
-		 * We drop pages[0] here, not page because page might
-		 * have been set to the head page of a compound, but
-		 * we have to drop the reference on the correct tail
-		 * page to match the get inside gup()
-		 */
-		put_page(pages[0]);
-	}
+	writing = (dsisr & DSISR_ISSTORE) != 0;
+	if (memslot->flags & KVM_MEM_READONLY) {
+		if (writing) {
+			/* give the guest a DSI */
+			dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
+			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+			return RESUME_GUEST;
+		}
+		upgrade_p = NULL;
+	}
+
+	if (dsisr & DSISR_SET_RC) {
+		/*
+		 * Need to set an R or C bit in the 2nd-level tables;
+		 * since we are just helping out the hardware here,
+		 * it is sufficient to do what the hardware does.
+		 */
+		pgflags = _PAGE_ACCESSED;
+		if (writing)
+			pgflags |= _PAGE_DIRTY;
+		/*
+		 * We are walking the secondary page table here. We can do this
+		 * without disabling irq.
+		 */
+		spin_lock(&kvm->mmu_lock);
+		ptep = __find_linux_pte(kvm->arch.pgtable,
+					gpa, NULL, &shift);
+		if (ptep && pte_present(*ptep) &&
+		    (!writing || pte_write(*ptep))) {
+			kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+						gpa, shift);
+			dsisr &= ~DSISR_SET_RC;
+		}
+		spin_unlock(&kvm->mmu_lock);
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT | DSISR_SET_RC)))
+			return RESUME_GUEST;
+	}
+
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	/*
+	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+	 * do it with !atomic && !async, which is how we call it.
+	 * We always ask for write permission since the common case
+	 * is that the page is writable.
+	 */
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+		pfn = page_to_pfn(page);
+		upgrade_write = true;
+	} else {
+		/* Call KVM generic code to do the slow-path check */
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+					   writing, upgrade_p);
+		if (is_error_noslot_pfn(pfn))
+			return -EFAULT;
+		page = NULL;
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (PageReserved(page))
+				page = NULL;
+		}
+	}
+
+	/* See if we can insert a 1GB or 2MB large PTE here */
+	level = 0;
+	if (page && PageCompound(page)) {
+		pte_size = PAGE_SIZE << compound_order(compound_head(page));
+		if (pte_size >= PUD_SIZE &&
+		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+		    (hva & (PUD_SIZE - PAGE_SIZE))) {
+			level = 2;
+			pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+		} else if (pte_size >= PMD_SIZE &&
+			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+			   (hva & (PMD_SIZE - PAGE_SIZE))) {
+			level = 1;
+			pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+		}
+	}
+
+	/*
+	 * Compute the PTE value that we need to insert.
+	 */
+	if (page) {
+		pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
+			_PAGE_ACCESSED;
+		if (writing || upgrade_write)
+			pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
+		pte = pfn_pte(pfn, __pgprot(pgflags));
+	} else {
+		/*
+		 * Read the PTE from the process' radix tree and use that
+		 * so we get the attribute bits.
+		 */
+		local_irq_disable();
+		ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+		pte = *ptep;
+		local_irq_enable();
+		if (shift == PUD_SHIFT &&
+		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+		    (hva & (PUD_SIZE - PAGE_SIZE))) {
+			level = 2;
+		} else if (shift == PMD_SHIFT &&
+			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+			   (hva & (PMD_SIZE - PAGE_SIZE))) {
+			level = 1;
+		} else if (shift && shift != PAGE_SHIFT) {
+			/* Adjust PFN */
+			unsigned long mask = (1ul << shift) - PAGE_SIZE;
+			pte = __pte(pte_val(pte) | (hva & mask));
+		}
+		if (!(writing || upgrade_write))
+			pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
+		pte = __pte(pte_val(pte) | _PAGE_EXEC);
+	}
+
+	/* Allocate space in the tree and write the PTE */
+	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+
+	if (page) {
+		if (!ret && (pte_val(pte) & _PAGE_WRITE))
+			set_page_dirty_lock(page);
+		put_page(page);
+	}
+
+	if (ret == 0 || ret == -EAGAIN)
+		ret = RESUME_GUEST;
 	return ret;
 }
 
476 572
@@ -642,9 +738,13 @@ void kvmppc_free_radix(struct kvm *kvm)
 	for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
 		if (!pud_present(*pud))
 			continue;
+		if (pud_huge(*pud)) {
+			pud_clear(pud);
+			continue;
+		}
 		pmd = pmd_offset(pud, 0);
 		for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
-			if (pmd_huge(*pmd)) {
+			if (pmd_is_leaf(*pmd)) {
 				pmd_clear(pmd);
 				continue;
 			}
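
A note on the radix fault-handler rework above: the rule for choosing the mapping level is that a 2MB or 1GB guest mapping is only possible when the guest physical address and host virtual address are congruent modulo the large-page size, i.e. their offsets within the large page agree. The sketch below is a standalone illustration of that alignment test, not code from this merge; pick_level() is a hypothetical name, and PUD_SIZE/PMD_SIZE/PAGE_SIZE are assumed to be the usual powerpc radix constants.

	/*
	 * Illustrative sketch of the level-selection rule used by
	 * kvmppc_book3s_radix_page_fault(): 2 = 1GB, 1 = 2MB, 0 = base page.
	 * backing_size is the size of the host page backing the fault.
	 */
	static int pick_level(unsigned long gpa, unsigned long hva,
			      unsigned long backing_size)
	{
		if (backing_size >= PUD_SIZE &&
		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
		    (hva & (PUD_SIZE - PAGE_SIZE)))
			return 2;
		if (backing_size >= PMD_SIZE &&
		    (gpa & (PMD_SIZE - PAGE_SIZE)) ==
		    (hva & (PMD_SIZE - PAGE_SIZE)))
			return 1;
		return 0;
	}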
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index c32e9bfe75b1..6651f736a0b1 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -450,7 +450,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 
 	/*
 	 * Synchronize with the MMU notifier callbacks in
-	 * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+	 * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.).
 	 * While we have the rmap lock, code running on other CPUs
 	 * cannot finish unmapping the host real page that backs
 	 * this guest real page, so we are OK to access the host
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 89707354c2ef..4863ab81f663 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 */
 	trace_hardirqs_on();
 
-	guest_enter();
+	guest_enter_irqoff();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
@@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
-	guest_exit();
-
 	trace_hardirqs_off();
 	set_irq_happened(trap);
 
@@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvmppc_set_host_core(pcpu);
 
 	local_irq_enable();
+	guest_exit();
 
 	/* Let secondaries go back to the offline loop */
 	for (i = 0; i < controlled_threads; ++i) {
@@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		goto up_out;
 
 	psize = vma_kernel_pagesize(vma);
-	porder = __ilog2(psize);
 
 	up_read(&current->mm->mmap_sem);
 
 	/* We can handle 4k, 64k or 16M pages in the VRMA */
-	err = -EINVAL;
-	if (!(psize == 0x1000 || psize == 0x10000 ||
-	      psize == 0x1000000))
-		goto out_srcu;
+	if (psize >= 0x1000000)
+		psize = 0x1000000;
+	else if (psize >= 0x10000)
+		psize = 0x10000;
+	else
+		psize = 0x1000;
+	porder = __ilog2(psize);
 
 	senc = slb_pgsize_encoding(psize);
 	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
@@ -4350,7 +4351,6 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.flush_memslot = kvmppc_core_flush_memslot_hv,
 	.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
 	.commit_memory_region = kvmppc_core_commit_memory_region_hv,
-	.unmap_hva = kvm_unmap_hva_hv,
 	.unmap_hva_range = kvm_unmap_hva_range_hv,
 	.age_hva = kvm_age_hva_hv,
 	.test_age_hva = kvm_test_age_hva_hv,
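
A note on the VRMA hunk above: kvmppc_hv_setup_htab_rma() used to fail with -EINVAL for any backing page size other than exactly 4k, 64k or 16M; it now rounds the size down to the largest supported one. The sketch below is illustrative only (vrma_page_size() is a hypothetical name), mirroring the rounding in the new hunk.

	/*
	 * Illustrative sketch: round a backing page size down to the
	 * nearest VRMA-supported size (16MB, 64KB or 4KB), as the new
	 * code does instead of rejecting the size outright.
	 */
	static unsigned long vrma_page_size(unsigned long psize)
	{
		if (psize >= 0x1000000)
			return 0x1000000;	/* 16MB */
		if (psize >= 0x10000)
			return 0x10000;		/* 64KB */
		return 0x1000;			/* 4KB */
	}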
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f31f357b8c5a..d33264697a31 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -320,7 +320,6 @@ kvm_novcpu_exit:
 	stw r12, STACK_SLOT_TRAP(r1)
 	bl kvmhv_commence_exit
 	nop
-	lwz r12, STACK_SLOT_TRAP(r1)
 	b kvmhv_switch_to_host
 
 /*
@@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 secondary_too_late:
 	li r12, 0
+	stw r12, STACK_SLOT_TRAP(r1)
 	cmpdi r4, 0
 	beq 11f
 	stw r12, VCPU_TRAP(r4)
@@ -1558,12 +1558,12 @@ mc_cont:
 3:	stw r5,VCPU_SLB_MAX(r9)
 
 guest_bypass:
+	stw r12, STACK_SLOT_TRAP(r1)
 	mr r3, r12
 	/* Increment exit count, poke other threads to exit */
 	bl kvmhv_commence_exit
 	nop
 	ld r9, HSTATE_KVM_VCPU(r13)
-	lwz r12, VCPU_TRAP(r9)
 
 	/* Stop others sending VCPU interrupts to this physical CPU */
 	li r0, -1
@@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
  * POWER7/POWER8 guest -> host partition switch code.
  * We don't have to lock against tlbies but we do
  * have to coordinate the hardware threads.
+ * Here STACK_SLOT_TRAP(r1) contains the trap number.
  */
 kvmhv_switch_to_host:
 	/* Secondary threads wait for primary to do partition switch */
@@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* If HMI, call kvmppc_realmode_hmi_handler() */
+	lwz r12, STACK_SLOT_TRAP(r1)
 	cmpwi r12, BOOK3S_INTERRUPT_HMI
 	bne 27f
 	bl kvmppc_realmode_hmi_handler
 	nop
 	cmpdi r3, 0
-	li r12, BOOK3S_INTERRUPT_HMI
 	/*
 	 * At this point kvmppc_realmode_hmi_handler may have resync-ed
 	 * the TB, and if it has, we must not subtract the guest timebase
@@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION
 	lwz r8, KVM_SPLIT_DO_RESTORE(r3)
 	cmpwi r8, 0
 	beq 47f
-	stw r12, STACK_SLOT_TRAP(r1)
 	bl kvmhv_p9_restore_lpcr
 	nop
-	lwz r12, STACK_SLOT_TRAP(r1)
 	b 48f
 47:
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
@@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	li r0, KVM_GUEST_MODE_NONE
 	stb r0, HSTATE_IN_GUEST(r13)
 
+	lwz r12, STACK_SLOT_TRAP(r1)	/* return trap # in r12 */
 	ld r0, SFS+PPC_LR_STKOFF(r1)
 	addi r1, r1, SFS
 	mtlr r0
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3ae752314b34..d3f304d06adf 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -277,15 +277,6 @@ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
 	}
 }
 
-static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
-{
-	trace_kvm_unmap_hva(hva);
-
-	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
-
-	return 0;
-}
-
 static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
 				  unsigned long end)
 {
@@ -1773,7 +1764,6 @@ static struct kvmppc_ops kvm_ops_pr = {
 	.flush_memslot = kvmppc_core_flush_memslot_pr,
 	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
 	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
-	.unmap_hva = kvm_unmap_hva_pr,
 	.unmap_hva_range = kvm_unmap_hva_range_pr,
 	.age_hva = kvm_age_hva_pr,
 	.test_age_hva = kvm_test_age_hva_pr,
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 423b21393bc9..c878b4ffb86f 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -724,7 +724,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
 
 /************* MMU Notifiers *************/
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	trace_kvm_unmap_hva(hva);
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 403e642c78f5..52c205373986 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
 int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		unsigned int rt, int is_default_endian)
 {
-	enum emulation_result emulated;
+	enum emulation_result emulated = EMULATE_DONE;
 
 	while (vcpu->arch.mmio_vmx_copy_nums) {
 		emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
@@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	kvm_sigset_deactivate(vcpu);
 
+#ifdef CONFIG_ALTIVEC
 out:
+#endif
 	vcpu_put(vcpu);
 	return r;
 }
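
A note on the powerpc.c hunk above: when CONFIG_ALTIVEC is disabled, nothing jumps to the out: label, and compilers can warn about a defined-but-unused label, so the label is guarded by the same #ifdef as its only goto. A minimal standalone sketch of the pattern (illustrative, not kernel code; CONFIG_FEATURE_X, run() and do_main_work() are hypothetical names):

	/* Guard a label with the same #ifdef as its only goto, so the
	 * label is never "defined but not used" when the option is off. */
	static int do_main_work(void) { return 0; }	/* stub for the sketch */

	int run(int skip)
	{
		int r = -1;

	#ifdef CONFIG_FEATURE_X
		if (skip)
			goto out;	/* the only jump to the label */
	#endif
		r = do_main_work();

	#ifdef CONFIG_FEATURE_X
	out:
	#endif
		return r;
	}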
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 85785a370c0e..2f9a8829552b 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -254,21 +254,6 @@ TRACE_EVENT(kvm_exit,
 	)
 );
 
-TRACE_EVENT(kvm_unmap_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva	)
-	),
-
-	TP_fast_assign(
-		__entry->hva = hva;
-	),
-
-	TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
-
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */