author    Linus Torvalds <torvalds@linux-foundation.org>  2015-04-26 16:23:15 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-04-26 16:23:15 -0400
commit    63905bba5b0170492777b327ac5e2aaef64989d6 (patch)
tree      cb60404ffedfa7d842c1e34fa965de57715182a6 /arch
parent    eadf16a912b6bdf8bd476bde2f19fb41d06e0c3b (diff)
parent    2e826695d87c2d213def07bc344ae97d88384f62 (diff)
Merge tag 'powerpc-4.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc fixes from Michael Ellerman:

 - fix for mm_dec_nr_pmds() from Scott.
 - fixes for oopses seen with KVM + THP from Aneesh.
 - build fixes from Aneesh & Shreyas.

* tag 'powerpc-4.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux:
  powerpc/mm: Fix build error with CONFIG_PPC_TRANSACTIONAL_MEM disabled
  powerpc/kvm: Fix ppc64_defconfig + PPC_POWERNV=n build error
  powerpc/mm/thp: Return pte address if we find trans_splitting.
  powerpc/mm/thp: Make page table walk safe against thp split/collapse
  KVM: PPC: Remove page table walk helpers
  KVM: PPC: Use READ_ONCE when dereferencing pte_t pointer
  powerpc/hugetlb: Call mm_dec_nr_pmds() in hugetlb_free_pmd_range()
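The recurring pattern in the THP fixes below is: disable interrupts around the page table walk (so a parallel THP split/collapse or page table free cannot run underneath), take a single READ_ONCE() snapshot of the pte, and only then inspect it. The following is a minimal illustrative caller sketched under that assumption; lookup_host_pfn() is a made-up name and is not part of this series, while find_linux_pte_or_hugepte(), READ_ONCE(), pte_present() and pte_pfn() are the helpers the diff itself uses, and the includes are approximate:

/* Illustrative sketch only -- not code from this merge. */
#include <linux/irqflags.h>
#include <linux/compiler.h>
#include <asm/pgtable.h>

static unsigned long lookup_host_pfn(pgd_t *pgdir, unsigned long hva)
{
	unsigned long pfn = 0, flags;
	unsigned int shift;
	pte_t *ptep, pte;

	/* Block THP split/collapse and page table free while we walk. */
	local_irq_save(flags);
	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
	if (ptep) {
		/* Snapshot the pte once; never dereference ptep twice. */
		pte = READ_ONCE(*ptep);
		if (pte_present(pte))
			pfn = pte_pfn(pte);
	}
	local_irq_restore(flags);
	return pfn;
}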
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h  | 17
-rw-r--r--  arch/powerpc/include/asm/pgtable.h        | 28
-rw-r--r--  arch/powerpc/kernel/eeh.c                 |  6
-rw-r--r--  arch/powerpc/kernel/io-workarounds.c      | 10
-rw-r--r--  arch/powerpc/kvm/Kconfig                  |  2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c       | 14
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c       | 86
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c          | 32
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c           |  3
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c             | 32
-rw-r--r--  arch/powerpc/perf/callchain.c             | 24
11 files changed, 137 insertions, 117 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 7ae407941be2..3536d12eb798 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -295,16 +295,17 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 
 /*
  * If it's present and writable, atomically set dirty and referenced bits and
- * return the PTE, otherwise return 0. If we find a transparent hugepage
- * and if it is marked splitting we return 0;
+ * return the PTE, otherwise return 0.
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
-						 unsigned int hugepage)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 {
 	pte_t old_pte, new_pte = __pte(0);
 
 	while (1) {
-		old_pte = *ptep;
+		/*
+		 * Make sure we don't reload from ptep
+		 */
+		old_pte = READ_ONCE(*ptep);
 		/*
 		 * wait until _PAGE_BUSY is clear then set it atomically
 		 */
@@ -312,12 +313,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
 			cpu_relax();
 			continue;
 		}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		/* If hugepage and is trans splitting return None */
-		if (unlikely(hugepage &&
-			     pmd_trans_splitting(pte_pmd(old_pte))))
-			return __pte(0);
-#endif
 		/* If pte is not present return None */
 		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
 			return __pte(0);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9835ac4173b7..11a38635dd65 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -247,28 +247,16 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #define pmd_large(pmd)	0
 #define has_transparent_hugepage() 0
 #endif
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				 unsigned *shift);
-
-static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
-				     unsigned long *pte_sizep)
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				   unsigned *shift);
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+					       unsigned *shift)
 {
-	pte_t *ptep;
-	unsigned long ps = *pte_sizep;
-	unsigned int shift;
-
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
-	if (!ptep)
-		return NULL;
-	if (shift)
-		*pte_sizep = 1ul << shift;
-	else
-		*pte_sizep = PAGE_SIZE;
-
-	if (ps > *pte_sizep)
-		return NULL;
-
-	return ptep;
+	if (!arch_irqs_disabled()) {
+		pr_info("%s called with irq enabled\n", __func__);
+		dump_stack();
+	}
+	return __find_linux_pte_or_hugepte(pgdir, ea, shift);
 }
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index a4c62eb0ee48..44b480e3a5af 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -334,9 +334,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	int hugepage_shift;
 
 	/*
-	 * We won't find hugepages here, iomem
+	 * We won't find hugepages here(this is iomem). Hence we are not
+	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
+	 * page table free, because of init_mm.
 	 */
-	ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
 	if (!ptep)
 		return token;
 	WARN_ON(hugepage_shift);
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 24b968f8e4d8..63d9cc4d7366 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -71,15 +71,15 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
 		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
 			return NULL;
-
-		ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+		/*
+		 * We won't find huge pages here (iomem). Also can't hit
+		 * a page table free due to init_mm
+		 */
+		ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
 						 &hugepage_shift);
 		if (ptep == NULL)
 			paddr = 0;
 		else {
-			/*
-			 * we don't have hugepages backing iomem
-			 */
 			WARN_ON(hugepage_shift);
 			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
 		}
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 2963e4dd0b80..3caec2c42105 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -75,7 +75,7 @@ config KVM_BOOK3S_64
 
 config KVM_BOOK3S_64_HV
 	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
-	depends on KVM_BOOK3S_64
+	depends on KVM_BOOK3S_64 && PPC_POWERNV
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d6fe30835c58..1a4acf8bf4f4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -535,23 +535,21 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 	/* if the guest wants write access, see if that is OK */
 	if (!writing && hpte_is_writable(r)) {
-		unsigned int hugepage_shift;
 		pte_t *ptep, pte;
-
+		unsigned long flags;
 		/*
 		 * We need to protect against page table destruction
-		 * while looking up and updating the pte.
+		 * hugepage split and collapse.
 		 */
-		rcu_read_lock_sched();
+		local_irq_save(flags);
 		ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-						 hva, &hugepage_shift);
+						 hva, NULL);
 		if (ptep) {
-			pte = kvmppc_read_update_linux_pte(ptep, 1,
-							   hugepage_shift);
+			pte = kvmppc_read_update_linux_pte(ptep, 1);
 			if (pte_write(pte))
 				write_ok = 1;
 		}
-		rcu_read_unlock_sched();
+		local_irq_restore(flags);
 	}
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index f6bf0b1de6d7..b027a89737b6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x)
 {
 	unsigned long addr = (unsigned long) x;
 	pte_t *p;
-
-	p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+	/*
+	 * assume we don't have huge pages in vmalloc space...
+	 * So don't worry about THP collapse/split. Called
+	 * Only in realmode, hence won't need irq_save/restore.
+	 */
+	p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
 	if (!p || !pte_present(*p))
 		return NULL;
-	/* assume we don't have huge pages in vmalloc space... */
 	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
 	return __va(addr);
 }
@@ -131,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
-{
-	pte_t *ptep;
-	unsigned long ps = *pte_sizep;
-	unsigned int hugepage_shift;
-
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
-	if (!ptep)
-		return __pte(0);
-	if (hugepage_shift)
-		*pte_sizep = 1ul << hugepage_shift;
-	else
-		*pte_sizep = PAGE_SIZE;
-	if (ps > *pte_sizep)
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		       long pte_index, unsigned long pteh, unsigned long ptel,
 		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
@@ -160,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	struct revmap_entry *rev;
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
-	unsigned long pte_size;
+	unsigned hpage_shift;
 	unsigned long is_io;
 	unsigned long *rmap;
-	pte_t pte;
+	pte_t *ptep;
 	unsigned int writing;
 	unsigned long mmu_seq;
-	unsigned long rcbits;
+	unsigned long rcbits, irq_flags = 0;
 
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
@@ -202,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 	/* Translate to host virtual address */
 	hva = __gfn_to_hva_memslot(memslot, gfn);
-
-	/* Look up the Linux PTE for the backing page */
-	pte_size = psize;
-	pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
-	if (pte_present(pte) && !pte_protnone(pte)) {
-		if (writing && !pte_write(pte))
-			/* make the actual HPTE be read-only */
-			ptel = hpte_make_readonly(ptel);
-		is_io = hpte_cache_bits(pte_val(pte));
-		pa = pte_pfn(pte) << PAGE_SHIFT;
-		pa |= hva & (pte_size - 1);
-		pa |= gpa & ~PAGE_MASK;
+	/*
+	 * If we had a page table table change after lookup, we would
+	 * retry via mmu_notifier_retry.
+	 */
+	if (realmode)
+		ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+	else {
+		local_irq_save(irq_flags);
+		ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
 	}
+	if (ptep) {
+		pte_t pte;
+		unsigned int host_pte_size;
 
-	if (pte_size < psize)
-		return H_PARAMETER;
+		if (hpage_shift)
+			host_pte_size = 1ul << hpage_shift;
+		else
+			host_pte_size = PAGE_SIZE;
+		/*
+		 * We should always find the guest page size
+		 * to <= host page size, if host is using hugepage
+		 */
+		if (host_pte_size < psize) {
+			if (!realmode)
+				local_irq_restore(flags);
+			return H_PARAMETER;
+		}
+		pte = kvmppc_read_update_linux_pte(ptep, writing);
+		if (pte_present(pte) && !pte_protnone(pte)) {
+			if (writing && !pte_write(pte))
+				/* make the actual HPTE be read-only */
+				ptel = hpte_make_readonly(ptel);
+			is_io = hpte_cache_bits(pte_val(pte));
+			pa = pte_pfn(pte) << PAGE_SHIFT;
+			pa |= hva & (host_pte_size - 1);
+			pa |= gpa & ~PAGE_MASK;
+		}
+	}
+	if (!realmode)
+		local_irq_restore(irq_flags);
 
 	ptel &= ~(HPTE_R_PP0 - psize);
 	ptel |= pa;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index cc536d4a75ef..4d33e199edcc 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -338,6 +338,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	pte_t *ptep;
 	unsigned int wimg = 0;
 	pgd_t *pgdir;
+	unsigned long flags;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_notifier_seq;
@@ -468,15 +469,28 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 
 	pgdir = vcpu_e500->vcpu.arch.pgdir;
-	ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
-	if (pte_present(*ptep))
-		wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
-	else {
-		if (printk_ratelimit())
-			pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
-				__func__, (long)gfn, pfn);
-		ret = -EINVAL;
-		goto out;
+	/*
+	 * We are just looking at the wimg bits, so we don't
+	 * care much about the trans splitting bit.
+	 * We are holding kvm->mmu_lock so a notifier invalidate
+	 * can't run hence pfn won't change.
+	 */
+	local_irq_save(flags);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL);
+	if (ptep) {
+		pte_t pte = READ_ONCE(*ptep);
+
+		if (pte_present(pte)) {
+			wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
+				MAS2_WIMGE_MASK;
+			local_irq_restore(flags);
+		} else {
+			local_irq_restore(flags);
+			pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
+					   __func__, (long)gfn, pfn);
+			ret = -EINVAL;
+			goto out;
+		}
 	}
 	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2c2022d16059..fda236f908eb 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1066,7 +1066,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get PTE and page size from page tables */
-	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+	ptep = __find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		rc = 1;
@@ -1394,6 +1394,7 @@ tm_abort:
 		tm_abort(TM_CAUSE_TLBI);
 	}
 #endif
+	return;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fa9d5c238d22..0ce968b00b7c 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -109,7 +109,7 @@ int pgd_huge(pgd_t pgd)
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	/* Only called for hugetlbfs pages, hence can ignore THP */
-	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -581,6 +581,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	pmd = pmd_offset(pud, start);
 	pud_clear(pud);
 	pmd_free_tlb(tlb, pmd, start);
+	mm_dec_nr_pmds(tlb->mm);
 }
 
 static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -681,28 +682,35 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	} while (addr = next, addr != end);
 }
 
+/*
+ * We are holding mmap_sem, so a parallel huge page collapse cannot run.
+ * To prevent hugepage split, disable irq.
+ */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
 	unsigned shift;
-	unsigned long mask;
+	unsigned long mask, flags;
 	/*
 	 * Transparent hugepages are handled by generic code. We can skip them
 	 * here.
 	 */
+	local_irq_save(flags);
 	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
 
 	/* Verify it is a huge page else bail. */
-	if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
+	if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
+		local_irq_restore(flags);
 		return ERR_PTR(-EINVAL);
-
+	}
 	mask = (1UL << shift) - 1;
 	page = pte_page(*ptep);
 	if (page)
 		page += (address & mask) / PAGE_SIZE;
 
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -949,9 +957,12 @@ void flush_dcache_icache_hugepage(struct page *page)
  *
  * So long as we atomically load page table pointers we are safe against teardown,
  * we can follow the address down to the the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
  */
 
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				   unsigned *shift)
 {
 	pgd_t pgd, *pgdp;
 	pud_t pud, *pudp;
@@ -1003,12 +1014,11 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 		 * A hugepage collapse is captured by pmd_none, because
 		 * it mark the pmd none and do a hpte invalidate.
 		 *
-		 * A hugepage split is captured by pmd_trans_splitting
-		 * because we mark the pmd trans splitting and do a
-		 * hpte invalidate
-		 *
+		 * We don't worry about pmd_trans_splitting here, The
+		 * caller if it needs to handle the splitting case
+		 * should check for that.
 		 */
-		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+		if (pmd_none(pmd))
 			return NULL;
 
 		if (pmd_huge(pmd) || pmd_large(pmd)) {
@@ -1030,7 +1040,7 @@ out:
 		*shift = pdshift;
 	return ret_pte;
 }
-EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
 
 int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index ead55351b254..ff09cde20cd2 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -111,41 +111,45 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
  * interrupt context, so if the access faults, we read the page tables
  * to find which page (if any) is mapped and access it directly.
  */
-static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
+static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 {
+	int ret = -EFAULT;
 	pgd_t *pgdir;
 	pte_t *ptep, pte;
 	unsigned shift;
 	unsigned long addr = (unsigned long) ptr;
 	unsigned long offset;
-	unsigned long pfn;
+	unsigned long pfn, flags;
 	void *kaddr;
 
 	pgdir = current->mm->pgd;
 	if (!pgdir)
 		return -EFAULT;
 
+	local_irq_save(flags);
 	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+	if (!ptep)
+		goto err_out;
 	if (!shift)
 		shift = PAGE_SHIFT;
 
 	/* align address to page boundary */
 	offset = addr & ((1UL << shift) - 1);
-	addr -= offset;
 
-	if (ptep == NULL)
-		return -EFAULT;
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
-		return -EFAULT;
+		goto err_out;
 	pfn = pte_pfn(pte);
 	if (!page_is_ram(pfn))
-		return -EFAULT;
+		goto err_out;
 
 	/* no highmem to worry about here */
 	kaddr = pfn_to_kaddr(pfn);
-	memcpy(ret, kaddr + offset, nb);
-	return 0;
+	memcpy(buf, kaddr + offset, nb);
+	ret = 0;
+err_out:
+	local_irq_restore(flags);
+	return ret;
 }
 
 static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)