diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-26 16:23:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-26 16:23:15 -0400 |
commit | 63905bba5b0170492777b327ac5e2aaef64989d6 (patch) | |
tree | cb60404ffedfa7d842c1e34fa965de57715182a6 /arch | |
parent | eadf16a912b6bdf8bd476bde2f19fb41d06e0c3b (diff) | |
parent | 2e826695d87c2d213def07bc344ae97d88384f62 (diff) |
Merge tag 'powerpc-4.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc fixes from Michael Ellerman:
- fix for mm_dec_nr_pmds() from Scott.
- fixes for oopses seen with KVM + THP from Aneesh.
- build fixes from Aneesh & Shreyas.
* tag 'powerpc-4.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux:
powerpc/mm: Fix build error with CONFIG_PPC_TRANSACTIONAL_MEM disabled
powerpc/kvm: Fix ppc64_defconfig + PPC_POWERNV=n build error
powerpc/mm/thp: Return pte address if we find trans_splitting.
powerpc/mm/thp: Make page table walk safe against thp split/collapse
KVM: PPC: Remove page table walk helpers
KVM: PPC: Use READ_ONCE when dereferencing pte_t pointer
powerpc/hugetlb: Call mm_dec_nr_pmds() in hugetlb_free_pmd_range()
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/include/asm/kvm_book3s_64.h | 17 | ||||
-rw-r--r-- | arch/powerpc/include/asm/pgtable.h | 28 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kernel/io-workarounds.c | 10 | ||||
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 14 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 86 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu_host.c | 32 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 32 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain.c | 24 |
11 files changed, 137 insertions, 117 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 7ae407941be2..3536d12eb798 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
@@ -295,16 +295,17 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) | |||
295 | 295 | ||
296 | /* | 296 | /* |
297 | * If it's present and writable, atomically set dirty and referenced bits and | 297 | * If it's present and writable, atomically set dirty and referenced bits and |
298 | * return the PTE, otherwise return 0. If we find a transparent hugepage | 298 | * return the PTE, otherwise return 0. |
299 | * and if it is marked splitting we return 0; | ||
300 | */ | 299 | */ |
301 | static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing, | 300 | static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing) |
302 | unsigned int hugepage) | ||
303 | { | 301 | { |
304 | pte_t old_pte, new_pte = __pte(0); | 302 | pte_t old_pte, new_pte = __pte(0); |
305 | 303 | ||
306 | while (1) { | 304 | while (1) { |
307 | old_pte = *ptep; | 305 | /* |
306 | * Make sure we don't reload from ptep | ||
307 | */ | ||
308 | old_pte = READ_ONCE(*ptep); | ||
308 | /* | 309 | /* |
309 | * wait until _PAGE_BUSY is clear then set it atomically | 310 | * wait until _PAGE_BUSY is clear then set it atomically |
310 | */ | 311 | */ |
@@ -312,12 +313,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing, | |||
312 | cpu_relax(); | 313 | cpu_relax(); |
313 | continue; | 314 | continue; |
314 | } | 315 | } |
315 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
316 | /* If hugepage and is trans splitting return None */ | ||
317 | if (unlikely(hugepage && | ||
318 | pmd_trans_splitting(pte_pmd(old_pte)))) | ||
319 | return __pte(0); | ||
320 | #endif | ||
321 | /* If pte is not present return None */ | 316 | /* If pte is not present return None */ |
322 | if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT))) | 317 | if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT))) |
323 | return __pte(0); | 318 | return __pte(0); |
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 9835ac4173b7..11a38635dd65 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h | |||
@@ -247,28 +247,16 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | |||
247 | #define pmd_large(pmd) 0 | 247 | #define pmd_large(pmd) 0 |
248 | #define has_transparent_hugepage() 0 | 248 | #define has_transparent_hugepage() 0 |
249 | #endif | 249 | #endif |
250 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, | 250 | pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, |
251 | unsigned *shift); | 251 | unsigned *shift); |
252 | 252 | static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, | |
253 | static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva, | 253 | unsigned *shift) |
254 | unsigned long *pte_sizep) | ||
255 | { | 254 | { |
256 | pte_t *ptep; | 255 | if (!arch_irqs_disabled()) { |
257 | unsigned long ps = *pte_sizep; | 256 | pr_info("%s called with irq enabled\n", __func__); |
258 | unsigned int shift; | 257 | dump_stack(); |
259 | 258 | } | |
260 | ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift); | 259 | return __find_linux_pte_or_hugepte(pgdir, ea, shift); |
261 | if (!ptep) | ||
262 | return NULL; | ||
263 | if (shift) | ||
264 | *pte_sizep = 1ul << shift; | ||
265 | else | ||
266 | *pte_sizep = PAGE_SIZE; | ||
267 | |||
268 | if (ps > *pte_sizep) | ||
269 | return NULL; | ||
270 | |||
271 | return ptep; | ||
272 | } | 260 | } |
273 | #endif /* __ASSEMBLY__ */ | 261 | #endif /* __ASSEMBLY__ */ |
274 | 262 | ||
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index a4c62eb0ee48..44b480e3a5af 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c | |||
@@ -334,9 +334,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) | |||
334 | int hugepage_shift; | 334 | int hugepage_shift; |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * We won't find hugepages here, iomem | 337 | * We won't find hugepages here(this is iomem). Hence we are not |
338 | * worried about _PAGE_SPLITTING/collapse. Also we will not hit | ||
339 | * page table free, because of init_mm. | ||
338 | */ | 340 | */ |
339 | ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); | 341 | ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); |
340 | if (!ptep) | 342 | if (!ptep) |
341 | return token; | 343 | return token; |
342 | WARN_ON(hugepage_shift); | 344 | WARN_ON(hugepage_shift); |
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 24b968f8e4d8..63d9cc4d7366 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c | |||
@@ -71,15 +71,15 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) | |||
71 | vaddr = (unsigned long)PCI_FIX_ADDR(addr); | 71 | vaddr = (unsigned long)PCI_FIX_ADDR(addr); |
72 | if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) | 72 | if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) |
73 | return NULL; | 73 | return NULL; |
74 | 74 | /* | |
75 | ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr, | 75 | * We won't find huge pages here (iomem). Also can't hit |
76 | * a page table free due to init_mm | ||
77 | */ | ||
78 | ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr, | ||
76 | &hugepage_shift); | 79 | &hugepage_shift); |
77 | if (ptep == NULL) | 80 | if (ptep == NULL) |
78 | paddr = 0; | 81 | paddr = 0; |
79 | else { | 82 | else { |
80 | /* | ||
81 | * we don't have hugepages backing iomem | ||
82 | */ | ||
83 | WARN_ON(hugepage_shift); | 83 | WARN_ON(hugepage_shift); |
84 | paddr = pte_pfn(*ptep) << PAGE_SHIFT; | 84 | paddr = pte_pfn(*ptep) << PAGE_SHIFT; |
85 | } | 85 | } |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 2963e4dd0b80..3caec2c42105 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
@@ -75,7 +75,7 @@ config KVM_BOOK3S_64 | |||
75 | 75 | ||
76 | config KVM_BOOK3S_64_HV | 76 | config KVM_BOOK3S_64_HV |
77 | tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" | 77 | tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" |
78 | depends on KVM_BOOK3S_64 | 78 | depends on KVM_BOOK3S_64 && PPC_POWERNV |
79 | select KVM_BOOK3S_HV_POSSIBLE | 79 | select KVM_BOOK3S_HV_POSSIBLE |
80 | select MMU_NOTIFIER | 80 | select MMU_NOTIFIER |
81 | select CMA | 81 | select CMA |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d6fe30835c58..1a4acf8bf4f4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -535,23 +535,21 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
535 | } | 535 | } |
536 | /* if the guest wants write access, see if that is OK */ | 536 | /* if the guest wants write access, see if that is OK */ |
537 | if (!writing && hpte_is_writable(r)) { | 537 | if (!writing && hpte_is_writable(r)) { |
538 | unsigned int hugepage_shift; | ||
539 | pte_t *ptep, pte; | 538 | pte_t *ptep, pte; |
540 | 539 | unsigned long flags; | |
541 | /* | 540 | /* |
542 | * We need to protect against page table destruction | 541 | * We need to protect against page table destruction |
543 | * while looking up and updating the pte. | 542 | * hugepage split and collapse. |
544 | */ | 543 | */ |
545 | rcu_read_lock_sched(); | 544 | local_irq_save(flags); |
546 | ptep = find_linux_pte_or_hugepte(current->mm->pgd, | 545 | ptep = find_linux_pte_or_hugepte(current->mm->pgd, |
547 | hva, &hugepage_shift); | 546 | hva, NULL); |
548 | if (ptep) { | 547 | if (ptep) { |
549 | pte = kvmppc_read_update_linux_pte(ptep, 1, | 548 | pte = kvmppc_read_update_linux_pte(ptep, 1); |
550 | hugepage_shift); | ||
551 | if (pte_write(pte)) | 549 | if (pte_write(pte)) |
552 | write_ok = 1; | 550 | write_ok = 1; |
553 | } | 551 | } |
554 | rcu_read_unlock_sched(); | 552 | local_irq_restore(flags); |
555 | } | 553 | } |
556 | } | 554 | } |
557 | 555 | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index f6bf0b1de6d7..b027a89737b6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x) | |||
26 | { | 26 | { |
27 | unsigned long addr = (unsigned long) x; | 27 | unsigned long addr = (unsigned long) x; |
28 | pte_t *p; | 28 | pte_t *p; |
29 | 29 | /* | |
30 | p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL); | 30 | * assume we don't have huge pages in vmalloc space... |
31 | * So don't worry about THP collapse/split. Called | ||
32 | * Only in realmode, hence won't need irq_save/restore. | ||
33 | */ | ||
34 | p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL); | ||
31 | if (!p || !pte_present(*p)) | 35 | if (!p || !pte_present(*p)) |
32 | return NULL; | 36 | return NULL; |
33 | /* assume we don't have huge pages in vmalloc space... */ | ||
34 | addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); | 37 | addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); |
35 | return __va(addr); | 38 | return __va(addr); |
36 | } | 39 | } |
@@ -131,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, | |||
131 | unlock_rmap(rmap); | 134 | unlock_rmap(rmap); |
132 | } | 135 | } |
133 | 136 | ||
134 | static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva, | ||
135 | int writing, unsigned long *pte_sizep) | ||
136 | { | ||
137 | pte_t *ptep; | ||
138 | unsigned long ps = *pte_sizep; | ||
139 | unsigned int hugepage_shift; | ||
140 | |||
141 | ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift); | ||
142 | if (!ptep) | ||
143 | return __pte(0); | ||
144 | if (hugepage_shift) | ||
145 | *pte_sizep = 1ul << hugepage_shift; | ||
146 | else | ||
147 | *pte_sizep = PAGE_SIZE; | ||
148 | if (ps > *pte_sizep) | ||
149 | return __pte(0); | ||
150 | return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); | ||
151 | } | ||
152 | |||
153 | long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | 137 | long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, |
154 | long pte_index, unsigned long pteh, unsigned long ptel, | 138 | long pte_index, unsigned long pteh, unsigned long ptel, |
155 | pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) | 139 | pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) |
@@ -160,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | |||
160 | struct revmap_entry *rev; | 144 | struct revmap_entry *rev; |
161 | unsigned long g_ptel; | 145 | unsigned long g_ptel; |
162 | struct kvm_memory_slot *memslot; | 146 | struct kvm_memory_slot *memslot; |
163 | unsigned long pte_size; | 147 | unsigned hpage_shift; |
164 | unsigned long is_io; | 148 | unsigned long is_io; |
165 | unsigned long *rmap; | 149 | unsigned long *rmap; |
166 | pte_t pte; | 150 | pte_t *ptep; |
167 | unsigned int writing; | 151 | unsigned int writing; |
168 | unsigned long mmu_seq; | 152 | unsigned long mmu_seq; |
169 | unsigned long rcbits; | 153 | unsigned long rcbits, irq_flags = 0; |
170 | 154 | ||
171 | psize = hpte_page_size(pteh, ptel); | 155 | psize = hpte_page_size(pteh, ptel); |
172 | if (!psize) | 156 | if (!psize) |
@@ -202,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | |||
202 | 186 | ||
203 | /* Translate to host virtual address */ | 187 | /* Translate to host virtual address */ |
204 | hva = __gfn_to_hva_memslot(memslot, gfn); | 188 | hva = __gfn_to_hva_memslot(memslot, gfn); |
205 | 189 | /* | |
206 | /* Look up the Linux PTE for the backing page */ | 190 | * If we had a page table change after lookup, we would |
207 | pte_size = psize; | 191 | * retry via mmu_notifier_retry. |
208 | pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); | 192 | */ |
209 | if (pte_present(pte) && !pte_protnone(pte)) { | 193 | if (realmode) |
210 | if (writing && !pte_write(pte)) | 194 | ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift); |
211 | /* make the actual HPTE be read-only */ | 195 | else { |
212 | ptel = hpte_make_readonly(ptel); | 196 | local_irq_save(irq_flags); |
213 | is_io = hpte_cache_bits(pte_val(pte)); | 197 | ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift); |
214 | pa = pte_pfn(pte) << PAGE_SHIFT; | ||
215 | pa |= hva & (pte_size - 1); | ||
216 | pa |= gpa & ~PAGE_MASK; | ||
217 | } | 198 | } |
199 | if (ptep) { | ||
200 | pte_t pte; | ||
201 | unsigned int host_pte_size; | ||
218 | 202 | ||
219 | if (pte_size < psize) | 203 | if (hpage_shift) |
220 | return H_PARAMETER; | 204 | host_pte_size = 1ul << hpage_shift; |
205 | else | ||
206 | host_pte_size = PAGE_SIZE; | ||
207 | /* | ||
208 | * We should always find the guest page size | ||
209 | * to <= host page size, if host is using hugepage | ||
210 | */ | ||
211 | if (host_pte_size < psize) { | ||
212 | if (!realmode) | ||
213 | local_irq_restore(flags); | ||
214 | return H_PARAMETER; | ||
215 | } | ||
216 | pte = kvmppc_read_update_linux_pte(ptep, writing); | ||
217 | if (pte_present(pte) && !pte_protnone(pte)) { | ||
218 | if (writing && !pte_write(pte)) | ||
219 | /* make the actual HPTE be read-only */ | ||
220 | ptel = hpte_make_readonly(ptel); | ||
221 | is_io = hpte_cache_bits(pte_val(pte)); | ||
222 | pa = pte_pfn(pte) << PAGE_SHIFT; | ||
223 | pa |= hva & (host_pte_size - 1); | ||
224 | pa |= gpa & ~PAGE_MASK; | ||
225 | } | ||
226 | } | ||
227 | if (!realmode) | ||
228 | local_irq_restore(irq_flags); | ||
221 | 229 | ||
222 | ptel &= ~(HPTE_R_PP0 - psize); | 230 | ptel &= ~(HPTE_R_PP0 - psize); |
223 | ptel |= pa; | 231 | ptel |= pa; |
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index cc536d4a75ef..4d33e199edcc 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c | |||
@@ -338,6 +338,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
338 | pte_t *ptep; | 338 | pte_t *ptep; |
339 | unsigned int wimg = 0; | 339 | unsigned int wimg = 0; |
340 | pgd_t *pgdir; | 340 | pgd_t *pgdir; |
341 | unsigned long flags; | ||
341 | 342 | ||
342 | /* used to check for invalidations in progress */ | 343 | /* used to check for invalidations in progress */ |
343 | mmu_seq = kvm->mmu_notifier_seq; | 344 | mmu_seq = kvm->mmu_notifier_seq; |
@@ -468,15 +469,28 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
468 | 469 | ||
469 | 470 | ||
470 | pgdir = vcpu_e500->vcpu.arch.pgdir; | 471 | pgdir = vcpu_e500->vcpu.arch.pgdir; |
471 | ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages); | 472 | /* |
472 | if (pte_present(*ptep)) | 473 | * We are just looking at the wimg bits, so we don't |
473 | wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; | 474 | * care much about the trans splitting bit. |
474 | else { | 475 | * We are holding kvm->mmu_lock so a notifier invalidate |
475 | if (printk_ratelimit()) | 476 | * can't run hence pfn won't change. |
476 | pr_err("%s: pte not present: gfn %lx, pfn %lx\n", | 477 | */ |
477 | __func__, (long)gfn, pfn); | 478 | local_irq_save(flags); |
478 | ret = -EINVAL; | 479 | ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL); |
479 | goto out; | 480 | if (ptep) { |
481 | pte_t pte = READ_ONCE(*ptep); | ||
482 | |||
483 | if (pte_present(pte)) { | ||
484 | wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) & | ||
485 | MAS2_WIMGE_MASK; | ||
486 | local_irq_restore(flags); | ||
487 | } else { | ||
488 | local_irq_restore(flags); | ||
489 | pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n", | ||
490 | __func__, (long)gfn, pfn); | ||
491 | ret = -EINVAL; | ||
492 | goto out; | ||
493 | } | ||
480 | } | 494 | } |
481 | kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); | 495 | kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); |
482 | 496 | ||
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 2c2022d16059..fda236f908eb 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c | |||
@@ -1066,7 +1066,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, | |||
1066 | #endif /* CONFIG_PPC_64K_PAGES */ | 1066 | #endif /* CONFIG_PPC_64K_PAGES */ |
1067 | 1067 | ||
1068 | /* Get PTE and page size from page tables */ | 1068 | /* Get PTE and page size from page tables */ |
1069 | ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); | 1069 | ptep = __find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); |
1070 | if (ptep == NULL || !pte_present(*ptep)) { | 1070 | if (ptep == NULL || !pte_present(*ptep)) { |
1071 | DBG_LOW(" no PTE !\n"); | 1071 | DBG_LOW(" no PTE !\n"); |
1072 | rc = 1; | 1072 | rc = 1; |
@@ -1394,6 +1394,7 @@ tm_abort: | |||
1394 | tm_abort(TM_CAUSE_TLBI); | 1394 | tm_abort(TM_CAUSE_TLBI); |
1395 | } | 1395 | } |
1396 | #endif | 1396 | #endif |
1397 | return; | ||
1397 | } | 1398 | } |
1398 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 1399 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
1399 | 1400 | ||
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fa9d5c238d22..0ce968b00b7c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -109,7 +109,7 @@ int pgd_huge(pgd_t pgd) | |||
109 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 109 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
110 | { | 110 | { |
111 | /* Only called for hugetlbfs pages, hence can ignore THP */ | 111 | /* Only called for hugetlbfs pages, hence can ignore THP */ |
112 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); | 112 | return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL); |
113 | } | 113 | } |
114 | 114 | ||
115 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, | 115 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, |
@@ -581,6 +581,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
581 | pmd = pmd_offset(pud, start); | 581 | pmd = pmd_offset(pud, start); |
582 | pud_clear(pud); | 582 | pud_clear(pud); |
583 | pmd_free_tlb(tlb, pmd, start); | 583 | pmd_free_tlb(tlb, pmd, start); |
584 | mm_dec_nr_pmds(tlb->mm); | ||
584 | } | 585 | } |
585 | 586 | ||
586 | static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | 587 | static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, |
@@ -681,28 +682,35 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, | |||
681 | } while (addr = next, addr != end); | 682 | } while (addr = next, addr != end); |
682 | } | 683 | } |
683 | 684 | ||
685 | /* | ||
686 | * We are holding mmap_sem, so a parallel huge page collapse cannot run. | ||
687 | * To prevent hugepage split, disable irq. | ||
688 | */ | ||
684 | struct page * | 689 | struct page * |
685 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 690 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
686 | { | 691 | { |
687 | pte_t *ptep; | 692 | pte_t *ptep; |
688 | struct page *page; | 693 | struct page *page; |
689 | unsigned shift; | 694 | unsigned shift; |
690 | unsigned long mask; | 695 | unsigned long mask, flags; |
691 | /* | 696 | /* |
692 | * Transparent hugepages are handled by generic code. We can skip them | 697 | * Transparent hugepages are handled by generic code. We can skip them |
693 | * here. | 698 | * here. |
694 | */ | 699 | */ |
700 | local_irq_save(flags); | ||
695 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); | 701 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); |
696 | 702 | ||
697 | /* Verify it is a huge page else bail. */ | 703 | /* Verify it is a huge page else bail. */ |
698 | if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) | 704 | if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) { |
705 | local_irq_restore(flags); | ||
699 | return ERR_PTR(-EINVAL); | 706 | return ERR_PTR(-EINVAL); |
700 | 707 | } | |
701 | mask = (1UL << shift) - 1; | 708 | mask = (1UL << shift) - 1; |
702 | page = pte_page(*ptep); | 709 | page = pte_page(*ptep); |
703 | if (page) | 710 | if (page) |
704 | page += (address & mask) / PAGE_SIZE; | 711 | page += (address & mask) / PAGE_SIZE; |
705 | 712 | ||
713 | local_irq_restore(flags); | ||
706 | return page; | 714 | return page; |
707 | } | 715 | } |
708 | 716 | ||
@@ -949,9 +957,12 @@ void flush_dcache_icache_hugepage(struct page *page) | |||
949 | * | 957 | * |
950 | * So long as we atomically load page table pointers we are safe against teardown, | 958 | * So long as we atomically load page table pointers we are safe against teardown, |
951 | * we can follow the address down to the page and take a ref on it. | 959 | * we can follow the address down to the page and take a ref on it. |
| 960 | * This function needs to be called with interrupts disabled. We use this variant |
961 | * when we have MSR[EE] = 0 but the paca->soft_enabled = 1 | ||
952 | */ | 962 | */ |
953 | 963 | ||
954 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | 964 | pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, |
965 | unsigned *shift) | ||
955 | { | 966 | { |
956 | pgd_t pgd, *pgdp; | 967 | pgd_t pgd, *pgdp; |
957 | pud_t pud, *pudp; | 968 | pud_t pud, *pudp; |
@@ -1003,12 +1014,11 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift | |||
1003 | * A hugepage collapse is captured by pmd_none, because | 1014 | * A hugepage collapse is captured by pmd_none, because |
1004 | * it mark the pmd none and do a hpte invalidate. | 1015 | * it mark the pmd none and do a hpte invalidate. |
1005 | * | 1016 | * |
1006 | * A hugepage split is captured by pmd_trans_splitting | 1017 | * We don't worry about pmd_trans_splitting here. The |
1007 | * because we mark the pmd trans splitting and do a | 1018 | * caller if it needs to handle the splitting case |
1008 | * hpte invalidate | 1019 | * should check for that. |
1009 | * | ||
1010 | */ | 1020 | */ |
1011 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | 1021 | if (pmd_none(pmd)) |
1012 | return NULL; | 1022 | return NULL; |
1013 | 1023 | ||
1014 | if (pmd_huge(pmd) || pmd_large(pmd)) { | 1024 | if (pmd_huge(pmd) || pmd_large(pmd)) { |
@@ -1030,7 +1040,7 @@ out: | |||
1030 | *shift = pdshift; | 1040 | *shift = pdshift; |
1031 | return ret_pte; | 1041 | return ret_pte; |
1032 | } | 1042 | } |
1033 | EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); | 1043 | EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte); |
1034 | 1044 | ||
1035 | int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | 1045 | int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, |
1036 | unsigned long end, int write, struct page **pages, int *nr) | 1046 | unsigned long end, int write, struct page **pages, int *nr) |
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index ead55351b254..ff09cde20cd2 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c | |||
@@ -111,41 +111,45 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
111 | * interrupt context, so if the access faults, we read the page tables | 111 | * interrupt context, so if the access faults, we read the page tables |
112 | * to find which page (if any) is mapped and access it directly. | 112 | * to find which page (if any) is mapped and access it directly. |
113 | */ | 113 | */ |
114 | static int read_user_stack_slow(void __user *ptr, void *ret, int nb) | 114 | static int read_user_stack_slow(void __user *ptr, void *buf, int nb) |
115 | { | 115 | { |
116 | int ret = -EFAULT; | ||
116 | pgd_t *pgdir; | 117 | pgd_t *pgdir; |
117 | pte_t *ptep, pte; | 118 | pte_t *ptep, pte; |
118 | unsigned shift; | 119 | unsigned shift; |
119 | unsigned long addr = (unsigned long) ptr; | 120 | unsigned long addr = (unsigned long) ptr; |
120 | unsigned long offset; | 121 | unsigned long offset; |
121 | unsigned long pfn; | 122 | unsigned long pfn, flags; |
122 | void *kaddr; | 123 | void *kaddr; |
123 | 124 | ||
124 | pgdir = current->mm->pgd; | 125 | pgdir = current->mm->pgd; |
125 | if (!pgdir) | 126 | if (!pgdir) |
126 | return -EFAULT; | 127 | return -EFAULT; |
127 | 128 | ||
129 | local_irq_save(flags); | ||
128 | ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift); | 130 | ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift); |
131 | if (!ptep) | ||
132 | goto err_out; | ||
129 | if (!shift) | 133 | if (!shift) |
130 | shift = PAGE_SHIFT; | 134 | shift = PAGE_SHIFT; |
131 | 135 | ||
132 | /* align address to page boundary */ | 136 | /* align address to page boundary */ |
133 | offset = addr & ((1UL << shift) - 1); | 137 | offset = addr & ((1UL << shift) - 1); |
134 | addr -= offset; | ||
135 | 138 | ||
136 | if (ptep == NULL) | 139 | pte = READ_ONCE(*ptep); |
137 | return -EFAULT; | ||
138 | pte = *ptep; | ||
139 | if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) | 140 | if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) |
140 | return -EFAULT; | 141 | goto err_out; |
141 | pfn = pte_pfn(pte); | 142 | pfn = pte_pfn(pte); |
142 | if (!page_is_ram(pfn)) | 143 | if (!page_is_ram(pfn)) |
143 | return -EFAULT; | 144 | goto err_out; |
144 | 145 | ||
145 | /* no highmem to worry about here */ | 146 | /* no highmem to worry about here */ |
146 | kaddr = pfn_to_kaddr(pfn); | 147 | kaddr = pfn_to_kaddr(pfn); |
147 | memcpy(ret, kaddr + offset, nb); | 148 | memcpy(buf, kaddr + offset, nb); |
148 | return 0; | 149 | ret = 0; |
150 | err_out: | ||
151 | local_irq_restore(flags); | ||
152 | return ret; | ||
149 | } | 153 | } |
150 | 154 | ||
151 | static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) | 155 | static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) |