author     Linus Torvalds <torvalds@linux-foundation.org>  2015-04-26 16:23:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-04-26 16:23:15 -0400
commit     63905bba5b0170492777b327ac5e2aaef64989d6
tree       cb60404ffedfa7d842c1e34fa965de57715182a6 /arch/powerpc
parent     eadf16a912b6bdf8bd476bde2f19fb41d06e0c3b
parent     2e826695d87c2d213def07bc344ae97d88384f62
Merge tag 'powerpc-4.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc fixes from Michael Ellerman:
- fix for mm_dec_nr_pmds() from Scott.
- fixes for oopses seen with KVM + THP from Aneesh.
- build fixes from Aneesh & Shreyas.
* tag 'powerpc-4.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux:
powerpc/mm: Fix build error with CONFIG_PPC_TRANSACTIONAL_MEM disabled
powerpc/kvm: Fix ppc64_defconfig + PPC_POWERNV=n build error
powerpc/mm/thp: Return pte address if we find trans_splitting.
powerpc/mm/thp: Make page table walk safe against thp split/collapse
KVM: PPC: Remove page table walk helpers
KVM: PPC: Use READ_ONCE when dereferencing pte_t pointer
powerpc/hugetlb: Call mm_dec_nr_pmds() in hugetlb_free_pmd_range()
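
The common thread in the THP fixes below is a new locking rule for the powerpc page-table walker: a caller in process context must disable interrupts across the walk (which holds off the IPI that the THP split/collapse and page-table-free paths wait for) and must take a single READ_ONCE() snapshot of the PTE. A minimal sketch of that calling pattern, condensed from the hunks that follow rather than quoted from any one of them:

/* Minimal sketch (condensed, not verbatim) of the walk discipline
 * this merge introduces for process-context callers. */
static pte_t lookup_pte_safely(struct mm_struct *mm, unsigned long addr)
{
        unsigned long flags;
        pte_t *ptep, pte = __pte(0);

        local_irq_save(flags);          /* blocks the split/collapse IPI */
        ptep = find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
        if (ptep)
                pte = READ_ONCE(*ptep); /* one snapshot, no compiler reload */
        local_irq_restore(flags);       /* table may be freed from here on */
        return pte;
}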
Diffstat (limited to 'arch/powerpc')
 arch/powerpc/include/asm/kvm_book3s_64.h | 17
 arch/powerpc/include/asm/pgtable.h       | 28
 arch/powerpc/kernel/eeh.c                |  6
 arch/powerpc/kernel/io-workarounds.c     | 10
 arch/powerpc/kvm/Kconfig                 |  2
 arch/powerpc/kvm/book3s_64_mmu_hv.c      | 14
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      | 86
 arch/powerpc/kvm/e500_mmu_host.c         | 32
 arch/powerpc/mm/hash_utils_64.c          |  3
 arch/powerpc/mm/hugetlbpage.c            | 32
 arch/powerpc/perf/callchain.c            | 24
 11 files changed, 137 insertions(+), 117 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 7ae407941be2..3536d12eb798 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -295,16 +295,17 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 
 /*
  * If it's present and writable, atomically set dirty and referenced bits and
- * return the PTE, otherwise return 0. If we find a transparent hugepage
- * and if it is marked splitting we return 0;
+ * return the PTE, otherwise return 0.
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
-						 unsigned int hugepage)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 {
 	pte_t old_pte, new_pte = __pte(0);
 
 	while (1) {
-		old_pte = *ptep;
+		/*
+		 * Make sure we don't reload from ptep
+		 */
+		old_pte = READ_ONCE(*ptep);
 		/*
 		 * wait until _PAGE_BUSY is clear then set it atomically
 		 */
@@ -312,12 +313,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
 			cpu_relax();
 			continue;
 		}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		/* If hugepage and is trans splitting return None */
-		if (unlikely(hugepage &&
-			     pmd_trans_splitting(pte_pmd(old_pte))))
-			return __pte(0);
-#endif
 		/* If pte is not present return None */
 		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
 			return __pte(0);
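
The switch from a plain dereference to READ_ONCE() above is aimed at the compiler, not the hardware: nothing stops the compiler from re-reading *ptep at every use of old_pte, so the value that passed the _PAGE_BUSY and _PAGE_PRESENT tests and the value the function eventually returns could otherwise come from two different loads. A hedged illustration (the helper name is made up):

/*
 * Illustrative only, not part of the patch: READ_ONCE() forces exactly
 * one volatile access, so every later check sees the same snapshot even
 * if a THP split/collapse rewrites the entry concurrently.
 */
static inline pte_t pte_snapshot(pte_t *ptep)
{
        pte_t snap = READ_ONCE(*ptep);  /* exactly one load from the table */

        if (!(pte_val(snap) & _PAGE_PRESENT))
                return __pte(0);
        return snap;                    /* the very value that was tested */
}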
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9835ac4173b7..11a38635dd65 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -247,28 +247,16 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #define pmd_large(pmd)		0
 #define has_transparent_hugepage() 0
 #endif
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				 unsigned *shift);
-
-static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
-				       unsigned long *pte_sizep)
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				   unsigned *shift);
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+					       unsigned *shift)
 {
-	pte_t *ptep;
-	unsigned long ps = *pte_sizep;
-	unsigned int shift;
-
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
-	if (!ptep)
-		return NULL;
-	if (shift)
-		*pte_sizep = 1ul << shift;
-	else
-		*pte_sizep = PAGE_SIZE;
-
-	if (ps > *pte_sizep)
-		return NULL;
-
-	return ptep;
+	if (!arch_irqs_disabled()) {
+		pr_info("%s called with irq enabled\n", __func__);
+		dump_stack();
+	}
+	return __find_linux_pte_or_hugepte(pgdir, ea, shift);
 }
 #endif /* __ASSEMBLY__ */
 
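
The header now splits the walker in two: __find_linux_pte_or_hugepte() is the raw walk, and the old name becomes a wrapper that warns and dumps a stack trace when called with interrupts enabled. The raw variant stays available for contexts where the irq dance is impossible or pointless; a sketch condensed from the eeh.c hunk below (the helper name is made up):

/*
 * Illustrative only: the raw walker is reserved for real mode (MSR[EE]=0
 * already) and for init_mm walks such as iomem, where the tables are never
 * freed and never back hugepages.
 */
static unsigned long iomem_token_to_phys(unsigned long token)
{
        int hugepage_shift;
        pte_t *ptep;

        ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
                                           &hugepage_shift);
        if (!ptep)
                return token;
        WARN_ON(hugepage_shift);        /* iomem is never hugepage-backed */
        return (pte_pfn(*ptep) << PAGE_SHIFT) | (token & ~PAGE_MASK);
}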
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index a4c62eb0ee48..44b480e3a5af 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -334,9 +334,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	int hugepage_shift;
 
 	/*
-	 * We won't find hugepages here, iomem
+	 * We won't find hugepages here(this is iomem). Hence we are not
+	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
+	 * page table free, because of init_mm.
 	 */
-	ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
 	if (!ptep)
 		return token;
 	WARN_ON(hugepage_shift);
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 24b968f8e4d8..63d9cc4d7366 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -71,15 +71,15 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 	vaddr = (unsigned long)PCI_FIX_ADDR(addr);
 	if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
 		return NULL;
-
-	ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+	/*
+	 * We won't find huge pages here (iomem). Also can't hit
+	 * a page table free due to init_mm
+	 */
+	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
 			&hugepage_shift);
 	if (ptep == NULL)
 		paddr = 0;
 	else {
-		/*
-		 * we don't have hugepages backing iomem
-		 */
 		WARN_ON(hugepage_shift);
 		paddr = pte_pfn(*ptep) << PAGE_SHIFT;
 	}
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 2963e4dd0b80..3caec2c42105 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -75,7 +75,7 @@ config KVM_BOOK3S_64
 
 config KVM_BOOK3S_64_HV
 	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
-	depends on KVM_BOOK3S_64
+	depends on KVM_BOOK3S_64 && PPC_POWERNV
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d6fe30835c58..1a4acf8bf4f4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -535,23 +535,21 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		/* if the guest wants write access, see if that is OK */
 		if (!writing && hpte_is_writable(r)) {
-			unsigned int hugepage_shift;
 			pte_t *ptep, pte;
-
+			unsigned long flags;
 			/*
 			 * We need to protect against page table destruction
-			 * while looking up and updating the pte.
+			 * hugepage split and collapse.
 			 */
-			rcu_read_lock_sched();
+			local_irq_save(flags);
 			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, &hugepage_shift);
+							 hva, NULL);
 			if (ptep) {
-				pte = kvmppc_read_update_linux_pte(ptep, 1,
-							   hugepage_shift);
+				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (pte_write(pte))
 					write_ok = 1;
 			}
-			rcu_read_unlock_sched();
+			local_irq_restore(flags);
 		}
 	}
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index f6bf0b1de6d7..b027a89737b6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x)
 {
 	unsigned long addr = (unsigned long) x;
 	pte_t *p;
-
-	p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+	/*
+	 * assume we don't have huge pages in vmalloc space...
+	 * So don't worry about THP collapse/split. Called
+	 * Only in realmode, hence won't need irq_save/restore.
+	 */
+	p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
 	if (!p || !pte_present(*p))
 		return NULL;
-	/* assume we don't have huge pages in vmalloc space... */
 	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
 	return __va(addr);
 }
@@ -131,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
-{
-	pte_t *ptep;
-	unsigned long ps = *pte_sizep;
-	unsigned int hugepage_shift;
-
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
-	if (!ptep)
-		return __pte(0);
-	if (hugepage_shift)
-		*pte_sizep = 1ul << hugepage_shift;
-	else
-		*pte_sizep = PAGE_SIZE;
-	if (ps > *pte_sizep)
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		       long pte_index, unsigned long pteh, unsigned long ptel,
 		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
@@ -160,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	struct revmap_entry *rev;
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
-	unsigned long pte_size;
+	unsigned hpage_shift;
 	unsigned long is_io;
 	unsigned long *rmap;
-	pte_t pte;
+	pte_t *ptep;
 	unsigned int writing;
 	unsigned long mmu_seq;
-	unsigned long rcbits;
+	unsigned long rcbits, irq_flags = 0;
 
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
@@ -202,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 	/* Translate to host virtual address */
 	hva = __gfn_to_hva_memslot(memslot, gfn);
-
-	/* Look up the Linux PTE for the backing page */
-	pte_size = psize;
-	pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
-	if (pte_present(pte) && !pte_protnone(pte)) {
-		if (writing && !pte_write(pte))
-			/* make the actual HPTE be read-only */
-			ptel = hpte_make_readonly(ptel);
-		is_io = hpte_cache_bits(pte_val(pte));
-		pa = pte_pfn(pte) << PAGE_SHIFT;
-		pa |= hva & (pte_size - 1);
-		pa |= gpa & ~PAGE_MASK;
+	/*
+	 * If we had a page table table change after lookup, we would
+	 * retry via mmu_notifier_retry.
+	 */
+	if (realmode)
+		ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+	else {
+		local_irq_save(irq_flags);
+		ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
 	}
+	if (ptep) {
+		pte_t pte;
+		unsigned int host_pte_size;
 
-	if (pte_size < psize)
-		return H_PARAMETER;
+		if (hpage_shift)
+			host_pte_size = 1ul << hpage_shift;
+		else
+			host_pte_size = PAGE_SIZE;
+		/*
+		 * We should always find the guest page size
+		 * to <= host page size, if host is using hugepage
+		 */
+		if (host_pte_size < psize) {
+			if (!realmode)
+				local_irq_restore(flags);
+			return H_PARAMETER;
+		}
+		pte = kvmppc_read_update_linux_pte(ptep, writing);
+		if (pte_present(pte) && !pte_protnone(pte)) {
+			if (writing && !pte_write(pte))
+				/* make the actual HPTE be read-only */
+				ptel = hpte_make_readonly(ptel);
+			is_io = hpte_cache_bits(pte_val(pte));
+			pa = pte_pfn(pte) << PAGE_SHIFT;
+			pa |= hva & (host_pte_size - 1);
+			pa |= gpa & ~PAGE_MASK;
+		}
+	}
+	if (!realmode)
+		local_irq_restore(irq_flags);
 
 	ptel &= ~(HPTE_R_PP0 - psize);
 	ptel |= pa;
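
Two details of the kvmppc_do_h_enter() rework are worth spelling out: real mode already runs with MSR[EE]=0, so only the virtual-mode path toggles interrupts, and the guest's HPTE size must fit inside the host PTE's mapping. A hedged, standalone illustration of the size check (values assumed, helper name made up):

/*
 * Illustrative only: the host PTE must map at least as much memory as
 * the guest HPTE describes, otherwise the hcall fails.
 */
static long check_backing_size(unsigned int hpage_shift, unsigned long psize)
{
        unsigned long host_pte_size;

        host_pte_size = hpage_shift ? (1UL << hpage_shift) : PAGE_SIZE;
        if (host_pte_size < psize)
                return H_PARAMETER;     /* e.g. 64K host page under a 16MB HPTE */
        return H_SUCCESS;               /* e.g. 16MB host hugepage (shift 24) */
}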
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index cc536d4a75ef..4d33e199edcc 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -338,6 +338,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	pte_t *ptep;
 	unsigned int wimg = 0;
 	pgd_t *pgdir;
+	unsigned long flags;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_notifier_seq;
@@ -468,15 +469,28 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 
 	pgdir = vcpu_e500->vcpu.arch.pgdir;
-	ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
-	if (pte_present(*ptep))
-		wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
-	else {
-		if (printk_ratelimit())
-			pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
-				__func__, (long)gfn, pfn);
-		ret = -EINVAL;
-		goto out;
+	/*
+	 * We are just looking at the wimg bits, so we don't
+	 * care much about the trans splitting bit.
+	 * We are holding kvm->mmu_lock so a notifier invalidate
+	 * can't run hence pfn won't change.
+	 */
+	local_irq_save(flags);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL);
+	if (ptep) {
+		pte_t pte = READ_ONCE(*ptep);
+
+		if (pte_present(pte)) {
+			wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
+				MAS2_WIMGE_MASK;
+			local_irq_restore(flags);
+		} else {
+			local_irq_restore(flags);
+			pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
+					   __func__, (long)gfn, pfn);
+			ret = -EINVAL;
+			goto out;
+		}
 	}
 	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2c2022d16059..fda236f908eb 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1066,7 +1066,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get PTE and page size from page tables */
-	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+	ptep = __find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		rc = 1;
@@ -1394,6 +1394,7 @@ tm_abort:
 		tm_abort(TM_CAUSE_TLBI);
 	}
 #endif
+	return;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fa9d5c238d22..0ce968b00b7c 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -109,7 +109,7 @@ int pgd_huge(pgd_t pgd)
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	/* Only called for hugetlbfs pages, hence can ignore THP */
-	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -581,6 +581,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	pmd = pmd_offset(pud, start);
 	pud_clear(pud);
 	pmd_free_tlb(tlb, pmd, start);
+	mm_dec_nr_pmds(tlb->mm);
 }
 
 static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -681,28 +682,35 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	} while (addr = next, addr != end);
 }
 
+/*
+ * We are holding mmap_sem, so a parallel huge page collapse cannot run.
+ * To prevent hugepage split, disable irq.
+ */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
 	unsigned shift;
-	unsigned long mask;
+	unsigned long mask, flags;
 	/*
 	 * Transparent hugepages are handled by generic code. We can skip them
 	 * here.
 	 */
+	local_irq_save(flags);
 	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
 
 	/* Verify it is a huge page else bail. */
-	if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
+	if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
+		local_irq_restore(flags);
 		return ERR_PTR(-EINVAL);
-
+	}
 	mask = (1UL << shift) - 1;
 	page = pte_page(*ptep);
 	if (page)
 		page += (address & mask) / PAGE_SIZE;
 
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -949,9 +957,12 @@ void flush_dcache_icache_hugepage(struct page *page)
  *
  * So long as we atomically load page table pointers we are safe against teardown,
  * we can follow the address down to the the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
  */
 
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				   unsigned *shift)
 {
 	pgd_t pgd, *pgdp;
 	pud_t pud, *pudp;
@@ -1003,12 +1014,11 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
 		 * A hugepage collapse is captured by pmd_none, because
 		 * it mark the pmd none and do a hpte invalidate.
 		 *
-		 * A hugepage split is captured by pmd_trans_splitting
-		 * because we mark the pmd trans splitting and do a
-		 * hpte invalidate
-		 *
+		 * We don't worry about pmd_trans_splitting here, The
+		 * caller if it needs to handle the splitting case
+		 * should check for that.
 		 */
-		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+		if (pmd_none(pmd))
 			return NULL;
 
 		if (pmd_huge(pmd) || pmd_large(pmd)) {
@@ -1030,7 +1040,7 @@ out:
 		*shift = pdshift;
 	return ret_pte;
 }
-EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
 
 int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
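
The mm_dec_nr_pmds() one-liner above is the fix called out in the pull message: hugetlb_free_pmd_range() frees a PMD page without un-accounting it, leaving nr_pmds non-zero when the mm is torn down, which check_mm() then reports. A sketch of the required pairing (the allocation-side placement is assumed from generic mm code, not part of this patch):

/*
 * Illustrative pairing only: every PMD page counted at allocation time
 * must be uncounted on every free path, including the hugetlb one.
 */
static void hugetlb_pmd_accounting_sketch(struct mmu_gather *tlb, pud_t *pud,
                                          unsigned long start)
{
        pmd_t *pmd = pmd_offset(pud, start); /* counted via mm_inc_nr_pmds()
                                              * when it was allocated */
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
        mm_dec_nr_pmds(tlb->mm);             /* the previously missing half */
}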
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index ead55351b254..ff09cde20cd2 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -111,41 +111,45 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
  * interrupt context, so if the access faults, we read the page tables
  * to find which page (if any) is mapped and access it directly.
  */
-static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
+static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 {
+	int ret = -EFAULT;
 	pgd_t *pgdir;
 	pte_t *ptep, pte;
 	unsigned shift;
 	unsigned long addr = (unsigned long) ptr;
 	unsigned long offset;
-	unsigned long pfn;
+	unsigned long pfn, flags;
 	void *kaddr;
 
 	pgdir = current->mm->pgd;
 	if (!pgdir)
 		return -EFAULT;
 
+	local_irq_save(flags);
 	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+	if (!ptep)
+		goto err_out;
 	if (!shift)
 		shift = PAGE_SHIFT;
 
 	/* align address to page boundary */
 	offset = addr & ((1UL << shift) - 1);
-	addr -= offset;
 
-	if (ptep == NULL)
-		return -EFAULT;
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
-		return -EFAULT;
+		goto err_out;
 	pfn = pte_pfn(pte);
 	if (!page_is_ram(pfn))
-		return -EFAULT;
+		goto err_out;
 
 	/* no highmem to worry about here */
 	kaddr = pfn_to_kaddr(pfn);
-	memcpy(ret, kaddr + offset, nb);
-	return 0;
+	memcpy(buf, kaddr + offset, nb);
+	ret = 0;
+err_out:
+	local_irq_restore(flags);
+	return ret;
 }
 
 static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
