author     Linus Torvalds <torvalds@linux-foundation.org>  2019-05-10 08:29:27 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-05-10 08:29:27 -0400
commit     b970afcfcabd63cd3832e95db096439c177c3592
tree       b63e662c780e02617916f4c0269e2adddc67f5a0
parent     8ea5b2abd07e2280a332bd9c1a7f4dd15b9b6c13
parent     8150a153c013aa2dd1ffae43370b89ac1347a7fb

Merge tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
 "Slightly delayed due to the issue with printk() calling
  probe_kernel_read() interacting with our new user access prevention
  stuff, but all fixed now.

  The only out-of-area changes are the addition of a cpuhp_state, small
  additions to Documentation and MAINTAINERS updates.

  Highlights:

   - Support for Kernel Userspace Access/Execution Prevention (like
     SMAP/SMEP/PAN/PXN) on some 64-bit and 32-bit CPUs. This prevents
     the kernel from accidentally accessing userspace outside
     copy_to/from_user(), or ever executing userspace.

   - KASAN support on 32-bit.

   - Rework of where we map the kernel, vmalloc, etc. on 64-bit hash
     to use the same address ranges we use with the Radix MMU.

   - A rewrite into C of large parts of our idle handling code for
     64-bit Book3S (ie. power8 & power9).

   - A fast path entry for syscalls on 32-bit CPUs, for a 12-17%
     speedup in the null_syscall benchmark.

   - On 64-bit bare metal we have support for recovering from errors
     with the time base (our clocksource), however if that fails
     currently we hang in __delay() and never crash. We now have
     support for detecting that case and short circuiting __delay() so
     we at least panic() and reboot.

   - Add support for optionally enabling the DAWR on Power9, which had
     to be disabled by default due to a hardware erratum. This has the
     effect of enabling hardware breakpoints for GDB, the downside is
     a badly behaved program could crash the machine by pointing the
     DAWR at cache inhibited memory. This is opt-in obviously.

   - xmon, our crash handler, gets support for a read only mode where
     operations that could change memory or otherwise disturb the
     system are disabled.

  Plus many clean-ups, reworks and minor fixes etc.

  Thanks to: Christophe Leroy, Akshay Adiga, Alastair D'Silva, Alexey
  Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar,
  Anton Blanchard, Ben Hutchings, Bo YU, Breno Leitao, Cédric Le
  Goater, Christopher M. Riedl, Christoph Hellwig, Colin Ian King,
  David Gibson, Ganesh Goudar, Gautham R. Shenoy, George Spelvin, Greg
  Kroah-Hartman, Greg Kurz, Horia Geantă, Jagadeesh Pagadala, Joel
  Stanley, Joe Perches, Julia Lawall, Laurentiu Tudor, Laurent Vivier,
  Lukas Bulwahn, Madhavan Srinivasan, Mahesh Salgaonkar, Mathieu
  Malaterre, Michael Neuling, Mukesh Ojha, Nathan Fontenot, Nathan
  Lynch, Nicholas Piggin, Nick Desaulniers, Oliver O'Halloran, Peng
  Hao, Qian Cai, Ravi Bangoria, Rick Lindsley, Russell Currey, Sachin
  Sant, Stewart Smith, Sukadev Bhattiprolu, Thomas Huth, Tobin C.
  Harding, Tyrel Datwyler, Valentin Schneider, Wei Yongjun, Wen Yang,
  YueHaibing"

* tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (205 commits)
  powerpc/64s: Use early_mmu_has_feature() in set_kuap()
  powerpc/book3s/64: check for NULL pointer in pgd_alloc()
  powerpc/mm: Fix hugetlb page initialization
  ocxl: Fix return value check in afu_ioctl()
  powerpc/mm: fix section mismatch for setup_kup()
  powerpc/mm: fix redundant inclusion of pgtable-frag.o in Makefile
  powerpc/mm: Fix makefile for KASAN
  powerpc/kasan: add missing/lost Makefile
  selftests/powerpc: Add a signal fuzzer selftest
  powerpc/booke64: set RI in default MSR
  ocxl: Provide global MMIO accessors for external drivers
  ocxl: move event_fd handling to frontend
  ocxl: afu_irq only deals with IRQ IDs, not offsets
  ocxl: Allow external drivers to use OpenCAPI contexts
  ocxl: Create a clear delineation between ocxl backend & frontend
  ocxl: Don't pass pci_dev around
  ocxl: Split pci.c
  ocxl: Remove some unused exported symbols
  ocxl: Remove superfluous 'extern' from headers
  ocxl: read_pasid never returns an error, so make it void
  ...
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 4
-rw-r--r--  Documentation/powerpc/DAWR-POWER9.txt | 40
-rw-r--r--  MAINTAINERS | 13
-rw-r--r--  arch/powerpc/Kconfig | 10
-rw-r--r--  arch/powerpc/Kconfig.debug | 32
-rw-r--r--  arch/powerpc/Makefile | 15
-rw-r--r--  arch/powerpc/boot/addnote.c | 6
-rw-r--r--  arch/powerpc/boot/dts/fsl/b4qds.dtsi | 1
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 1
-rw-r--r--  arch/powerpc/configs/skiroot_defconfig | 2
-rw-r--r--  arch/powerpc/crypto/crc-vpmsum_test.c | 10
-rw-r--r--  arch/powerpc/include/asm/book3s/32/kup.h | 145
-rw-r--r--  arch/powerpc/include/asm/book3s/32/mmu-hash.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgalloc.h | 41
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h | 13
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-4k.h | 23
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-64k.h | 21
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 95
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hugetlb.h | 72
-rw-r--r--  arch/powerpc/include/asm/book3s/64/kup-radix.h | 108
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 70
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu.h | 104
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgalloc.h | 52
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 12
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix-4k.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix-64k.h | 8
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix.h | 40
-rw-r--r--  arch/powerpc/include/asm/book3s/64/slice.h | 13
-rw-r--r--  arch/powerpc/include/asm/cpuidle.h | 19
-rw-r--r--  arch/powerpc/include/asm/drmem.h | 21
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 2
-rw-r--r--  arch/powerpc/include/asm/fadump.h | 1
-rw-r--r--  arch/powerpc/include/asm/feature-fixups.h | 3
-rw-r--r--  arch/powerpc/include/asm/fixmap.h | 5
-rw-r--r--  arch/powerpc/include/asm/futex.h | 4
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 87
-rw-r--r--  arch/powerpc/include/asm/hw_breakpoint.h | 8
-rw-r--r--  arch/powerpc/include/asm/imc-pmu.h | 39
-rw-r--r--  arch/powerpc/include/asm/kasan.h | 40
-rw-r--r--  arch/powerpc/include/asm/kup.h | 73
-rw-r--r--  arch/powerpc/include/asm/mce.h | 97
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 28
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 7
-rw-r--r--  arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 44
-rw-r--r--  arch/powerpc/include/asm/nohash/32/kup-8xx.h | 58
-rw-r--r--  arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 102
-rw-r--r--  arch/powerpc/include/asm/nohash/32/mmu.h | 25
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgalloc.h | 123
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgtable.h | 13
-rw-r--r--  arch/powerpc/include/asm/nohash/32/slice.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/64/mmu.h | 12
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgalloc.h | 117
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 12
-rw-r--r--  arch/powerpc/include/asm/nohash/64/slice.h | 12
-rw-r--r--  arch/powerpc/include/asm/nohash/hugetlb-book3e.h | 45
-rw-r--r--  arch/powerpc/include/asm/nohash/mmu-book3e.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/mmu.h | 16
-rw-r--r--  arch/powerpc/include/asm/nohash/pgalloc.h | 56
-rw-r--r--  arch/powerpc/include/asm/nohash/pte-book3e.h | 5
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 18
-rw-r--r--  arch/powerpc/include/asm/opal.h | 9
-rw-r--r--  arch/powerpc/include/asm/paca.h | 40
-rw-r--r--  arch/powerpc/include/asm/page.h | 23
-rw-r--r--  arch/powerpc/include/asm/pgalloc.h | 51
-rw-r--r--  arch/powerpc/include/asm/pgtable-be-types.h | 9
-rw-r--r--  arch/powerpc/include/asm/pgtable-types.h | 9
-rw-r--r--  arch/powerpc/include/asm/pgtable.h | 9
-rw-r--r--  arch/powerpc/include/asm/processor.h | 12
-rw-r--r--  arch/powerpc/include/asm/ptrace.h | 11
-rw-r--r--  arch/powerpc/include/asm/reg.h | 8
-rw-r--r--  arch/powerpc/include/asm/reg_booke.h | 2
-rw-r--r--  arch/powerpc/include/asm/slice.h | 9
-rw-r--r--  arch/powerpc/include/asm/sparsemem.h | 4
-rw-r--r--  arch/powerpc/include/asm/string.h | 32
-rw-r--r--  arch/powerpc/include/asm/task_size_64.h | 2
-rw-r--r--  arch/powerpc/include/asm/time.h | 2
-rw-r--r--  arch/powerpc/include/asm/trace.h | 16
-rw-r--r--  arch/powerpc/include/asm/uaccess.h | 38
-rw-r--r--  arch/powerpc/include/asm/xive.h | 14
-rw-r--r--  arch/powerpc/kernel/Makefile | 14
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 25
-rw-r--r--  arch/powerpc/kernel/cacheinfo.c | 13
-rw-r--r--  arch/powerpc/kernel/cputable.c | 13
-rw-r--r--  arch/powerpc/kernel/dbell.c | 3
-rw-r--r--  arch/powerpc/kernel/early_32.c | 36
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 186
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 35
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 26
-rw-r--r--  arch/powerpc/kernel/fadump.c | 1
-rw-r--r--  arch/powerpc/kernel/fpu.S | 1
-rw-r--r--  arch/powerpc/kernel/head_32.S | 258
-rw-r--r--  arch/powerpc/kernel/head_32.h | 203
-rw-r--r--  arch/powerpc/kernel/head_40x.S | 155
-rw-r--r--  arch/powerpc/kernel/head_44x.S | 12
-rw-r--r--  arch/powerpc/kernel/head_64.S | 4
-rw-r--r--  arch/powerpc/kernel/head_8xx.S | 136
-rw-r--r--  arch/powerpc/kernel/head_booke.h | 131
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S | 32
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 62
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 1060
-rw-r--r--  arch/powerpc/kernel/irq.c | 16
-rw-r--r--  arch/powerpc/kernel/mce.c | 106
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 253
-rw-r--r--  arch/powerpc/kernel/paca.c | 12
-rw-r--r--  arch/powerpc/kernel/process.c | 35
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 248
-rw-r--r--  arch/powerpc/kernel/prom_init_check.sh | 12
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 3
-rw-r--r--  arch/powerpc/kernel/security.c | 8
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 116
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 28
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 10
-rw-r--r--  arch/powerpc/kernel/signal_64.c | 27
-rw-r--r--  arch/powerpc/kernel/time.c | 10
-rw-r--r--  arch/powerpc/kernel/traps.c | 8
-rw-r--r--  arch/powerpc/kernel/vdso32/Makefile | 5
-rw-r--r--  arch/powerpc/kernel/vdso64/Makefile | 5
-rw-r--r--  arch/powerpc/kernel/vector.S | 1
-rw-r--r--  arch/powerpc/kernel/watchdog.c | 81
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 3
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 141
-rw-r--r--  arch/powerpc/lib/Makefile | 19
-rw-r--r--  arch/powerpc/lib/checksum_wrappers.c | 4
-rw-r--r--  arch/powerpc/lib/code-patching.c | 5
-rw-r--r--  arch/powerpc/lib/copy_32.S | 12
-rw-r--r--  arch/powerpc/lib/mem_64.S | 9
-rw-r--r--  arch/powerpc/lib/memcpy_64.S | 4
-rw-r--r--  arch/powerpc/mm/Makefile | 47
-rw-r--r--  arch/powerpc/mm/book3s32/Makefile | 9
-rw-r--r--  arch/powerpc/mm/book3s32/hash_low.S (renamed from arch/powerpc/mm/hash_low_32.S) | 6
-rw-r--r--  arch/powerpc/mm/book3s32/mmu.c (renamed from arch/powerpc/mm/ppc_mmu_32.c) | 76
-rw-r--r--  arch/powerpc/mm/book3s32/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_hash32.c) | 0
-rw-r--r--  arch/powerpc/mm/book3s32/tlb.c (renamed from arch/powerpc/mm/tlb_hash32.c) | 2
-rw-r--r--  arch/powerpc/mm/book3s64/Makefile | 24
-rw-r--r--  arch/powerpc/mm/book3s64/hash_4k.c (renamed from arch/powerpc/mm/hash64_4k.c) | 2
-rw-r--r--  arch/powerpc/mm/book3s64/hash_64k.c (renamed from arch/powerpc/mm/hash64_64k.c) | 2
-rw-r--r--  arch/powerpc/mm/book3s64/hash_hugepage.c (renamed from arch/powerpc/mm/hugepage-hash64.c) | 2
-rw-r--r--  arch/powerpc/mm/book3s64/hash_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-hash64.c) | 31
-rw-r--r--  arch/powerpc/mm/book3s64/hash_native.c (renamed from arch/powerpc/mm/hash_native_64.c) | 0
-rw-r--r--  arch/powerpc/mm/book3s64/hash_pgtable.c (renamed from arch/powerpc/mm/pgtable-hash64.c) | 15
-rw-r--r--  arch/powerpc/mm/book3s64/hash_tlb.c (renamed from arch/powerpc/mm/tlb_hash64.c) | 18
-rw-r--r--  arch/powerpc/mm/book3s64/hash_utils.c (renamed from arch/powerpc/mm/hash_utils_64.c) | 145
-rw-r--r--  arch/powerpc/mm/book3s64/iommu_api.c (renamed from arch/powerpc/mm/mmu_context_iommu.c) | 0
-rw-r--r--  arch/powerpc/mm/book3s64/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_book3s64.c) | 29
-rw-r--r--  arch/powerpc/mm/book3s64/pgtable.c (renamed from arch/powerpc/mm/pgtable-book3s64.c) | 2
-rw-r--r--  arch/powerpc/mm/book3s64/pkeys.c (renamed from arch/powerpc/mm/pkeys.c) | 1
-rw-r--r--  arch/powerpc/mm/book3s64/radix_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-radix.c) | 0
-rw-r--r--  arch/powerpc/mm/book3s64/radix_pgtable.c (renamed from arch/powerpc/mm/pgtable-radix.c) | 117
-rw-r--r--  arch/powerpc/mm/book3s64/radix_tlb.c (renamed from arch/powerpc/mm/tlb-radix.c) | 0
-rw-r--r--  arch/powerpc/mm/book3s64/slb.c (renamed from arch/powerpc/mm/slb.c) | 31
-rw-r--r--  arch/powerpc/mm/book3s64/subpage_prot.c (renamed from arch/powerpc/mm/subpage-prot.c) | 39
-rw-r--r--  arch/powerpc/mm/book3s64/vphn.c (renamed from arch/powerpc/mm/vphn.c) | 6
-rw-r--r--  arch/powerpc/mm/book3s64/vphn.h (renamed from arch/powerpc/mm/vphn.h) | 3
-rw-r--r--  arch/powerpc/mm/copro_fault.c | 18
-rw-r--r--  arch/powerpc/mm/dma-noncoherent.c | 2
-rw-r--r--  arch/powerpc/mm/drmem.c | 6
-rw-r--r--  arch/powerpc/mm/fault.c | 49
-rw-r--r--  arch/powerpc/mm/highmem.c | 14
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 242
-rw-r--r--  arch/powerpc/mm/init-common.c | 26
-rw-r--r--  arch/powerpc/mm/init_32.c | 8
-rw-r--r--  arch/powerpc/mm/init_64.c | 2
-rw-r--r--  arch/powerpc/mm/kasan/Makefile | 5
-rw-r--r--  arch/powerpc/mm/kasan/kasan_init_32.c | 183
-rw-r--r--  arch/powerpc/mm/mem.c | 17
-rw-r--r--  arch/powerpc/mm/mmu_context.c | 2
-rw-r--r--  arch/powerpc/mm/mmu_decl.h | 9
-rw-r--r--  arch/powerpc/mm/nohash/40x.c (renamed from arch/powerpc/mm/40x_mmu.c) | 2
-rw-r--r--  arch/powerpc/mm/nohash/44x.c (renamed from arch/powerpc/mm/44x_mmu.c) | 2
-rw-r--r--  arch/powerpc/mm/nohash/8xx.c (renamed from arch/powerpc/mm/8xx_mmu.c) | 26
-rw-r--r--  arch/powerpc/mm/nohash/Makefile | 18
-rw-r--r--  arch/powerpc/mm/nohash/book3e_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-book3e.c) | 52
-rw-r--r--  arch/powerpc/mm/nohash/book3e_pgtable.c (renamed from arch/powerpc/mm/pgtable-book3e.c) | 9
-rw-r--r--  arch/powerpc/mm/nohash/fsl_booke.c (renamed from arch/powerpc/mm/fsl_booke_mmu.c) | 2
-rw-r--r--  arch/powerpc/mm/nohash/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_nohash.c) | 2
-rw-r--r--  arch/powerpc/mm/nohash/tlb.c (renamed from arch/powerpc/mm/tlb_nohash.c) | 19
-rw-r--r--  arch/powerpc/mm/nohash/tlb_low.S (renamed from arch/powerpc/mm/tlb_nohash_low.S) | 0
-rw-r--r--  arch/powerpc/mm/nohash/tlb_low_64e.S (renamed from arch/powerpc/mm/tlb_low_64e.S) | 31
-rw-r--r--  arch/powerpc/mm/numa.c | 35
-rw-r--r--  arch/powerpc/mm/pgtable.c | 114
-rw-r--r--  arch/powerpc/mm/pgtable_32.c | 47
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 13
-rw-r--r--  arch/powerpc/mm/ptdump/hashpagetable.c | 2
-rw-r--r--  arch/powerpc/mm/ptdump/ptdump.c | 86
-rw-r--r--  arch/powerpc/mm/slice.c | 109
-rw-r--r--  arch/powerpc/perf/Makefile | 3
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 28
-rw-r--r--  arch/powerpc/perf/generic-compat-pmu.c | 234
-rw-r--r--  arch/powerpc/perf/imc-pmu.c | 347
-rw-r--r--  arch/powerpc/perf/internal.h | 12
-rw-r--r--  arch/powerpc/perf/power5+-pmu.c | 4
-rw-r--r--  arch/powerpc/perf/power5-pmu.c | 4
-rw-r--r--  arch/powerpc/perf/power6-pmu.c | 4
-rw-r--r--  arch/powerpc/perf/power7-pmu.c | 4
-rw-r--r--  arch/powerpc/perf/power8-pmu.c | 3
-rw-r--r--  arch/powerpc/perf/power9-events-list.h | 2
-rw-r--r--  arch/powerpc/perf/power9-pmu.c | 3
-rw-r--r--  arch/powerpc/perf/ppc970-pmu.c | 4
-rw-r--r--  arch/powerpc/platforms/83xx/usb.c | 4
-rw-r--r--  arch/powerpc/platforms/8xx/pic.c | 3
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 45
-rw-r--r--  arch/powerpc/platforms/cell/spu_base.c | 4
-rw-r--r--  arch/powerpc/platforms/embedded6xx/holly.c | 19
-rw-r--r--  arch/powerpc/platforms/powermac/Makefile | 6
-rw-r--r--  arch/powerpc/platforms/powernv/idle.c | 902
-rw-r--r--  arch/powerpc/platforms/powernv/opal-call.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/opal-imc.c | 5
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 23
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 35
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h | 2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 5
-rw-r--r--  arch/powerpc/platforms/powernv/subcore.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 17
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 13
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/pmem.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c | 135
-rw-r--r--  arch/powerpc/purgatory/Makefile | 3
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 99
-rw-r--r--  arch/powerpc/xmon/Makefile | 1
-rw-r--r--  arch/powerpc/xmon/xmon.c | 66
-rw-r--r--  drivers/misc/cxl/fault.c | 2
-rw-r--r--  drivers/misc/ocxl/Makefile | 3
-rw-r--r--  drivers/misc/ocxl/afu_irq.c | 102
-rw-r--r--  drivers/misc/ocxl/config.c | 13
-rw-r--r--  drivers/misc/ocxl/context.c | 31
-rw-r--r--  drivers/misc/ocxl/core.c | 574
-rw-r--r--  drivers/misc/ocxl/file.c | 182
-rw-r--r--  drivers/misc/ocxl/link.c | 42
-rw-r--r--  drivers/misc/ocxl/mmio.c | 234
-rw-r--r--  drivers/misc/ocxl/ocxl_internal.h | 94
-rw-r--r--  drivers/misc/ocxl/pci.c | 565
-rw-r--r--  drivers/misc/ocxl/sysfs.c | 54
-rw-r--r--  drivers/misc/ocxl/trace.h | 12
-rw-r--r--  include/linux/cpuhotplug.h | 1
-rw-r--r--  include/misc/ocxl.h | 359
-rw-r--r--  tools/testing/selftests/powerpc/copyloops/asm/export.h | 1
-rw-r--r--  tools/testing/selftests/powerpc/copyloops/asm/kasan.h | 0
-rw-r--r--  tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h | 1
-rw-r--r--  tools/testing/selftests/powerpc/harness.c | 6
-rw-r--r--  tools/testing/selftests/powerpc/include/reg.h | 2
-rw-r--r--  tools/testing/selftests/powerpc/signal/.gitignore | 1
-rw-r--r--  tools/testing/selftests/powerpc/signal/Makefile | 3
-rw-r--r--  tools/testing/selftests/powerpc/signal/sigfuz.c | 325
l---------  tools/testing/selftests/powerpc/vphn/vphn.c | 2
l---------  tools/testing/selftests/powerpc/vphn/vphn.h | 2
247 files changed, 7819 insertions, 4558 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1fe7e8c4f15..08df58805703 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2875,11 +2875,11 @@
 			noexec=on: enable non-executable mappings (default)
 			noexec=off: disable non-executable mappings
 
-	nosmap		[X86]
+	nosmap		[X86,PPC]
 			Disable SMAP (Supervisor Mode Access Prevention)
 			even if it is supported by processor.
 
-	nosmep		[X86]
+	nosmep		[X86,PPC]
 			Disable SMEP (Supervisor Mode Execution Prevention)
 			even if it is supported by processor.
 
diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/DAWR-POWER9.txt
index 2feaa6619658..ecdbb076438c 100644
--- a/Documentation/powerpc/DAWR-POWER9.txt
+++ b/Documentation/powerpc/DAWR-POWER9.txt
@@ -1,10 +1,10 @@
 DAWR issues on POWER9
 ============================
 
-On POWER9 the DAWR can cause a checkstop if it points to cache
-inhibited (CI) memory. Currently Linux has no way to disinguish CI
-memory when configuring the DAWR, so (for now) the DAWR is disabled by
-this commit:
+On POWER9 the Data Address Watchpoint Register (DAWR) can cause a checkstop
+if it points to cache inhibited (CI) memory. Currently Linux has no way to
+distinguish CI memory when configuring the DAWR, so (for now) the DAWR is
+disabled by this commit:
 
     commit 9654153158d3e0684a1bdb76dbababdb7111d5a0
     Author: Michael Neuling <mikey@neuling.org>
@@ -56,3 +56,35 @@ POWER9. Loads and stores to the watchpoint locations will not be
 trapped in GDB. The watchpoint is remembered, so if the guest is
 migrated back to the POWER8 host, it will start working again.
 
+Force enabling the DAWR
+=============================
+Kernels (since ~v5.2) have an option to force enable the DAWR via:
+
+  echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
+
+This enables the DAWR even on POWER9.
+
+This is a dangerous setting, USE AT YOUR OWN RISK.
+
+Some users may not care about a bad user crashing their box
+(ie. single user/desktop systems) and really want the DAWR. This
+allows them to force enable DAWR.
+
+This flag can also be used to disable DAWR access. Once this is
+cleared, all DAWR access should be cleared immediately and your
+machine once again safe from crashing.
+
+Userspace may get confused by toggling this. If DAWR is force
+enabled/disabled between getting the number of breakpoints (via
+PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
+inconsistent view of what's available. Similarly for guests.
+
+For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
+enabled in the host AND the guest. For this reason, this won't work on
+POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
+dawr_enable_dangerous file will fail if the hypervisor doesn't support
+writing the DAWR.
+
+To double check the DAWR is working, run this kernel selftest:
+  tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+Any errors/failures/skips mean something is wrong.
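
To make the interface above concrete, here is a minimal userspace sketch (ours, not part of this series; only the debugfs path and the Y/N semantics come from the documentation above):

/* Force-enable the DAWR; needs root and a mounted debugfs. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/powerpc/dawr_enable_dangerous", O_WRONLY);

	if (fd < 0) {
		perror("open");	/* kernel too old, or debugfs not mounted */
		return 1;
	}
	/* Per the text above, writing 'Y' fails if the hypervisor
	 * doesn't support writing the DAWR. */
	if (write(fd, "Y", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}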
diff --git a/MAINTAINERS b/MAINTAINERS
index 960070e36bd9..d85f8ecc64a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4347,7 +4347,7 @@ F: drivers/net/ethernet/chelsio/cxgb4vf/
 
 CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER
 M:	Frederic Barrat <fbarrat@linux.ibm.com>
-M:	Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+M:	Andrew Donnellan <ajd@linux.ibm.com>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
 F:	arch/powerpc/platforms/powernv/pci-cxl.c
@@ -7483,13 +7483,12 @@ S: Supported
 F:	drivers/net/ethernet/ibm/ibmvnic.*
 
 IBM Power Virtual Accelerator Switchboard
-M:	Sukadev Bhattiprolu
+M:	Sukadev Bhattiprolu <sukadev@linux.ibm.com>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
 F:	arch/powerpc/platforms/powernv/vas*
 F:	arch/powerpc/platforms/powernv/copy-paste.h
 F:	arch/powerpc/include/asm/vas.h
-F:	arch/powerpc/include/uapi/asm/vas.h
 
 IBM Power Virtual Ethernet Device Driver
 M:	Thomas Falcon <tlfalcon@linux.ibm.com>
@@ -7536,14 +7535,14 @@ F: drivers/crypto/vmx/ghash*
 F:	drivers/crypto/vmx/ppc-xlate.pl
 
 IBM Power PCI Hotplug Driver for RPA-compliant PPC64 platform
-M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
+M:	Tyrel Datwyler <tyreld@linux.ibm.com>
 L:	linux-pci@vger.kernel.org
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
 F:	drivers/pci/hotplug/rpaphp*
 
 IBM Power IO DLPAR Driver for RPA-compliant PPC64 platform
-M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
+M:	Tyrel Datwyler <tyreld@linux.ibm.com>
 L:	linux-pci@vger.kernel.org
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
@@ -10498,7 +10497,7 @@ F: arch/arm/mach-mmp/
 
 MMU GATHER AND TLB INVALIDATION
 M:	Will Deacon <will.deacon@arm.com>
-M:	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+M:	"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
 M:	Andrew Morton <akpm@linux-foundation.org>
 M:	Nick Piggin <npiggin@gmail.com>
 M:	Peter Zijlstra <peterz@infradead.org>
@@ -11299,7 +11298,7 @@ F: tools/objtool/
 
 OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
 M:	Frederic Barrat <fbarrat@linux.ibm.com>
-M:	Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+M:	Andrew Donnellan <ajd@linux.ibm.com>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
 F:	arch/powerpc/platforms/powernv/ocxl.c
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fa7219ffeadc..d7996cfaceca 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -167,6 +167,7 @@ config PPC
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_KASAN			if PPC32
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if COMPAT
@@ -375,7 +376,6 @@ config ZONE_DMA
 config PGTABLE_LEVELS
 	int
 	default 2 if !PPC64
-	default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64
 	default 4
 
 source "arch/powerpc/sysdev/Kconfig"
@@ -391,7 +391,7 @@ source "kernel/Kconfig.hz"
 
 config HUGETLB_PAGE_SIZE_VARIABLE
 	bool
-	depends on HUGETLB_PAGE
+	depends on HUGETLB_PAGE && PPC_BOOK3S_64
 	default y
 
 config MATH_EMULATION
@@ -832,9 +832,9 @@ config CMDLINE_BOOL
 	bool "Default bootloader kernel arguments"
 
 config CMDLINE
-	string "Initial kernel command string"
-	depends on CMDLINE_BOOL
-	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+	string "Initial kernel command string" if CMDLINE_BOOL
+	default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL
+	default ""
 	help
 	  On some platforms, there is currently no way for the boot loader to
 	  pass arguments to the kernel. For these platforms, you can supply
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 4e00cb0a5464..c59920920ddc 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -117,6 +117,14 @@ config XMON_DISASSEMBLY
 	  to say Y here, unless you're building for a memory-constrained
 	  system.
 
+config XMON_DEFAULT_RO_MODE
+	bool "Restrict xmon to read-only operations by default"
+	depends on XMON
+	default y
+	help
+	  Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+	  'xmon=ro' override this default.
+
 config DEBUGGER
 	bool
 	depends on KGDB || XMON
@@ -361,8 +369,32 @@ config PPC_PTDUMP
 
 	  If you are unsure, say N.
 
+config PPC_DEBUG_WX
+	bool "Warn on W+X mappings at boot"
+	depends on PPC_PTDUMP
+	help
+	  Generate a warning if any W+X mappings are found at boot.
+
+	  This is useful for discovering cases where the kernel is leaving
+	  W+X mappings after applying NX, as such mappings are a security risk.
+
+	  Note that even if the check fails, your kernel is possibly
+	  still fine, as W+X mappings are not a security hole in
+	  themselves, what they do is that they make the exploitation
+	  of other unfixed kernel bugs easier.
+
+	  There is no runtime or memory usage effect of this option
+	  once the kernel has booted up - it's a one time check.
+
+	  If in doubt, say "Y".
+
 config PPC_FAST_ENDIAN_SWITCH
 	bool "Deprecated fast endian-switch syscall"
 	depends on DEBUG_KERNEL && PPC_BOOK3S_64
 	help
 	  If you're unsure what this is, say N.
+
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0xe0000000
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 7de49889bd5d..258ea6b2f2e7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -34,11 +34,10 @@ ifdef CONFIG_PPC_BOOK3S_32
 KBUILD_CFLAGS	+= -mcpu=powerpc
 endif
 
-ifeq ($(CROSS_COMPILE),)
-KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
-else
-KBUILD_DEFCONFIG := ppc64_defconfig
-endif
+# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
+# ppc64_defconfig because we have nothing better to go on.
+uname := $(shell uname -m)
+KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
 
 ifdef CONFIG_PPC64
 new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
@@ -367,6 +366,10 @@ ppc32_allmodconfig:
 	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
 		-f $(srctree)/Makefile allmodconfig
 
+PHONY += ppc_defconfig
+ppc_defconfig:
+	$(call merge_into_defconfig,book3s_32.config,)
+
 PHONY += ppc64le_allmodconfig
 ppc64le_allmodconfig:
 	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \
@@ -406,7 +409,9 @@ vdso_install:
 ifdef CONFIG_PPC64
 	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
 endif
+ifdef CONFIG_VDSO32
 	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+endif
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
index 9d9f6f334d3c..3da3e2b1b51b 100644
--- a/arch/powerpc/boot/addnote.c
+++ b/arch/powerpc/boot/addnote.c
@@ -223,7 +223,11 @@ main(int ac, char **av)
 	PUT_16(E_PHNUM, np + 2);
 
 	/* write back */
-	lseek(fd, (long) 0, SEEK_SET);
+	i = lseek(fd, (long) 0, SEEK_SET);
+	if (i < 0) {
+		perror("lseek");
+		exit(1);
+	}
 	i = write(fd, buf, n);
 	if (i < 0) {
 		perror("write");
diff --git a/arch/powerpc/boot/dts/fsl/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
index 999efd3bc167..05be919f3545 100644
--- a/arch/powerpc/boot/dts/fsl/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
@@ -40,6 +40,7 @@
 	interrupt-parent = <&mpic>;
 
 	aliases {
+		crypto = &crypto;
 		phy_sgmii_10 = &phy_sgmii_10;
 		phy_sgmii_11 = &phy_sgmii_11;
 		phy_sgmii_1c = &phy_sgmii_1c;
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index ea79c519863d..62e12f61a3b2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -217,6 +217,7 @@ CONFIG_USB_MON=m
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_HCD_PPC_OF is not set
 CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_STORAGE=m
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 1bcd468ab422..a887616e35a2 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -163,6 +163,8 @@ CONFIG_S2IO=m
 CONFIG_MLX4_EN=m
 # CONFIG_MLX4_CORE_GEN2 is not set
 CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_MLX5_EN_RXNFC is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_MICROSEMI is not set
 CONFIG_MYRI10GE=m
diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c
index 0153a9c6f4af..98ea4f4d3dde 100644
--- a/arch/powerpc/crypto/crc-vpmsum_test.c
+++ b/arch/powerpc/crypto/crc-vpmsum_test.c
@@ -78,16 +78,12 @@ static int __init crc_test_init(void)
 
 	pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations);
 	for (i=0; i<iterations; i++) {
-		size_t len, offset;
+		size_t offset = prandom_u32_max(16);
+		size_t len = prandom_u32_max(MAX_CRC_LENGTH);
 
-		get_random_bytes(data, MAX_CRC_LENGTH);
-		get_random_bytes(&len, sizeof(len));
-		get_random_bytes(&offset, sizeof(offset));
-
-		len %= MAX_CRC_LENGTH;
-		offset &= 15;
 		if (len <= offset)
 			continue;
+		prandom_bytes(data, len);
 		len -= offset;
 
 		crypto_shash_update(crct10dif_shash, data+offset, len);
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
new file mode 100644
index 000000000000..677e9babef80
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H
+#define _ASM_POWERPC_BOOK3S_32_KUP_H
+
+#include <asm/book3s/32/mmu-hash.h>
+
+#ifdef __ASSEMBLY__
+
+.macro kuep_update_sr	gpr1, gpr2		/* NEVER use r0 as gpr2 due to addis */
+101:	mtsrin	\gpr1, \gpr2
+	addi	\gpr1, \gpr1, 0x111		/* next VSID */
+	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* clear VSID overflow */
+	addis	\gpr2, \gpr2, 0x1000		/* address of next segment */
+	bdnz	101b
+	isync
+.endm
+
+.macro kuep_lock	gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+	li	\gpr1, NUM_USER_SEGMENTS
+	li	\gpr2, 0
+	mtctr	\gpr1
+	mfsrin	\gpr1, \gpr2
+	oris	\gpr1, \gpr1, SR_NX@h		/* set Nx */
+	kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+.macro kuep_unlock	gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+	li	\gpr1, NUM_USER_SEGMENTS
+	li	\gpr2, 0
+	mtctr	\gpr1
+	mfsrin	\gpr1, \gpr2
+	rlwinm	\gpr1, \gpr1, 0, ~SR_NX		/* Clear Nx */
+	kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+#ifdef CONFIG_PPC_KUAP
+
+.macro kuap_update_sr	gpr1, gpr2, gpr3	/* NEVER use r0 as gpr2 due to addis */
+101:	mtsrin	\gpr1, \gpr2
+	addi	\gpr1, \gpr1, 0x111		/* next VSID */
+	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* clear VSID overflow */
+	addis	\gpr2, \gpr2, 0x1000		/* address of next segment */
+	cmplw	\gpr2, \gpr3
+	blt-	101b
+	isync
+.endm
+
+.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
+	lwz	\gpr2, KUAP(\thread)
+	rlwinm.	\gpr3, \gpr2, 28, 0xf0000000
+	stw	\gpr2, STACK_REGS_KUAP(\sp)
+	beq+	102f
+	li	\gpr1, 0
+	stw	\gpr1, KUAP(\thread)
+	mfsrin	\gpr1, \gpr2
+	oris	\gpr1, \gpr1, SR_KS@h	/* set Ks */
+	kuap_update_sr	\gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
+	lwz	\gpr2, STACK_REGS_KUAP(\sp)
+	rlwinm.	\gpr3, \gpr2, 28, 0xf0000000
+	stw	\gpr2, THREAD + KUAP(\current)
+	beq+	102f
+	mfsrin	\gpr1, \gpr2
+	rlwinm	\gpr1, \gpr1, 0, ~SR_KS	/* Clear Ks */
+	kuap_update_sr	\gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_check	current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+	lwz	\gpr2, KUAP(thread)
+999:	twnei	\gpr, 0
+	EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#endif /* CONFIG_PPC_KUAP */
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <linux/sched.h>
+
+static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
+{
+	barrier();	/* make sure thread.kuap is updated before playing with SRs */
+	while (addr < end) {
+		mtsrin(sr, addr);
+		sr += 0x111;		/* next VSID */
+		sr &= 0xf0ffffff;	/* clear VSID overflow */
+		addr += 0x10000000;	/* address of next segment */
+	}
+	isync();	/* Context sync required after mtsrin() */
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from, u32 size)
+{
+	u32 addr, end;
+
+	if (__builtin_constant_p(to) && to == NULL)
+		return;
+
+	addr = (__force u32)to;
+
+	if (!addr || addr >= TASK_SIZE || !size)
+		return;
+
+	end = min(addr + size, TASK_SIZE);
+	current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+	kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);	/* Clear Ks */
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from, u32 size)
+{
+	u32 addr = (__force u32)to;
+	u32 end = min(addr + size, TASK_SIZE);
+
+	if (!addr || addr >= TASK_SIZE || !size)
+		return;
+
+	current->thread.kuap = 0;
+	kuap_update_sr(mfsrin(addr) | SR_KS, addr, end);	/* set Ks */
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+	if (!is_write)
+		return false;
+
+	return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+}
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */
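
The call sites for these helpers live in the generic uaccess machinery rather than in this header, so they don't appear in this hunk. As a sketch of the intended usage (the wrapper name is ours; __copy_tofrom_user is the existing powerpc copy primitive), a user copy is bracketed so the Ks window is open only for the segments actually touched:

static inline unsigned long
copy_to_user_bracketed(void __user *to, const void *from, unsigned long n)
{
	unsigned long left;

	/* Clears Ks on just the segments covering [to, to + n). Per the
	 * encoding above, e.g. addr 0x10400000 with end 0x30000000 stores
	 * thread.kuap = 0x10000003 (first segment 1, last segment + 1 = 3). */
	allow_user_access(to, NULL, n);
	left = __copy_tofrom_user(to, (__force const void __user *)from, n);
	prevent_user_access(to, NULL, n);	/* sets Ks again, kuap = 0 */
	return left;				/* bytes not copied */
}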
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 5cb588395fdc..2e277ca0170f 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -10,8 +10,6 @@
  * BATs
  */
 
-#include <asm/page.h>
-
 /* Block size masks */
 #define BL_128K	0x000
 #define BL_256K	0x001
@@ -49,8 +47,6 @@ struct ppc_bat {
 	u32 batu;
 	u32 batl;
 };
-
-typedef pte_t *pgtable_t;
 #endif /* !__ASSEMBLY__ */
 
 /*
@@ -63,6 +59,11 @@ typedef pte_t *pgtable_t;
 #define PP_RWRW 2	/* Supervisor read/write, User read/write */
 #define PP_RXRX 3	/* Supervisor read, User read */
 
+/* Values for Segment Registers */
+#define SR_NX	0x10000000	/* No Execute */
+#define SR_KP	0x20000000	/* User key */
+#define SR_KS	0x40000000	/* Supervisor key */
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 3633502e102c..998317702630 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -5,28 +5,6 @@
 #include <linux/threads.h>
 #include <linux/slab.h>
 
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation.  For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer.  In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value.  This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE	0xf
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
@@ -59,24 +37,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 	*pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT);
 }
 
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
-	pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
 static inline void pgtable_free(void *table, unsigned index_size)
 {
 	if (!index_size) {
@@ -87,7 +47,6 @@ static inline void pgtable_free(void *table, unsigned index_size)
 	}
 }
 
-#define check_pgt_cache()	do { } while (0)
 #define get_hugepd_cache_index(x)  (x)
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index aa8406b8f7ba..838de59f6754 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -134,15 +134,24 @@ static inline bool pte_user(pte_t pte)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
 #define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+
+#endif /* !__ASSEMBLY__ */
+
 /*
  * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
  * value (for now) on others, from where we can start layout kernel
  * virtual space that goes below PKMAP and FIXMAP
  */
+#include <asm/fixmap.h>
+
 #ifdef CONFIG_HIGHMEM
 #define KVIRT_TOP	PKMAP_BASE
 #else
-#define KVIRT_TOP	(0xfe000000UL)	/* for now, could be FIXMAP_BASE ? */
+#define KVIRT_TOP	FIXADDR_START
 #endif
 
 /*
@@ -373,8 +382,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
 
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
-
 /* Generic accessors to PTE bits */
 static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
 static inline int pte_read(pte_t pte)		{ return 1; }
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index cf5ba5254299..8fd8599c9395 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -2,10 +2,10 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
 #define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
 
-#define H_PTE_INDEX_SIZE  9
-#define H_PMD_INDEX_SIZE  7
-#define H_PUD_INDEX_SIZE  9
-#define H_PGD_INDEX_SIZE  9
+#define H_PTE_INDEX_SIZE  9  // size: 8B << 9 = 4KB, maps: 2^9 x   4KB =   2MB
+#define H_PMD_INDEX_SIZE  7  // size: 8B << 7 = 1KB, maps: 2^7 x   2MB = 256MB
+#define H_PUD_INDEX_SIZE  9  // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB
+#define H_PGD_INDEX_SIZE  9  // size: 8B << 9 = 4KB, maps: 2^9 x 128GB =  64TB
 
 /*
  * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
@@ -13,6 +13,21 @@
  */
 #define MAX_EA_BITS_PER_CONTEXT		46
 
+#define REGION_SHIFT		(MAX_EA_BITS_PER_CONTEXT - 2)
+
+/*
+ * Our page table limit us to 64TB. Hence for the kernel mapping,
+ * each MAP area is limited to 16 TB.
+ * The four map areas are:  linear mapping, vmap, IO and vmemmap
+ */
+#define H_KERN_MAP_SIZE		(ASM_CONST(1) << REGION_SHIFT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 16TB
+ */
+#define H_KERN_VIRT_START	ASM_CONST(0xc000100000000000)
+
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
 #define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << H_PMD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f82ee8a3b561..d1d9177d9ebd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,16 +2,29 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 
-#define H_PTE_INDEX_SIZE  8
-#define H_PMD_INDEX_SIZE  10
-#define H_PUD_INDEX_SIZE  10
-#define H_PGD_INDEX_SIZE  8
+#define H_PTE_INDEX_SIZE   8  // size: 8B <<  8 = 2KB, maps 2^8  x 64KB = 16MB
+#define H_PMD_INDEX_SIZE  10  // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB
+#define H_PUD_INDEX_SIZE  10  // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
+#define H_PGD_INDEX_SIZE   8  // size: 8B <<  8 = 2KB, maps 2^8  x 16TB =  4PB
+
 
 /*
  * Each context is 512TB size. SLB miss for first context/default context
  * is handled in the hotpath.
  */
 #define MAX_EA_BITS_PER_CONTEXT		49
+#define REGION_SHIFT		MAX_EA_BITS_PER_CONTEXT
+
+/*
+ * We use one context for each MAP area.
+ */
+#define H_KERN_MAP_SIZE		(1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 2PB
+ */
+#define H_KERN_VIRT_START	ASM_CONST(0xc008000000000000)
 
 /*
  * 64k aligned address free up few of the lower bits of RPN for us
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 54b7af6cd27f..1d1183048cfd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -29,6 +29,10 @@
 #define H_PGTABLE_EADDR_SIZE	(H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
				 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
+/*
+ * Top 2 bits are ignored in page table walk.
+ */
+#define EA_MASK			(~(0xcUL << 60))
 
 /*
  * We store the slot details in the second half of page table.
@@ -42,59 +46,63 @@
 #endif
 
 /*
- * Define the address range of the kernel non-linear virtual area. In contrast
- * to the linear mapping, this is managed using the kernel page tables and then
- * inserted into the hash page table to actually take effect, similarly to user
- * mappings.
+ * +------------------------------+
+ * |                              |
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel virtual map end (0xc00e000000000000)
+ * |                              |
+ * |                              |
+ * |      512TB/16TB of vmemmap   |
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel vmemmap start
+ * |                              |
+ * |      512TB/16TB of IO map    |
+ * |                              |
+ * +------------------------------+  Kernel IO map start
+ * |                              |
+ * |      512TB/16TB of vmap      |
+ * |                              |
+ * +------------------------------+  Kernel virt start (0xc008000000000000)
+ * |                              |
+ * |                              |
+ * |                              |
+ * +------------------------------+  Kernel linear (0xc.....)
  */
-#define H_KERN_VIRT_START	ASM_CONST(0xD000000000000000)
 
-/*
- * Allow virtual mapping of one context size.
- * 512TB for 64K page size
- * 64TB for 4K page size
- */
-#define H_KERN_VIRT_SIZE	(1UL << MAX_EA_BITS_PER_CONTEXT)
+#define H_VMALLOC_START		H_KERN_VIRT_START
+#define H_VMALLOC_SIZE		H_KERN_MAP_SIZE
+#define H_VMALLOC_END		(H_VMALLOC_START + H_VMALLOC_SIZE)
 
-/*
- * 8TB IO mapping size
- */
-#define H_KERN_IO_SIZE		ASM_CONST(0x80000000000) /* 8T */
+#define H_KERN_IO_START		H_VMALLOC_END
+#define H_KERN_IO_SIZE		H_KERN_MAP_SIZE
+#define H_KERN_IO_END		(H_KERN_IO_START + H_KERN_IO_SIZE)
 
-/*
- * The vmalloc space starts at the beginning of the kernel non-linear virtual
- * region, and occupies 504T (64K) or 56T (4K)
- */
-#define H_VMALLOC_START		H_KERN_VIRT_START
-#define H_VMALLOC_SIZE		(H_KERN_VIRT_SIZE - H_KERN_IO_SIZE)
-#define H_VMALLOC_END		(H_VMALLOC_START + H_VMALLOC_SIZE)
+#define H_VMEMMAP_START		H_KERN_IO_END
+#define H_VMEMMAP_SIZE		H_KERN_MAP_SIZE
+#define H_VMEMMAP_END		(H_VMEMMAP_START + H_VMEMMAP_SIZE)
 
-#define H_KERN_IO_START		H_VMALLOC_END
+#define NON_LINEAR_REGION_ID(ea)	((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2)
 
 /*
  * Region IDs
  */
-#define REGION_SHIFT		60UL
-#define REGION_MASK		(0xfUL << REGION_SHIFT)
-#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID	(REGION_ID(H_VMALLOC_START))
-#define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
-#define USER_REGION_ID		(0UL)
+#define USER_REGION_ID		0
+#define LINEAR_MAP_REGION_ID	1
+#define VMALLOC_REGION_ID	NON_LINEAR_REGION_ID(H_VMALLOC_START)
+#define IO_REGION_ID		NON_LINEAR_REGION_ID(H_KERN_IO_START)
+#define VMEMMAP_REGION_ID	NON_LINEAR_REGION_ID(H_VMEMMAP_START)
 
 /*
  * Defines the address of the vmemap area, in its own region on
  * hash table CPUs.
  */
-#define H_VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
-
 #ifdef CONFIG_PPC_MM_SLICES
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif /* CONFIG_PPC_MM_SLICES */
 
-
 /* PTEIDX nibble */
 #define _PTEIDX_SECONDARY	0x8
 #define _PTEIDX_GROUP_IX	0x7
@@ -103,6 +111,25 @@
 #define H_PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
 
 #ifndef __ASSEMBLY__
+static inline int get_region_id(unsigned long ea)
+{
+	int region_id;
+	int id = (ea >> 60UL);
+
+	if (id == 0)
+		return USER_REGION_ID;
+
+	if (ea < H_KERN_VIRT_START)
+		return LINEAR_MAP_REGION_ID;
+
+	VM_BUG_ON(id != 0xc);
+	BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
+
+	region_id = NON_LINEAR_REGION_ID(ea);
+	VM_BUG_ON(region_id > VMEMMAP_REGION_ID);
+	return region_id;
+}
+
 #define hash__pmd_bad(pmd)		(pmd_val(pmd) & H_PMD_BAD_BITS)
 #define hash__pud_bad(pud)		(pud_val(pud) & H_PUD_BAD_BITS)
 static inline int hash__pgd_bad(pgd_t pgd)
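
To make the new region scheme concrete, a worked example (ours) using the 64K-page constants above, where H_KERN_VIRT_START is 0xc008000000000000 and REGION_SHIFT is 49, so each non-linear region is 512TB:

/*
 * NON_LINEAR_REGION_ID(ea) = ((ea - 0xc008000000000000) >> 49) + 2, hence:
 *
 *   get_region_id(0xc000000000000000) == LINEAR_MAP_REGION_ID  (1)
 *   get_region_id(0xc008000000000000) == VMALLOC_REGION_ID     (2)
 *   get_region_id(0xc00a000000000000) == IO_REGION_ID          (3)
 *   get_region_id(0xc00c000000000000) == VMEMMAP_REGION_ID     (4)
 *
 * which matches the map comment earlier in this hunk: vmap, IO and
 * vmemmap stack in 512TB steps up to the kernel virtual map end at
 * 0xc00e000000000000.
 */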
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index ec2a55a553c7..56140d19c85f 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -62,4 +62,76 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
 extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
					 unsigned long addr, pte_t *ptep,
					 pte_t old_pte, pte_t new_pte);
+/*
+ * This should work for other subarchs too. But right now we use the
+ * new format only for 64bit book3s
+ */
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	BUG_ON(!hugepd_ok(hpd));
+	/*
+	 * We have only four bits to encode, MMU page size
+	 */
+	BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
+	return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
+}
+
+static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
+{
+	return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+	return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
+}
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+				      unsigned long vmaddr)
+{
+	if (radix_enabled())
+		return radix__flush_hugetlb_page(vma, vmaddr);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+				    unsigned int pdshift)
+{
+	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
+
+	return hugepd_page(hpd) + idx;
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+	*hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2));
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline int check_and_get_huge_psize(int shift)
+{
+	int mmu_psize;
+
+	if (shift > SLICE_HIGH_SHIFT)
+		return -EINVAL;
+
+	mmu_psize = shift_to_mmu_psize(shift);
+
+	/*
+	 * We need to make sure that for different page sizes reported by
+	 * firmware we only add hugetlb support for page sizes that can be
+	 * supported by linux page table layout.
+	 * For now we have
+	 * Radix: 2M and 1G
+	 * Hash: 16M and 16G
+	 */
+	if (radix_enabled()) {
+		if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
+			return -EINVAL;
+	} else {
+		if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
+			return -EINVAL;
+	}
+	return mmu_psize;
+}
+
 #endif
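
As a round-trip sketch (ours) of the encoding these helpers share: hugepd_populate() packs the page-table address and the MMU page size index into one word, and hugepd_page()/hugepd_mmu_psize() unpack the same two fields:

/*
 *   hpd   = __pa(new) | HUGEPD_VAL_BITS | (psize << 2);
 *   psize = (hpd & HUGEPD_SHIFT_MASK) >> 2;  // 4-bit field, hence the
 *                                            // BUILD_BUG_ON in hugepd_page()
 *   new   = __va(hpd & HUGEPD_ADDR_MASK);
 */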
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
new file mode 100644
index 000000000000..f254de956d6a
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -0,0 +1,108 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
3#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
4
5#include <linux/const.h>
6
7#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
8#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
9#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
10#define AMR_KUAP_SHIFT 62
11
12#ifdef __ASSEMBLY__
13
14.macro kuap_restore_amr gpr
15#ifdef CONFIG_PPC_KUAP
16 BEGIN_MMU_FTR_SECTION_NESTED(67)
17 ld \gpr, STACK_REGS_KUAP(r1)
18 mtspr SPRN_AMR, \gpr
19 END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
20#endif
21.endm
22
23.macro kuap_check_amr gpr1, gpr2
24#ifdef CONFIG_PPC_KUAP_DEBUG
25 BEGIN_MMU_FTR_SECTION_NESTED(67)
26 mfspr \gpr1, SPRN_AMR
27 li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
28 sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
29999: tdne \gpr1, \gpr2
30 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
31 END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
32#endif
33.endm
34
35.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
36#ifdef CONFIG_PPC_KUAP
37 BEGIN_MMU_FTR_SECTION_NESTED(67)
38 .ifnb \msr_pr_cr
39 bne \msr_pr_cr, 99f
40 .endif
41 mfspr \gpr1, SPRN_AMR
42 std \gpr1, STACK_REGS_KUAP(r1)
43 li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
44 sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
45 cmpd \use_cr, \gpr1, \gpr2
46 beq \use_cr, 99f
47 // No isync needed before the mtspr: we very recently entered via rfid
48 mtspr SPRN_AMR, \gpr2
49 isync
5099:
51 END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
52#endif
53.endm
54
55#else /* !__ASSEMBLY__ */
56
57#ifdef CONFIG_PPC_KUAP
58
59#include <asm/reg.h>
60
61/*
62 * We support individually allowing read or write, but we don't support nesting
63 * because that would require an expensive read/modify write of the AMR.
64 */
65
66static inline void set_kuap(unsigned long value)
67{
68 if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
69 return;
70
71 /*
72 * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both
73 * before and after the move to AMR. See table 6 on page 1134.
74 */
75 isync();
76 mtspr(SPRN_AMR, value);
77 isync();
78}
79
80static inline void allow_user_access(void __user *to, const void __user *from,
81 unsigned long size)
82{
83 // This is written so we can resolve to a single case at build time
84 if (__builtin_constant_p(to) && to == NULL)
85 set_kuap(AMR_KUAP_BLOCK_WRITE);
86 else if (__builtin_constant_p(from) && from == NULL)
87 set_kuap(AMR_KUAP_BLOCK_READ);
88 else
89 set_kuap(0);
90}
91
92static inline void prevent_user_access(void __user *to, const void __user *from,
93 unsigned long size)
94{
95 set_kuap(AMR_KUAP_BLOCKED);
96}
97
98static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
99{
100 return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
101 (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
102 "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
103}
104#endif /* CONFIG_PPC_KUAP */
105
106#endif /* __ASSEMBLY__ */
107
108#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
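
The scheme above keeps the AMR parked at AMR_KUAP_BLOCKED whenever the kernel is not inside a copy routine; allow_user_access() clears only the direction actually being used, and bad_kuap_fault() turns any fault that hits a still-set block bit into a loud warning. A host-side model of that state machine (the kernel of course manipulates the real SPR with mtspr/isync, not a variable):

    #include <assert.h>
    #include <stdint.h>

    #define AMR_KUAP_BLOCK_READ  0x4000000000000000ULL
    #define AMR_KUAP_BLOCK_WRITE 0x8000000000000000ULL
    #define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)

    static uint64_t amr = AMR_KUAP_BLOCKED;   /* models SPRN_AMR */

    static void set_kuap(uint64_t value)
    {
            amr = value;   /* isync(); mtspr(SPRN_AMR, ...); isync(); in the kernel */
    }

    /* true when a user-access fault was blocked by KUAP, i.e. a bug */
    static int bad_kuap_fault(int is_write)
    {
            return !!(amr & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ));
    }

    int main(void)
    {
            assert(bad_kuap_fault(1));        /* locked on kernel entry */
            set_kuap(AMR_KUAP_BLOCK_READ);    /* allow_write_to_user() */
            assert(!bad_kuap_fault(1));       /* writes are now legitimate */
            assert(bad_kuap_fault(0));        /* reads stay blocked */
            set_kuap(AMR_KUAP_BLOCKED);       /* prevent_user_access() */
            return 0;
    }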
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index a28a28079edb..1e4705516a54 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -588,7 +588,8 @@ extern void slb_set_size(u16 size);
588#endif 588#endif
589 589
590#define MAX_VMALLOC_CTX_CNT 1 590#define MAX_VMALLOC_CTX_CNT 1
591#define MAX_MEMMAP_CTX_CNT 1 591#define MAX_IO_CTX_CNT 1
592#define MAX_VMEMMAP_CTX_CNT 1
592 593
593/* 594/*
594 * 256MB segment 595 * 256MB segment
@@ -601,13 +602,10 @@ extern void slb_set_size(u16 size);
601 * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 602 * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
602 * because of the modulo operation in vsid scramble. 603 * because of the modulo operation in vsid scramble.
603 * 604 *
604 * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
605 * context easily. The +1 is to map the unused 0xe region mapping.
606 */ 605 */
607#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) 606#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
608#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ 607#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
609 MAX_MEMMAP_CTX_CNT + 2) 608 MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
610
611/* 609/*
612 * For platforms that support only 65 bits of VA we limit the context bits 610
613 */ 611 */
@@ -657,8 +655,8 @@ extern void slb_set_size(u16 size);
657 655
658/* 4 bits per slice and we have one slice per 1TB */ 656/* 4 bits per slice and we have one slice per 1TB */
659#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) 657#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
660#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41) 658#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE)
661 659#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41)
662#ifndef __ASSEMBLY__ 660#ifndef __ASSEMBLY__
663 661
664#ifdef CONFIG_PPC_SUBPAGE_PROT 662#ifdef CONFIG_PPC_SUBPAGE_PROT
@@ -687,12 +685,41 @@ struct subpage_prot_table {
687#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS) 685#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
688 686
689extern void subpage_prot_free(struct mm_struct *mm); 687extern void subpage_prot_free(struct mm_struct *mm);
690extern void subpage_prot_init_new_context(struct mm_struct *mm);
691#else 688#else
692static inline void subpage_prot_free(struct mm_struct *mm) {} 689static inline void subpage_prot_free(struct mm_struct *mm) {}
693static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
694#endif /* CONFIG_PPC_SUBPAGE_PROT */ 690#endif /* CONFIG_PPC_SUBPAGE_PROT */
695 691
692/*
693 * One bit per slice. Low slices cover 256MB segments up to the 4GB
694 * boundary, which gives us 16 low slices. Above that we track slices
695 * at 1TB granularity.
696 */
697struct slice_mask {
698 u64 low_slices;
699 DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
700};
701
702struct hash_mm_context {
703 u16 user_psize; /* page size index */
704
705 /* SLB page size encodings*/
706 unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
707 unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
708 unsigned long slb_addr_limit;
709#ifdef CONFIG_PPC_64K_PAGES
710 struct slice_mask mask_64k;
711#endif
712 struct slice_mask mask_4k;
713#ifdef CONFIG_HUGETLB_PAGE
714 struct slice_mask mask_16m;
715 struct slice_mask mask_16g;
716#endif
717
718#ifdef CONFIG_PPC_SUBPAGE_PROT
719 struct subpage_prot_table *spt;
720#endif /* CONFIG_PPC_SUBPAGE_PROT */
721};
722
696#if 0 723#if 0
697/* 724/*
698 * The code below is equivalent to this function for arguments 725 * The code below is equivalent to this function for arguments
@@ -747,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
747 /* 774 /*
748 * Bad address. We return VSID 0 for that 775 * Bad address. We return VSID 0 for that
749 */ 776 */
750 if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) 777 if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
751 return 0; 778 return 0;
752 779
753 if (!mmu_has_feature(MMU_FTR_68_BIT_VA)) 780 if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
@@ -774,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
774 * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff] 801 * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
775 * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff] 802 * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
776 * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff] 803 * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
777
778 * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
779 * 0x00006 - Not used - Can map 0xe000000000000000 range.
780 * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
781 * 804 *
782 * So we can compute the context from the region (top nibble) by 805 * vmap, IO, vmemmap
783 * subtracting 11, or 0xc - 1. 806 *
807 * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff]
808 * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff]
809 * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff]
810 *
784 */ 811 */
785static inline unsigned long get_kernel_context(unsigned long ea) 812static inline unsigned long get_kernel_context(unsigned long ea)
786{ 813{
787 unsigned long region_id = REGION_ID(ea); 814 unsigned long region_id = get_region_id(ea);
788 unsigned long ctx; 815 unsigned long ctx;
789 /* 816 /*
790 * For linear mapping we do support multiple context 817 * Depending on the kernel config, a kernel region can have one
818 * context or more.
791 */ 819 */
792 if (region_id == KERNEL_REGION_ID) { 820 if (region_id == LINEAR_MAP_REGION_ID) {
793 /* 821 /*
794 * We already verified ea to be not beyond the addr limit. 822 * We already verified ea to be not beyond the addr limit.
795 */ 823 */
796 ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT); 824 ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
797 } else 825 } else
798 ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT; 826 ctx = region_id + MAX_KERNEL_CTX_CNT - 1;
799 return ctx; 827 return ctx;
800} 828}
801 829
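
A quick check of the new numbering against the table in the comment, with MAX_KERNEL_CTX_CNT assumed to be 4 and region IDs 1 (linear map) and 2 (vmalloc) assumed to match what get_region_id() returns:

    #include <assert.h>

    #define MAX_KERNEL_CTX_CNT      4                    /* assumed for this sketch */
    #define MAX_EA_BITS_PER_CONTEXT 49
    #define EA_MASK                 ((1UL << 52) - 1)    /* stand-in */

    enum { LINEAR_MAP_REGION_ID = 1, VMALLOC_REGION_ID = 2 };

    static unsigned long kernel_context(unsigned long region_id, unsigned long ea)
    {
            if (region_id == LINEAR_MAP_REGION_ID)
                    return 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
            return region_id + MAX_KERNEL_CTX_CNT - 1;
    }

    int main(void)
    {
            /* first 512TB chunk of the linear map -> context 0x00001 */
            assert(kernel_context(LINEAR_MAP_REGION_ID, 0) == 1);
            /* vmalloc region -> context 0x00005, as the table above says */
            assert(kernel_context(VMALLOC_REGION_ID, 0) == 5);
            return 0;
    }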
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 1ceee000c18d..74d24201fc4f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -25,15 +25,22 @@ struct mmu_psize_def {
25 }; 25 };
26}; 26};
27extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 27extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
28#endif /* __ASSEMBLY__ */
28 29
29/* 30/*
30 * For BOOK3s 64 with 4k and 64K linux page size 31 * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
31 * we want to use pointers, because the page table 32 * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
32 * actually store pfn 33 * page_to_nid does a page->section->node lookup
34 * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
35 * memory requirements with large number of sections.
36 * 51 bits is the max physical real address on POWER9
33 */ 37 */
34typedef pte_t *pgtable_t; 38#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
35 39 defined(CONFIG_PPC_64K_PAGES)
36#endif /* __ASSEMBLY__ */ 40#define MAX_PHYSMEM_BITS 51
41#else
42#define MAX_PHYSMEM_BITS 46
43#endif
37 44
38/* 64-bit classic hash table MMU */ 45/* 64-bit classic hash table MMU */
39#include <asm/book3s/64/mmu-hash.h> 46#include <asm/book3s/64/mmu-hash.h>
@@ -89,16 +96,6 @@ struct spinlock;
89/* Maximum possible number of NPUs in a system. */ 96/* Maximum possible number of NPUs in a system. */
90#define NV_MAX_NPUS 8 97#define NV_MAX_NPUS 8
91 98
92/*
93 * One bit per slice. We have lower slices which cover 256MB segments
94 * upto 4G range. That gets us 16 low slices. For the rest we track slices
95 * in 1TB size.
96 */
97struct slice_mask {
98 u64 low_slices;
99 DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
100};
101
102typedef struct { 99typedef struct {
103 union { 100 union {
104 /* 101 /*
@@ -112,7 +109,6 @@ typedef struct {
112 mm_context_id_t id; 109 mm_context_id_t id;
113 mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; 110 mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
114 }; 111 };
115 u16 user_psize; /* page size index */
116 112
117 /* Number of bits in the mm_cpumask */ 113 /* Number of bits in the mm_cpumask */
118 atomic_t active_cpus; 114 atomic_t active_cpus;
@@ -122,27 +118,9 @@ typedef struct {
122 118
123 /* NPU NMMU context */ 119 /* NPU NMMU context */
124 struct npu_context *npu_context; 120 struct npu_context *npu_context;
121 struct hash_mm_context *hash_context;
125 122
126#ifdef CONFIG_PPC_MM_SLICES
127 /* SLB page size encodings*/
128 unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
129 unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
130 unsigned long slb_addr_limit;
131# ifdef CONFIG_PPC_64K_PAGES
132 struct slice_mask mask_64k;
133# endif
134 struct slice_mask mask_4k;
135# ifdef CONFIG_HUGETLB_PAGE
136 struct slice_mask mask_16m;
137 struct slice_mask mask_16g;
138# endif
139#else
140 u16 sllp; /* SLB page size encoding */
141#endif
142 unsigned long vdso_base; 123 unsigned long vdso_base;
143#ifdef CONFIG_PPC_SUBPAGE_PROT
144 struct subpage_prot_table spt;
145#endif /* CONFIG_PPC_SUBPAGE_PROT */
146 /* 124 /*
147 * pagetable fragment support 125 * pagetable fragment support
148 */ 126 */
@@ -163,6 +141,60 @@ typedef struct {
163#endif 141#endif
164} mm_context_t; 142} mm_context_t;
165 143
144static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
145{
146 return ctx->hash_context->user_psize;
147}
148
149static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
150{
151 ctx->hash_context->user_psize = user_psize;
152}
153
154static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
155{
156 return ctx->hash_context->low_slices_psize;
157}
158
159static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
160{
161 return ctx->hash_context->high_slices_psize;
162}
163
164static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
165{
166 return ctx->hash_context->slb_addr_limit;
167}
168
169static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
170{
171 ctx->hash_context->slb_addr_limit = limit;
172}
173
174static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
175{
176#ifdef CONFIG_PPC_64K_PAGES
177 if (psize == MMU_PAGE_64K)
178 return &ctx->hash_context->mask_64k;
179#endif
180#ifdef CONFIG_HUGETLB_PAGE
181 if (psize == MMU_PAGE_16M)
182 return &ctx->hash_context->mask_16m;
183 if (psize == MMU_PAGE_16G)
184 return &ctx->hash_context->mask_16g;
185#endif
186 BUG_ON(psize != MMU_PAGE_4K);
187
188 return &ctx->hash_context->mask_4k;
189}
190
191#ifdef CONFIG_PPC_SUBPAGE_PROT
192static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
193{
194 return ctx->hash_context->spt;
195}
196#endif
197
166/* 198/*
167 * The current system page and segment sizes 199 * The current system page and segment sizes
168 */ 200 */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 138bc2ecc0c4..d45e4449619f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,29 +19,7 @@ struct vmemmap_backing {
19}; 19};
20extern struct vmemmap_backing *vmemmap_list; 20extern struct vmemmap_backing *vmemmap_list;
21 21
22/*
23 * Functions that deal with pagetables that could be at any level of
24 * the table need to be passed an "index_size" so they know how to
25 * handle allocation. For PTE pages (which are linked to a struct
26 * page for now, and drawn from the main get_free_pages() pool), the
27 * allocation size will be (2^index_size * sizeof(pointer)) and
28 * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
29 *
30 * The maximum index size needs to be big enough to allow any
31 * pagetable sizes we need, but small enough to fit in the low bits of
32 * any page table pointer. In other words all pagetables, even tiny
33 * ones, must be aligned to allow at least enough low 0 bits to
34 * contain this value. This value is also used as a mask, so it must
35 * be one less than a power of two.
36 */
37#define MAX_PGTABLE_INDEX_SIZE 0xf
38
39extern struct kmem_cache *pgtable_cache[];
40#define PGT_CACHE(shift) pgtable_cache[shift]
41
42extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
43extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); 22extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
44extern void pte_fragment_free(unsigned long *, int);
45extern void pmd_fragment_free(unsigned long *); 23extern void pmd_fragment_free(unsigned long *);
46extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); 24extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
47#ifdef CONFIG_SMP 25#ifdef CONFIG_SMP
@@ -81,6 +59,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
81 59
82 pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 60 pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
83 pgtable_gfp_flags(mm, GFP_KERNEL)); 61 pgtable_gfp_flags(mm, GFP_KERNEL));
62 if (unlikely(!pgd))
63 return pgd;
64
84 /* 65 /*
85 * Don't scan the PGD for pointers, it contains references to PUDs but 66 * Don't scan the PGD for pointers, it contains references to PUDs but
86 * those references are not full pointers and so can't be recognised by 67 * those references are not full pointers and so can't be recognised by
@@ -185,31 +166,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
185 *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS); 166 *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
186} 167}
187 168
188static inline pgtable_t pmd_pgtable(pmd_t pmd)
189{
190 return (pgtable_t)pmd_page_vaddr(pmd);
191}
192
193static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
194{
195 return (pte_t *)pte_fragment_alloc(mm, 1);
196}
197
198static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
199{
200 return (pgtable_t)pte_fragment_alloc(mm, 0);
201}
202
203static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
204{
205 pte_fragment_free((unsigned long *)pte, 1);
206}
207
208static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
209{
210 pte_fragment_free((unsigned long *)ptepage, 0);
211}
212
213static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, 169static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
214 unsigned long address) 170 unsigned long address)
215{ 171{
@@ -221,8 +177,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
221 pgtable_free_tlb(tlb, table, PTE_INDEX); 177 pgtable_free_tlb(tlb, table, PTE_INDEX);
222} 178}
223 179
224#define check_pgt_cache() do { } while (0)
225
226extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; 180extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
227static inline void update_page_count(int psize, long count) 181static inline void update_page_count(int psize, long count)
228{ 182{
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 581f91be9dd4..7dede2e34b70 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -277,9 +277,11 @@ extern unsigned long __vmalloc_end;
277extern unsigned long __kernel_virt_start; 277extern unsigned long __kernel_virt_start;
278extern unsigned long __kernel_virt_size; 278extern unsigned long __kernel_virt_size;
279extern unsigned long __kernel_io_start; 279extern unsigned long __kernel_io_start;
280extern unsigned long __kernel_io_end;
280#define KERN_VIRT_START __kernel_virt_start 281#define KERN_VIRT_START __kernel_virt_start
281#define KERN_VIRT_SIZE __kernel_virt_size
282#define KERN_IO_START __kernel_io_start 282#define KERN_IO_START __kernel_io_start
283#define KERN_IO_END __kernel_io_end
284
283extern struct page *vmemmap; 285extern struct page *vmemmap;
284extern unsigned long ioremap_bot; 286extern unsigned long ioremap_bot;
285extern unsigned long pci_io_base; 287extern unsigned long pci_io_base;
@@ -296,8 +298,7 @@ extern unsigned long pci_io_base;
296 298
297#include <asm/barrier.h> 299#include <asm/barrier.h>
298/* 300/*
299 * The second half of the kernel virtual space is used for IO mappings, 301 * IO space itself carved into the PIO region (ISA and PHB IO space) and
300 * it's itself carved into the PIO region (ISA and PHB IO space) and
301 * the ioremap space 302 * the ioremap space
302 * 303 *
303 * ISA_IO_BASE = KERN_IO_START, 64K reserved area 304 * ISA_IO_BASE = KERN_IO_START, 64K reserved area
@@ -310,7 +311,7 @@ extern unsigned long pci_io_base;
310#define PHB_IO_BASE (ISA_IO_END) 311#define PHB_IO_BASE (ISA_IO_END)
311#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) 312#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
312#define IOREMAP_BASE (PHB_IO_END) 313#define IOREMAP_BASE (PHB_IO_END)
313#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) 314#define IOREMAP_END (KERN_IO_END)
314 315
315/* Advertise special mapping type for AGP */ 316/* Advertise special mapping type for AGP */
316#define HAVE_PAGE_AGP 317#define HAVE_PAGE_AGP
@@ -992,7 +993,8 @@ extern struct page *pgd_page(pgd_t pgd);
992 (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) 993 (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
993 994
994#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) 995#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
995#define pte_unmap(pte) do { } while(0) 996
997static inline void pte_unmap(pte_t *pte) { }
996 998
997/* to find an entry in a kernel page-table-directory */ 999/* to find an entry in a kernel page-table-directory */
998/* This now only contains the vmalloc pages */ 1000/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index 863c3e8286fb..d5f5ab73dc7f 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -5,10 +5,11 @@
5/* 5/*
6 * For 4K page size supported index is 13/9/9/9 6 * For 4K page size supported index is 13/9/9/9
7 */ 7 */
8#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */ 8#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB
9#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ 9#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
10#define RADIX_PUD_INDEX_SIZE 9 10#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
11#define RADIX_PGD_INDEX_SIZE 13 11#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
12
12/* 13/*
13 * One fragment per page 14 * One fragment per page
14 */ 15 */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index ccb78ca9d0c5..54e33828b0fb 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -5,10 +5,10 @@
5/* 5/*
6 * For 64K page size supported index is 13/9/9/5 6 * For 64K page size supported index is 13/9/9/5
7 */ 7 */
8#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */ 8#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB
9#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ 9#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
10#define RADIX_PUD_INDEX_SIZE 9 10#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
11#define RADIX_PGD_INDEX_SIZE 13 11#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
12 12
13/* 13/*
14 * We use a 256 byte PTE page fragment in radix 14 * We use a 256 byte PTE page fragment in radix
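
The new comments spell out the arithmetic: a level with index size n holds 2^n eight-byte entries, and each entry maps whatever the level below covers. Checking the 64K-page geometry (13/9/9/5) on the host:

    #include <assert.h>

    /* bytes in a table whose index size is n: 8B << n */
    static unsigned long table_bytes(unsigned int index_size)
    {
            return 8UL << index_size;
    }

    int main(void)
    {
            assert(table_bytes(5)  == 256);          /* PTE fragment */
            assert(table_bytes(9)  == 4096);         /* PMD/PUD */
            assert(table_bytes(13) == 64 * 1024);    /* PGD */
            /* PTE level coverage: 2^5 entries x 64K pages = 2MB */
            assert((1UL << 5) * (64UL * 1024) == 2UL * 1024 * 1024);
            return 0;
    }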
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 5ab134eeed20..574eca33f893 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -72,19 +72,17 @@
72 * | | 72 * | |
73 * | | 73 * | |
74 * | | 74 * | |
75 * +------------------------------+ Kernel IO map end (0xc010000000000000) 75 * +------------------------------+ Kernel vmemmap end (0xc00e000000000000)
76 * | | 76 * | |
77 * | 512TB |
77 * | | 78 * | |
78 * | 1/2 of virtual map | 79 * +------------------------------+ Kernel IO map end / vmemmap start
79 * | | 80 * | |
81 * | 512TB |
80 * | | 82 * | |
81 * +------------------------------+ Kernel IO map start 83 * +------------------------------+ Kernel vmap end / IO map start
82 * | | 84 * | |
83 * | 1/4 of virtual map | 85 * | 512TB |
84 * | |
85 * +------------------------------+ Kernel vmemap start
86 * | |
87 * | 1/4 of virtual map |
88 * | | 86 * | |
89 * +------------------------------+ Kernel virt start (0xc008000000000000) 87 * +------------------------------+ Kernel virt start (0xc008000000000000)
90 * | | 88 * | |
@@ -93,24 +91,24 @@
93 * +------------------------------+ Kernel linear (0xc.....) 91 * +------------------------------+ Kernel linear (0xc.....)
94 */ 92 */
95 93
96#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) 94#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
97#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000)
98
99/* 95/*
100 * The vmalloc space starts at the beginning of that region, and 96 * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific), chosen so that radix
101 * occupies a quarter of it on radix config. 97 * picks the same value as hash.
102 * (we keep a quarter for the virtual memmap)
103 */ 98 */
99#define RADIX_KERN_MAP_SIZE (1UL << 49)
100
104#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START 101#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
105#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2) 102#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE
106#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE) 103#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
107/*
108 * Defines the address of the vmemap area, in its own region on
109 * hash table CPUs.
110 */
111#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
112 104
113#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) 105#define RADIX_KERN_IO_START RADIX_VMALLOC_END
106#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE
107#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
108
109#define RADIX_VMEMMAP_START RADIX_KERN_IO_END
110#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE
111#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
114 112
115#ifndef __ASSEMBLY__ 113#ifndef __ASSEMBLY__
116#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) 114#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
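
With RADIX_KERN_MAP_SIZE fixed at 1UL << 49, the vmalloc, IO, and vmemmap regions are three consecutive 512TB windows from RADIX_KERN_VIRT_START; the boundaries fall straight out of the macros:

    #include <assert.h>

    #define RADIX_KERN_VIRT_START 0xc008000000000000UL
    #define RADIX_KERN_MAP_SIZE   (1UL << 49)   /* 512TB */

    int main(void)
    {
            unsigned long vmalloc_end = RADIX_KERN_VIRT_START + RADIX_KERN_MAP_SIZE;
            unsigned long io_end      = vmalloc_end + RADIX_KERN_MAP_SIZE;
            unsigned long vmemmap_end = io_end + RADIX_KERN_MAP_SIZE;

            assert(vmalloc_end == 0xc00a000000000000UL);  /* vmap end / IO start */
            assert(io_end      == 0xc00c000000000000UL);  /* IO end / vmemmap start */
            assert(vmemmap_end == 0xc00e000000000000UL);
            return 0;
    }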
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index db0dedab65ee..f0d3194ba41b 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,8 +2,6 @@
2#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H 2#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
3#define _ASM_POWERPC_BOOK3S_64_SLICE_H 3#define _ASM_POWERPC_BOOK3S_64_SLICE_H
4 4
5#ifdef CONFIG_PPC_MM_SLICES
6
7#define SLICE_LOW_SHIFT 28 5#define SLICE_LOW_SHIFT 28
8#define SLICE_LOW_TOP (0x100000000ul) 6#define SLICE_LOW_TOP (0x100000000ul)
9#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) 7#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,15 +11,6 @@
13#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) 11#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
14#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) 12#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
15 13
16#else /* CONFIG_PPC_MM_SLICES */ 14#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
17
18#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
19#define slice_set_user_psize(mm, psize) \
20do { \
21 (mm)->context.user_psize = (psize); \
22 (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
23} while (0)
24
25#endif /* CONFIG_PPC_MM_SLICES */
26 15
27#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ 16#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 43e5f31fe64d..9844b3ded187 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -27,10 +27,11 @@
27 * the THREAD_WINKLE_BITS are set, which indicate which threads have not 27 * the THREAD_WINKLE_BITS are set, which indicate which threads have not
28 * yet woken from the winkle state. 28 * yet woken from the winkle state.
29 */ 29 */
30#define PNV_CORE_IDLE_LOCK_BIT 0x10000000 30#define NR_PNV_CORE_IDLE_LOCK_BIT 28
31#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
31 32
33#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16
32#define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000 34#define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000
33#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT 0x00080000
34#define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000 35#define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000
35#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8 36#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8
36#define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00 37#define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00
@@ -68,16 +69,6 @@
68#define ERR_DEEP_STATE_ESL_MISMATCH -2 69#define ERR_DEEP_STATE_ESL_MISMATCH -2
69 70
70#ifndef __ASSEMBLY__ 71#ifndef __ASSEMBLY__
71/* Additional SPRs that need to be saved/restored during stop */
72struct stop_sprs {
73 u64 pid;
74 u64 ldbar;
75 u64 fscr;
76 u64 hfscr;
77 u64 mmcr1;
78 u64 mmcr2;
79 u64 mmcra;
80};
81 72
82#define PNV_IDLE_NAME_LEN 16 73#define PNV_IDLE_NAME_LEN 16
83struct pnv_idle_states_t { 74struct pnv_idle_states_t {
@@ -92,10 +83,6 @@ struct pnv_idle_states_t {
92 83
93extern struct pnv_idle_states_t *pnv_idle_states; 84extern struct pnv_idle_states_t *pnv_idle_states;
94extern int nr_pnv_idle_states; 85extern int nr_pnv_idle_states;
95extern u32 pnv_fastsleep_workaround_at_entry[];
96extern u32 pnv_fastsleep_workaround_at_exit[];
97
98extern u64 pnv_first_deep_stop_state;
99 86
100unsigned long pnv_cpu_offline(unsigned int cpu); 87unsigned long pnv_cpu_offline(unsigned int cpu);
101int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); 88int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 7c1d8e74b25d..7f3279b014db 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -17,6 +17,9 @@ struct drmem_lmb {
17 u32 drc_index; 17 u32 drc_index;
18 u32 aa_index; 18 u32 aa_index;
19 u32 flags; 19 u32 flags;
20#ifdef CONFIG_MEMORY_HOTPLUG
21 int nid;
22#endif
20}; 23};
21 24
22struct drmem_lmb_info { 25struct drmem_lmb_info {
@@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
104 lmb->aa_index = 0xffffffff; 107 lmb->aa_index = 0xffffffff;
105} 108}
106 109
110#ifdef CONFIG_MEMORY_HOTPLUG
111static inline void lmb_set_nid(struct drmem_lmb *lmb)
112{
113 lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
114}
115static inline void lmb_clear_nid(struct drmem_lmb *lmb)
116{
117 lmb->nid = -1;
118}
119#else
120static inline void lmb_set_nid(struct drmem_lmb *lmb)
121{
122}
123static inline void lmb_clear_nid(struct drmem_lmb *lmb)
124{
125}
126#endif
127
107#endif /* _ASM_POWERPC_LMB_H */ 128#endif /* _ASM_POWERPC_LMB_H */
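
Caching the node id in each LMB avoids a memory_add_physaddr_to_nid() lookup on every hotplug add or remove; it is filled in once when the LMB array is parsed and cleared when an LMB goes away. Roughly (for_each_drmem_lmb is the existing iterator; the surrounding function is a sketch):

    /* sketch: populate the cached nid for every LMB at parse time */
    static void sketch_drmem_init_nids(void)
    {
            struct drmem_lmb *lmb;

            for_each_drmem_lmb(lmb)
                    lmb_set_nid(lmb);
    }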
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 937bb630093f..bef4e05a6823 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
497 RESTORE_CTR(r1, area); \ 497 RESTORE_CTR(r1, area); \
498 b bad_stack; \ 498 b bad_stack; \
4993: EXCEPTION_PROLOG_COMMON_1(); \ 4993: EXCEPTION_PROLOG_COMMON_1(); \
500 kuap_save_amr_and_lock r9, r10, cr1, cr0; \
500 beq 4f; /* if from kernel mode */ \ 501 beq 4f; /* if from kernel mode */ \
501 ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ 502 ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
502 SAVE_PPR(area, r9); \ 503 SAVE_PPR(area, r9); \
@@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
691 */ 692 */
692#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ 693#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \
693 EXCEPTION_PROLOG_COMMON_1(); \ 694 EXCEPTION_PROLOG_COMMON_1(); \
695 kuap_save_amr_and_lock r9, r10, cr1; \
694 EXCEPTION_PROLOG_COMMON_2(area); \ 696 EXCEPTION_PROLOG_COMMON_2(area); \
695 EXCEPTION_PROLOG_COMMON_3(trap); \ 697 EXCEPTION_PROLOG_COMMON_3(trap); \
696 /* Volatile regs are potentially clobbered here */ \ 698 /* Volatile regs are potentially clobbered here */ \
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 188776befaf9..e2099c0a15c3 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -219,5 +219,6 @@ extern void fadump_cleanup(void);
219static inline int is_fadump_active(void) { return 0; } 219static inline int is_fadump_active(void) { return 0; }
220static inline int should_fadump_crash(void) { return 0; } 220static inline int should_fadump_crash(void) { return 0; }
221static inline void crash_fadump(struct pt_regs *regs, const char *str) { } 221static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
222static inline void fadump_cleanup(void) { }
222#endif 223#endif
223#endif 224#endif
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 40a6c9261a6b..f6fc31f8baff 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -100,6 +100,9 @@ label##5: \
100#define END_MMU_FTR_SECTION(msk, val) \ 100#define END_MMU_FTR_SECTION(msk, val) \
101 END_MMU_FTR_SECTION_NESTED(msk, val, 97) 101 END_MMU_FTR_SECTION_NESTED(msk, val, 97)
102 102
103#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \
104 END_MMU_FTR_SECTION_NESTED((msk), (msk), label)
105
103#define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) 106#define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk))
104#define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) 107#define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0)
105 108
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index b9fbed84ddca..0cfc365d814b 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -22,7 +22,12 @@
22#include <asm/kmap_types.h> 22#include <asm/kmap_types.h>
23#endif 23#endif
24 24
25#ifdef CONFIG_KASAN
26#include <asm/kasan.h>
27#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE)
28#else
25#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) 29#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE))
30#endif
26 31
27/* 32/*
28 * Here we define all the compile-time 'special' virtual 33 * Here we define all the compile-time 'special' virtual
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 88b38b37c21b..3a6aa57b9d90 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
35{ 35{
36 int oldval = 0, ret; 36 int oldval = 0, ret;
37 37
38 allow_write_to_user(uaddr, sizeof(*uaddr));
38 pagefault_disable(); 39 pagefault_disable();
39 40
40 switch (op) { 41 switch (op) {
@@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
62 if (!ret) 63 if (!ret)
63 *oval = oldval; 64 *oval = oldval;
64 65
66 prevent_write_to_user(uaddr, sizeof(*uaddr));
65 return ret; 67 return ret;
66} 68}
67 69
@@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
75 if (!access_ok(uaddr, sizeof(u32))) 77 if (!access_ok(uaddr, sizeof(u32)))
76 return -EFAULT; 78 return -EFAULT;
77 79
80 allow_write_to_user(uaddr, sizeof(*uaddr));
78 __asm__ __volatile__ ( 81 __asm__ __volatile__ (
79 PPC_ATOMIC_ENTRY_BARRIER 82 PPC_ATOMIC_ENTRY_BARRIER
80"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ 83"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
@@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
95 : "cc", "memory"); 98 : "cc", "memory");
96 99
97 *uval = prev; 100 *uval = prev;
101 prevent_write_to_user(uaddr, sizeof(*uaddr));
98 return ret; 102 return ret;
99} 103}
100 104
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 8d40565ad0c3..20a101046cff 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -6,82 +6,16 @@
6#include <asm/page.h> 6#include <asm/page.h>
7 7
8#ifdef CONFIG_PPC_BOOK3S_64 8#ifdef CONFIG_PPC_BOOK3S_64
9
10#include <asm/book3s/64/hugetlb.h> 9#include <asm/book3s/64/hugetlb.h>
11/* 10#elif defined(CONFIG_PPC_FSL_BOOK3E)
12 * This should work for other subarchs too. But right now we use the 11#include <asm/nohash/hugetlb-book3e.h>
13 * new format only for 64bit book3s 12#elif defined(CONFIG_PPC_8xx)
14 */ 13#include <asm/nohash/32/hugetlb-8xx.h>
15static inline pte_t *hugepd_page(hugepd_t hpd)
16{
17 BUG_ON(!hugepd_ok(hpd));
18 /*
19 * We have only four bits to encode, MMU page size
20 */
21 BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
22 return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
23}
24
25static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
26{
27 return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;
28}
29
30static inline unsigned int hugepd_shift(hugepd_t hpd)
31{
32 return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
33}
34static inline void flush_hugetlb_page(struct vm_area_struct *vma,
35 unsigned long vmaddr)
36{
37 if (radix_enabled())
38 return radix__flush_hugetlb_page(vma, vmaddr);
39}
40
41#else
42
43static inline pte_t *hugepd_page(hugepd_t hpd)
44{
45 BUG_ON(!hugepd_ok(hpd));
46#ifdef CONFIG_PPC_8xx
47 return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
48#else
49 return (pte_t *)((hpd_val(hpd) &
50 ~HUGEPD_SHIFT_MASK) | PD_HUGE);
51#endif
52}
53
54static inline unsigned int hugepd_shift(hugepd_t hpd)
55{
56#ifdef CONFIG_PPC_8xx
57 return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17;
58#else
59 return hpd_val(hpd) & HUGEPD_SHIFT_MASK;
60#endif
61}
62
63#endif /* CONFIG_PPC_BOOK3S_64 */ 14#endif /* CONFIG_PPC_BOOK3S_64 */
64 15
16extern bool hugetlb_disabled;
65 17
66static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, 18void hugetlbpage_init_default(void);
67 unsigned pdshift)
68{
69 /*
70 * On FSL BookE, we have multiple higher-level table entries that
71 * point to the same hugepte. Just use the first one since they're all
72 * identical. So for that case, idx=0.
73 */
74 unsigned long idx = 0;
75
76 pte_t *dir = hugepd_page(hpd);
77#ifdef CONFIG_PPC_8xx
78 idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT;
79#elif !defined(CONFIG_PPC_FSL_BOOK3E)
80 idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
81#endif
82
83 return dir + idx;
84}
85 19
86void flush_dcache_icache_hugepage(struct page *page); 20void flush_dcache_icache_hugepage(struct page *page);
87 21
@@ -99,15 +33,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
99 33
100void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, 34void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
101 pte_t pte); 35 pte_t pte);
102#ifdef CONFIG_PPC_8xx
103static inline void flush_hugetlb_page(struct vm_area_struct *vma,
104 unsigned long vmaddr)
105{
106 flush_tlb_page(vma, vmaddr);
107}
108#else
109void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
110#endif
111 36
112#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE 37#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
113void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, 38void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ece4dc89c90b..0fe8c1e46bbc 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void)
90extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); 90extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
91int hw_breakpoint_handler(struct die_args *args); 91int hw_breakpoint_handler(struct die_args *args);
92 92
93extern int set_dawr(struct arch_hw_breakpoint *brk);
94extern bool dawr_force_enable;
95static inline bool dawr_enabled(void)
96{
97 return dawr_force_enable;
98}
99
93#else /* CONFIG_HAVE_HW_BREAKPOINT */ 100#else /* CONFIG_HAVE_HW_BREAKPOINT */
94static inline void hw_breakpoint_disable(void) { } 101static inline void hw_breakpoint_disable(void) { }
95static inline void thread_change_pc(struct task_struct *tsk, 102static inline void thread_change_pc(struct task_struct *tsk,
96 struct pt_regs *regs) { } 103 struct pt_regs *regs) { }
104static inline bool dawr_enabled(void) { return false; }
97#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 105#endif /* CONFIG_HAVE_HW_BREAKPOINT */
98#endif /* __KERNEL__ */ 106#endif /* __KERNEL__ */
99#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ 107#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
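
dawr_enabled() is the single gate callers consult before programming the DAWR, backed by the new dawr_force_enable knob; without CONFIG_HAVE_HW_BREAKPOINT it compiles to false. A caller would be shaped like this (a sketch; the errno choice is illustrative):

    /* sketch: refuse to program the DAWR unless it has been opted in */
    static int sketch_apply_breakpoint(struct arch_hw_breakpoint *brk)
    {
            if (!dawr_enabled())
                    return -ENODEV;   /* illustrative error code */
            return set_dawr(brk);
    }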
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 69f516ecb2fd..7c2ef0e42661 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -33,6 +33,7 @@
33 */ 33 */
34#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL 34#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL
35#define THREAD_IMC_ENABLE 0x8000000000000000ULL 35#define THREAD_IMC_ENABLE 0x8000000000000000ULL
36#define TRACE_IMC_ENABLE 0x4000000000000000ULL
36 37
37/* 38/*
38 * For debugfs interface for imc-mode and imc-command 39 * For debugfs interface for imc-mode and imc-command
@@ -59,6 +60,34 @@ struct imc_events {
59 char *scale; 60 char *scale;
60}; 61};
61 62
63/*
64 * Trace IMC hardware writes a 64-byte record on
65 * Core Performance Monitoring Counter (CPMC)
66 * overflow. Here is the layout of the trace IMC record:
67 *
68 * DW 0 : Timebase
69 * DW 1 : Program Counter
70 * DW 2 : PIDR information
71 * DW 3 : CPMC1
72 * DW 4 : CPMC2
73 * DW 5 : CPMC3
74 * DW 6 : CPMC4
75 * DW 7 : Timebase
76 * .....
77 *
78 * The following is the data structure to hold trace imc data.
79 */
80struct trace_imc_data {
81 u64 tb1;
82 u64 ip;
83 u64 val;
84 u64 cpmc1;
85 u64 cpmc2;
86 u64 cpmc3;
87 u64 cpmc4;
88 u64 tb2;
89};
90
62/* Event attribute array index */ 91/* Event attribute array index */
63#define IMC_FORMAT_ATTR 0 92#define IMC_FORMAT_ATTR 0
64#define IMC_EVENT_ATTR 1 93#define IMC_EVENT_ATTR 1
@@ -69,6 +98,13 @@ struct imc_events {
69#define IMC_EVENT_OFFSET_MASK 0xffffffffULL 98#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
70 99
71/* 100/*
101 * Macro to mask bits 0:21 of the first double word (which is the timebase) to
102 * compare with 8th double word (timebase) of trace imc record data.
103 */
104#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL
105
106
107/*
72 * Device tree parser code detects IMC pmu support and 108 * Device tree parser code detects IMC pmu support and
73 * registers new IMC pmus. This structure will hold the 109 * registers new IMC pmus. This structure will hold the
74 * pmu functions, events, counter memory information 110 * pmu functions, events, counter memory information
@@ -113,6 +149,7 @@ struct imc_pmu_ref {
113 149
114enum { 150enum {
115 IMC_TYPE_THREAD = 0x1, 151 IMC_TYPE_THREAD = 0x1,
152 IMC_TYPE_TRACE = 0x2,
116 IMC_TYPE_CORE = 0x4, 153 IMC_TYPE_CORE = 0x4,
117 IMC_TYPE_CHIP = 0x10, 154 IMC_TYPE_CHIP = 0x10,
118}; 155};
@@ -123,6 +160,8 @@ enum {
123#define IMC_DOMAIN_NEST 1 160#define IMC_DOMAIN_NEST 1
124#define IMC_DOMAIN_CORE 2 161#define IMC_DOMAIN_CORE 2
125#define IMC_DOMAIN_THREAD 3 162#define IMC_DOMAIN_THREAD 3
163/* For trace-imc the domain is still thread but it operates in trace-mode */
164#define IMC_DOMAIN_TRACE 4
126 165
127extern int init_imc_pmu(struct device_node *parent, 166extern int init_imc_pmu(struct device_node *parent,
128 struct imc_pmu *pmu_ptr, int pmu_id); 167 struct imc_pmu *pmu_ptr, int pmu_id);
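
Because these records are written out-of-band by hardware, a reader has to detect torn ones; comparing the leading timebase word (masked to its low 42 bits by IMC_TRACE_RECORD_TB1_MASK) against the trailing timebase is the consistency check this layout enables. One plausible, host-runnable form of that test:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL   /* low 42 bits */

    struct trace_imc_data {
            uint64_t tb1, ip, val, cpmc1, cpmc2, cpmc3, cpmc4, tb2;
    };

    /* a record is plausible when both timebase snapshots agree after masking */
    static bool record_consistent(const struct trace_imc_data *rec)
    {
            return (rec->tb1 & IMC_TRACE_RECORD_TB1_MASK) ==
                   (rec->tb2 & IMC_TRACE_RECORD_TB1_MASK);
    }

    int main(void)
    {
            struct trace_imc_data rec = { .tb1 = 0x123456789abULL,
                                          .tb2 = 0x123456789abULL };

            assert(record_consistent(&rec));
            rec.tb2++;                        /* torn record */
            assert(!record_consistent(&rec));
            return 0;
    }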
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
new file mode 100644
index 000000000000..296e51c2f066
--- /dev/null
+++ b/arch/powerpc/include/asm/kasan.h
@@ -0,0 +1,40 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __ASM_KASAN_H
3#define __ASM_KASAN_H
4
5#ifdef CONFIG_KASAN
6#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn)
7#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn)
8#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn)
9#else
10#define _GLOBAL_KASAN(fn) _GLOBAL(fn)
11#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn)
12#define EXPORT_SYMBOL_KASAN(fn)
13#endif
14
15#ifndef __ASSEMBLY__
16
17#include <asm/page.h>
18
19#define KASAN_SHADOW_SCALE_SHIFT 3
20
21#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
22 (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT))
23
24#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
25
26#define KASAN_SHADOW_END 0UL
27
28#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)
29
30#ifdef CONFIG_KASAN
31void kasan_early_init(void);
32void kasan_mmu_init(void);
33void kasan_init(void);
34#else
35static inline void kasan_init(void) { }
36static inline void kasan_mmu_init(void) { }
37#endif
38
39#endif /* __ASSEMBLY */
40#endif
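
With KASAN_SHADOW_SCALE_SHIFT of 3, one shadow byte tracks eight bytes of kernel memory, and KASAN_SHADOW_START is simply PAGE_OFFSET pushed through that mapping. A host-side sketch with made-up 32-bit values for the two bases:

    #include <assert.h>

    #define KASAN_SHADOW_SCALE_SHIFT 3
    #define PAGE_OFFSET         0xc0000000UL   /* assumed */
    #define KASAN_SHADOW_OFFSET 0xe0000000UL   /* assumed config value */
    #define KASAN_SHADOW_START  (KASAN_SHADOW_OFFSET + \
                                 (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT))

    static unsigned long mem_to_shadow(unsigned long addr)
    {
            return KASAN_SHADOW_OFFSET + (addr >> KASAN_SHADOW_SCALE_SHIFT);
    }

    int main(void)
    {
            /* the first byte of lowmem maps to the first shadow byte */
            assert(mem_to_shadow(PAGE_OFFSET) == KASAN_SHADOW_START);
            /* eight bytes of memory share one shadow byte */
            assert(mem_to_shadow(PAGE_OFFSET + 7) == KASAN_SHADOW_START);
            return 0;
    }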
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
new file mode 100644
index 000000000000..5b5e39643a27
--- /dev/null
+++ b/arch/powerpc/include/asm/kup.h
@@ -0,0 +1,73 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_POWERPC_KUP_H_
3#define _ASM_POWERPC_KUP_H_
4
5#ifdef CONFIG_PPC64
6#include <asm/book3s/64/kup-radix.h>
7#endif
8#ifdef CONFIG_PPC_8xx
9#include <asm/nohash/32/kup-8xx.h>
10#endif
11#ifdef CONFIG_PPC_BOOK3S_32
12#include <asm/book3s/32/kup.h>
13#endif
14
15#ifdef __ASSEMBLY__
16#ifndef CONFIG_PPC_KUAP
17.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
18.endm
19
20.macro kuap_restore sp, current, gpr1, gpr2, gpr3
21.endm
22
23.macro kuap_check current, gpr
24.endm
25
26#endif
27
28#else /* !__ASSEMBLY__ */
29
30#include <asm/pgtable.h>
31
32void setup_kup(void);
33
34#ifdef CONFIG_PPC_KUEP
35void setup_kuep(bool disabled);
36#else
37static inline void setup_kuep(bool disabled) { }
38#endif /* CONFIG_PPC_KUEP */
39
40#ifdef CONFIG_PPC_KUAP
41void setup_kuap(bool disabled);
42#else
43static inline void setup_kuap(bool disabled) { }
44static inline void allow_user_access(void __user *to, const void __user *from,
45 unsigned long size) { }
46static inline void prevent_user_access(void __user *to, const void __user *from,
47 unsigned long size) { }
48static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; }
49#endif /* CONFIG_PPC_KUAP */
50
51static inline void allow_read_from_user(const void __user *from, unsigned long size)
52{
53 allow_user_access(NULL, from, size);
54}
55
56static inline void allow_write_to_user(void __user *to, unsigned long size)
57{
58 allow_user_access(to, NULL, size);
59}
60
61static inline void prevent_read_from_user(const void __user *from, unsigned long size)
62{
63 prevent_user_access(NULL, from, size);
64}
65
66static inline void prevent_write_to_user(void __user *to, unsigned long size)
67{
68 prevent_user_access(to, NULL, size);
69}
70
71#endif /* !__ASSEMBLY__ */
72
73#endif /* _ASM_POWERPC_KUP_H_ */
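
The four directional wrappers make the call-site pattern mechanical: open the narrowest window, do the access, close it again on every path. The futex changes further down follow exactly this shape; schematically (put_user_sketch stands in for a real accessor):

    /* schematic user write under KUAP */
    static int sketch_write_user_u32(u32 __user *uaddr, u32 val)
    {
            int ret;

            allow_write_to_user(uaddr, sizeof(*uaddr));
            ret = put_user_sketch(val, uaddr);   /* hypothetical accessor */
            prevent_write_to_user(uaddr, sizeof(*uaddr));
            return ret;
    }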
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 17996bc9382b..23247a132ce8 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -31,7 +31,7 @@ enum MCE_Version {
31enum MCE_Severity { 31enum MCE_Severity {
32 MCE_SEV_NO_ERROR = 0, 32 MCE_SEV_NO_ERROR = 0,
33 MCE_SEV_WARNING = 1, 33 MCE_SEV_WARNING = 1,
34 MCE_SEV_ERROR_SYNC = 2, 34 MCE_SEV_SEVERE = 2,
35 MCE_SEV_FATAL = 3, 35 MCE_SEV_FATAL = 3,
36}; 36};
37 37
@@ -56,6 +56,14 @@ enum MCE_ErrorType {
56 MCE_ERROR_TYPE_LINK = 7, 56 MCE_ERROR_TYPE_LINK = 7,
57}; 57};
58 58
59enum MCE_ErrorClass {
60 MCE_ECLASS_UNKNOWN = 0,
61 MCE_ECLASS_HARDWARE,
62 MCE_ECLASS_HARD_INDETERMINATE,
63 MCE_ECLASS_SOFTWARE,
64 MCE_ECLASS_SOFT_INDETERMINATE,
65};
66
59enum MCE_UeErrorType { 67enum MCE_UeErrorType {
60 MCE_UE_ERROR_INDETERMINATE = 0, 68 MCE_UE_ERROR_INDETERMINATE = 0,
61 MCE_UE_ERROR_IFETCH = 1, 69 MCE_UE_ERROR_IFETCH = 1,
@@ -110,73 +118,75 @@ enum MCE_LinkErrorType {
110}; 118};
111 119
112struct machine_check_event { 120struct machine_check_event {
113 enum MCE_Version version:8; /* 0x00 */ 121 enum MCE_Version version:8;
114 uint8_t in_use; /* 0x01 */ 122 u8 in_use;
115 enum MCE_Severity severity:8; /* 0x02 */ 123 enum MCE_Severity severity:8;
116 enum MCE_Initiator initiator:8; /* 0x03 */ 124 enum MCE_Initiator initiator:8;
117 enum MCE_ErrorType error_type:8; /* 0x04 */ 125 enum MCE_ErrorType error_type:8;
118 enum MCE_Disposition disposition:8; /* 0x05 */ 126 enum MCE_ErrorClass error_class:8;
119 uint8_t reserved_1[2]; /* 0x06 */ 127 enum MCE_Disposition disposition:8;
120 uint64_t gpr3; /* 0x08 */ 128 bool sync_error;
121 uint64_t srr0; /* 0x10 */ 129 u16 cpu;
122 uint64_t srr1; /* 0x18 */ 130 u64 gpr3;
123 union { /* 0x20 */ 131 u64 srr0;
132 u64 srr1;
133 union {
124 struct { 134 struct {
125 enum MCE_UeErrorType ue_error_type:8; 135 enum MCE_UeErrorType ue_error_type:8;
126 uint8_t effective_address_provided; 136 u8 effective_address_provided;
127 uint8_t physical_address_provided; 137 u8 physical_address_provided;
128 uint8_t reserved_1[5]; 138 u8 reserved_1[5];
129 uint64_t effective_address; 139 u64 effective_address;
130 uint64_t physical_address; 140 u64 physical_address;
131 uint8_t reserved_2[8]; 141 u8 reserved_2[8];
132 } ue_error; 142 } ue_error;
133 143
134 struct { 144 struct {
135 enum MCE_SlbErrorType slb_error_type:8; 145 enum MCE_SlbErrorType slb_error_type:8;
136 uint8_t effective_address_provided; 146 u8 effective_address_provided;
137 uint8_t reserved_1[6]; 147 u8 reserved_1[6];
138 uint64_t effective_address; 148 u64 effective_address;
139 uint8_t reserved_2[16]; 149 u8 reserved_2[16];
140 } slb_error; 150 } slb_error;
141 151
142 struct { 152 struct {
143 enum MCE_EratErrorType erat_error_type:8; 153 enum MCE_EratErrorType erat_error_type:8;
144 uint8_t effective_address_provided; 154 u8 effective_address_provided;
145 uint8_t reserved_1[6]; 155 u8 reserved_1[6];
146 uint64_t effective_address; 156 u64 effective_address;
147 uint8_t reserved_2[16]; 157 u8 reserved_2[16];
148 } erat_error; 158 } erat_error;
149 159
150 struct { 160 struct {
151 enum MCE_TlbErrorType tlb_error_type:8; 161 enum MCE_TlbErrorType tlb_error_type:8;
152 uint8_t effective_address_provided; 162 u8 effective_address_provided;
153 uint8_t reserved_1[6]; 163 u8 reserved_1[6];
154 uint64_t effective_address; 164 u64 effective_address;
155 uint8_t reserved_2[16]; 165 u8 reserved_2[16];
156 } tlb_error; 166 } tlb_error;
157 167
158 struct { 168 struct {
159 enum MCE_UserErrorType user_error_type:8; 169 enum MCE_UserErrorType user_error_type:8;
160 uint8_t effective_address_provided; 170 u8 effective_address_provided;
161 uint8_t reserved_1[6]; 171 u8 reserved_1[6];
162 uint64_t effective_address; 172 u64 effective_address;
163 uint8_t reserved_2[16]; 173 u8 reserved_2[16];
164 } user_error; 174 } user_error;
165 175
166 struct { 176 struct {
167 enum MCE_RaErrorType ra_error_type:8; 177 enum MCE_RaErrorType ra_error_type:8;
168 uint8_t effective_address_provided; 178 u8 effective_address_provided;
169 uint8_t reserved_1[6]; 179 u8 reserved_1[6];
170 uint64_t effective_address; 180 u64 effective_address;
171 uint8_t reserved_2[16]; 181 u8 reserved_2[16];
172 } ra_error; 182 } ra_error;
173 183
174 struct { 184 struct {
175 enum MCE_LinkErrorType link_error_type:8; 185 enum MCE_LinkErrorType link_error_type:8;
176 uint8_t effective_address_provided; 186 u8 effective_address_provided;
177 uint8_t reserved_1[6]; 187 u8 reserved_1[6];
178 uint64_t effective_address; 188 u64 effective_address;
179 uint8_t reserved_2[16]; 189 u8 reserved_2[16];
180 } link_error; 190 } link_error;
181 } u; 191 } u;
182}; 192};
@@ -194,6 +204,8 @@ struct mce_error_info {
194 } u; 204 } u;
195 enum MCE_Severity severity:8; 205 enum MCE_Severity severity:8;
196 enum MCE_Initiator initiator:8; 206 enum MCE_Initiator initiator:8;
207 enum MCE_ErrorClass error_class:8;
208 bool sync_error;
197}; 209};
198 210
199#define MAX_MC_EVT 100 211#define MAX_MC_EVT 100
@@ -210,6 +222,7 @@ extern void release_mce_event(void);
210extern void machine_check_queue_event(void); 222extern void machine_check_queue_event(void);
211extern void machine_check_print_event_info(struct machine_check_event *evt, 223extern void machine_check_print_event_info(struct machine_check_event *evt,
212 bool user_mode, bool in_guest); 224 bool user_mode, bool in_guest);
225unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
213#ifdef CONFIG_PPC_BOOK3S_64 226#ifdef CONFIG_PPC_BOOK3S_64
214void flush_and_reload_slb(void); 227void flush_and_reload_slb(void);
215#endif /* CONFIG_PPC_BOOK3S_64 */ 228#endif /* CONFIG_PPC_BOOK3S_64 */
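
error_class and sync_error now travel with the event itself, so consumers can describe an error without re-deriving anything from SRR1 bits. One plausible mapping from class to a human-readable string, runnable on the host (the strings are illustrative, not the kernel's exact wording):

    #include <stdio.h>

    enum MCE_ErrorClass {   /* mirrors the enum added above */
            MCE_ECLASS_UNKNOWN = 0,
            MCE_ECLASS_HARDWARE,
            MCE_ECLASS_HARD_INDETERMINATE,
            MCE_ECLASS_SOFTWARE,
            MCE_ECLASS_SOFT_INDETERMINATE,
    };

    static const char *mce_class_str(enum MCE_ErrorClass c)
    {
            switch (c) {
            case MCE_ECLASS_HARDWARE:           return "Hardware error";
            case MCE_ECLASS_HARD_INDETERMINATE: return "Probable hardware error";
            case MCE_ECLASS_SOFTWARE:           return "Software error";
            case MCE_ECLASS_SOFT_INDETERMINATE: return "Probable software error";
            default:                            return "Unknown";
            }
    }

    int main(void)
    {
            printf("%s\n", mce_class_str(MCE_ECLASS_SOFT_INDETERMINATE));
            return 0;
    }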
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 8ddd4a91bdc1..ba94ce8c22d7 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -107,6 +107,11 @@
107 */ 107 */
108#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) 108#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
109 109
110/*
111 * Supports KUAP (key 0 controlling userspace addresses) on radix
112 */
113#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000)
114
110/* MMU feature bit sets for various CPUs */ 115/* MMU feature bit sets for various CPUs */
111#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ 116#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
112 MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 117 MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
@@ -124,6 +129,9 @@
124#ifndef __ASSEMBLY__ 129#ifndef __ASSEMBLY__
125#include <linux/bug.h> 130#include <linux/bug.h>
126#include <asm/cputable.h> 131#include <asm/cputable.h>
132#include <asm/page.h>
133
134typedef pte_t *pgtable_t;
127 135
128#ifdef CONFIG_PPC_FSL_BOOK3E 136#ifdef CONFIG_PPC_FSL_BOOK3E
129#include <asm/percpu.h> 137#include <asm/percpu.h>
@@ -164,7 +172,10 @@ enum {
164#endif 172#endif
165#ifdef CONFIG_PPC_RADIX_MMU 173#ifdef CONFIG_PPC_RADIX_MMU
166 MMU_FTR_TYPE_RADIX | 174 MMU_FTR_TYPE_RADIX |
167#endif 175#ifdef CONFIG_PPC_KUAP
176 MMU_FTR_RADIX_KUAP |
177#endif /* CONFIG_PPC_KUAP */
178#endif /* CONFIG_PPC_RADIX_MMU */
168 0, 179 0,
169}; 180};
170 181
@@ -341,21 +352,6 @@ static inline bool strict_kernel_rwx_enabled(void)
341 */ 352 */
342#define MMU_PAGE_COUNT 16 353#define MMU_PAGE_COUNT 16
343 354
344/*
345 * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
346 * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
347 * page_to_nid does a page->section->node lookup
348 * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
349 * memory requirements with large number of sections.
350 * 51 bits is the max physical real address on POWER9
351 */
352#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
353 defined (CONFIG_PPC_64K_PAGES)
354#define MAX_PHYSMEM_BITS 51
355#elif defined(CONFIG_PPC64)
356#define MAX_PHYSMEM_BITS 46
357#endif
358
359#ifdef CONFIG_PPC_BOOK3S_64 355#ifdef CONFIG_PPC_BOOK3S_64
360#include <asm/book3s/64/mmu.h> 356#include <asm/book3s/64/mmu.h>
361#else /* CONFIG_PPC_BOOK3S_64 */ 357#else /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6ee8195a2ffb..611204e588b9 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -52,6 +52,7 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
52{ 52{
53 return false; 53 return false;
54} 54}
55static inline void mm_iommu_init(struct mm_struct *mm) { }
55#endif 56#endif
56extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); 57extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
57extern void set_context(unsigned long id, pgd_t *pgd); 58extern void set_context(unsigned long id, pgd_t *pgd);
@@ -228,13 +229,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
228#endif 229#endif
229} 230}
230 231
231#ifdef CONFIG_PPC_BOOK3E_64
232static inline void arch_exit_mmap(struct mm_struct *mm)
233{
234}
235#else
236extern void arch_exit_mmap(struct mm_struct *mm); 232extern void arch_exit_mmap(struct mm_struct *mm);
237#endif
238 233
239static inline void arch_unmap(struct mm_struct *mm, 234static inline void arch_unmap(struct mm_struct *mm,
240 struct vm_area_struct *vma, 235 struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
new file mode 100644
index 000000000000..a46616937d20
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+
+#define PAGE_SHIFT_8M		23
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	BUG_ON(!hugepd_ok(hpd));
+
+	return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+	return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+				    unsigned int pdshift)
+{
+	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT;
+
+	return hugepd_page(hpd) + idx;
+}
+
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+				      unsigned long vmaddr)
+{
+	flush_tlb_page(vma, vmaddr);
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+	*hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT |
+			 (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K));
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+	return shift_to_mmu_psize(shift);
+}
+
+#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
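
The hugepd_shift() arithmetic above is compact; a worked check makes it concrete. The _PMD_PAGE_* values below are assumed from pte-8xx.h (_PMD_PAGE_MASK 0x000c, _PMD_PAGE_512K 0x0004, _PMD_PAGE_8M 0x000c) and are not part of this patch — a standalone sketch, not kernel code:

	#include <assert.h>

	#define _PMD_PAGE_MASK	0x000c	/* assumed from pte-8xx.h */
	#define _PMD_PAGE_512K	0x0004
	#define _PMD_PAGE_8M	0x000c

	int main(void)
	{
		/* hugepd_shift(): ((hpd & _PMD_PAGE_MASK) >> 1) + 17 */
		assert(((_PMD_PAGE_512K & _PMD_PAGE_MASK) >> 1) + 17 == 19); /* 512k = 2^19 */
		assert(((_PMD_PAGE_8M   & _PMD_PAGE_MASK) >> 1) + 17 == 23); /* 8M   = 2^23 */
		return 0;
	}
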
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
new file mode 100644
index 000000000000..1c3133b5f86a
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_8XX_H_
+#define _ASM_POWERPC_KUP_8XX_H_
+
+#include <asm/bug.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
+	lis	\gpr2, MD_APG_KUAP@h	/* only APG0 and APG1 are used */
+	mfspr	\gpr1, SPRN_MD_AP
+	mtspr	SPRN_MD_AP, \gpr2
+	stw	\gpr1, STACK_REGS_KUAP(\sp)
+.endm
+
+.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
+	lwz	\gpr1, STACK_REGS_KUAP(\sp)
+	mtspr	SPRN_MD_AP, \gpr1
+.endm
+
+.macro kuap_check	current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+	mfspr	\gpr, SPRN_MD_AP
+	rlwinm	\gpr, \gpr, 16, 0xffff
+999:	twnei	\gpr, MD_APG_KUAP@h
+	EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/reg.h>
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+				     unsigned long size)
+{
+	mtspr(SPRN_MD_AP, MD_APG_INIT);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+				       unsigned long size)
+{
+	mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
+		    "Bug: fault blocked by AP register !");
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_8XX_H_ */
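
For orientation: with KUAP active, SPRN_MD_AP holds MD_APG_KUAP by default and the generic user-access paths open and re-close the window around each copy. A hedged sketch of the calling pattern (the wrapper name is invented; __copy_tofrom_user() is the existing powerpc primitive):

	static inline unsigned long sketch_copy_to_user(void __user *to,
							const void *from,
							unsigned long n)
	{
		unsigned long ret;

		allow_user_access(to, NULL, n);		/* unlock user mapping */
		ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
		prevent_user_access(to, NULL, n);	/* lock it again */
		return ret;
	}
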
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 0a1a3fc54e54..76af5b0cb16e 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -35,11 +35,18 @@
  * Then we use the APG to say whether accesses are according to Page rules or
  * "all Supervisor" rules (Access to all)
  * Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
  * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MI_APG_INIT	0x4fffffff
+
+/*
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swaped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
  */
-#define MI_APG_INIT	0x44444444
+#define MI_APG_KUEP	0x6fffffff
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MI_RPN is written, bits in
@@ -108,11 +115,18 @@
  * Then we use the APG to say whether accesses are according to Page rules or
  * "all Supervisor" rules (Access to all)
  * Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
  * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MD_APG_INIT	0x4fffffff
+
+/*
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swaped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
  */
-#define MD_APG_INIT	0x44444444
+#define MD_APG_KUAP	0x6fffffff
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MD_RPN is written, bits in
@@ -167,9 +181,26 @@
 #ifdef CONFIG_PPC_MM_SLICES
 #include <asm/nohash/32/slice.h>
 #define SLICE_ARRAY_SIZE	(1 << (32 - SLICE_LOW_SHIFT - 1))
+#define LOW_SLICE_ARRAY_SZ	SLICE_ARRAY_SIZE
 #endif
 
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize	MMU_PAGE_4K
+#elif defined(CONFIG_PPC_16K_PAGES)
+#define mmu_virtual_psize	MMU_PAGE_16K
+#define PTE_FRAG_NR		4
+#define PTE_FRAG_SIZE_SHIFT	12
+#define PTE_FRAG_SIZE		(1UL << 12)
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#define mmu_linear_psize	MMU_PAGE_8M
+
 #ifndef __ASSEMBLY__
+
+#include <linux/mmdebug.h>
+
 struct slice_mask {
 	u64 low_slices;
 	DECLARE_BITMAP(high_slices, 0);
@@ -185,14 +216,56 @@ typedef struct {
 	unsigned char high_slices_psize[0];
 	unsigned long slb_addr_limit;
 	struct slice_mask mask_base_psize; /* 4k or 16k */
-# ifdef CONFIG_HUGETLB_PAGE
 	struct slice_mask mask_512k;
 	struct slice_mask mask_8m;
-# endif
 #endif
 	void *pte_frag;
 } mm_context_t;
 
+#ifdef CONFIG_PPC_MM_SLICES
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+	return ctx->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+	ctx->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+	return ctx->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+	return ctx->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+	return ctx->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+	ctx->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+	if (psize == MMU_PAGE_512K)
+		return &ctx->mask_512k;
+	if (psize == MMU_PAGE_8M)
+		return &ctx->mask_8m;
+
+	BUG_ON(psize != mmu_virtual_psize);
+
+	return &ctx->mask_base_psize;
+}
+#endif /* CONFIG_PPC_MM_SLICE */
+
 #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
 #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
 
@@ -242,17 +315,4 @@ extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
 
 #endif /* !__ASSEMBLY__ */
 
-#if defined(CONFIG_PPC_4K_PAGES)
-#define mmu_virtual_psize	MMU_PAGE_4K
-#elif defined(CONFIG_PPC_16K_PAGES)
-#define mmu_virtual_psize	MMU_PAGE_16K
-#define PTE_FRAG_NR		4
-#define PTE_FRAG_SIZE_SHIFT	12
-#define PTE_FRAG_SIZE		(1UL << 12)
-#else
-#error "Unsupported PAGE_SIZE"
-#endif
-
-#define mmu_linear_psize	MMU_PAGE_8M
-
 #endif /* _ASM_POWERPC_MMU_8XX_H_ */
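
The APG constants pack sixteen 2-bit group fields into one 32-bit SPR, APG0 in the top bits. A quick userspace decode of the two groups the comments describe (a sketch, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned int init = 0x4fffffff, kuap = 0x6fffffff;

		/* APG0 (kernel) stays 01; APG1 (user) flips 00 -> 10 when locked */
		printf("APG0: init=%x kuap=%x\n", (init >> 30) & 3, (kuap >> 30) & 3);
		printf("APG1: init=%x kuap=%x\n", (init >> 28) & 3, (kuap >> 28) & 3);
		return 0;
	}
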
diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h
deleted file mode 100644
index 7d94a36d57d2..000000000000
--- a/arch/powerpc/include/asm/nohash/32/mmu.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_
-#define _ASM_POWERPC_NOHASH_32_MMU_H_
-
-#include <asm/page.h>
-
-#if defined(CONFIG_40x)
-/* 40x-style software loaded TLB */
-#include <asm/nohash/32/mmu-40x.h>
-#elif defined(CONFIG_44x)
-/* 44x-style software loaded TLB */
-#include <asm/nohash/32/mmu-44x.h>
-#elif defined(CONFIG_PPC_BOOK3E_MMU)
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-#elif defined (CONFIG_PPC_8xx)
-/* Motorola/Freescale 8xx software loaded TLB */
-#include <asm/nohash/32/mmu-8xx.h>
-#endif
-
-#ifndef __ASSEMBLY__
-typedef pte_t *pgtable_t;
-#endif
-
-#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index bd186e85b4f7..11eac371e7e0 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -6,39 +6,6 @@
 #include <linux/slab.h>
 
 /*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE	0xf
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-			pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
-/*
  * We don't have any real pmd's, and this code never triggers because
  * the pgd will always be present..
  */
@@ -47,96 +14,22 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 #define __pmd_free_tlb(tlb,x,a)		do { } while (0)
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
-#ifndef CONFIG_BOOKE
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
-				       pte_t *pte)
-{
-	*pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pte_page)
-{
-	*pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
-}
-
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#else
-
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
 				       pte_t *pte)
 {
-	*pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+	if (IS_ENABLED(CONFIG_BOOKE))
+		*pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+	else
+		*pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
 				pgtable_t pte_page)
 {
-	*pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+	if (IS_ENABLED(CONFIG_BOOKE))
+		*pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+	else
+		*pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
 }
 
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#endif
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
-	pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
-	if (!index_size) {
-		pte_fragment_free((unsigned long *)table, 0);
-	} else {
-		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(index_size), table);
-	}
-}
-
-#define check_pgt_cache()	do { } while (0)
-#define get_hugepd_cache_index(x)	(x)
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	unsigned long pgf = (unsigned long)table;
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
-	tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
-	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
-	pgtable_free(table, shift);
-}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
-				  unsigned long address)
-{
-	tlb_flush_pgtable(tlb, address);
-	pgtable_free_tlb(tlb, table, 0);
-}
 #endif /* _ASM_POWERPC_PGALLOC_32_H */
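
The rewrite above trades #ifdef blocks for IS_ENABLED(), which expands to a compile-time 0 or 1: the dead branch is discarded by the compiler, but both branches are still parsed and type-checked in every configuration. Minimal shape of the pattern (CONFIG_FOO/do_foo() are hypothetical placeholders):

	/* IS_ENABLED(CONFIG_FOO) is 1 for =y or =m, else 0, so this folds away: */
	if (IS_ENABLED(CONFIG_FOO))
		do_foo();
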
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index bed433358260..0284f8f5305f 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -64,15 +64,24 @@ extern int icache_44x_need_flush;
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+
+#endif /* !__ASSEMBLY__ */
+
+
 /*
  * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
  * value (for now) on others, from where we can start layout kernel
  * virtual space that goes below PKMAP and FIXMAP
  */
+#include <asm/fixmap.h>
+
 #ifdef CONFIG_HIGHMEM
 #define KVIRT_TOP	PKMAP_BASE
 #else
-#define KVIRT_TOP	(0xfe000000UL)	/* for now, could be FIXMAP_BASE ? */
+#define KVIRT_TOP	FIXADDR_START
 #endif
 
 /*
@@ -379,8 +388,6 @@ static inline int pte_young(pte_t pte)
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
 
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
index 777d62e40ac0..39eb0154ae2d 100644
--- a/arch/powerpc/include/asm/nohash/32/slice.h
+++ b/arch/powerpc/include/asm/nohash/32/slice.h
@@ -13,6 +13,8 @@
 #define SLICE_NUM_HIGH		0ul
 #define GET_HIGH_SLICE_INDEX(addr)	(addr & 0)
 
+#define SLB_ADDR_LIMIT_DEFAULT	DEFAULT_MAP_WINDOW
+
 #endif /* CONFIG_PPC_MM_SLICES */
 
 #endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h
deleted file mode 100644
index e6585480dfc4..000000000000
--- a/arch/powerpc/include/asm/nohash/64/mmu.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_
-#define _ASM_POWERPC_NOHASH_64_MMU_H_
-
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-
-#ifndef __ASSEMBLY__
-typedef struct page *pgtable_t;
-#endif
-
-#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 66d086f85bd5..62321cd12da9 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -18,37 +18,6 @@ struct vmemmap_backing {
 };
 extern struct vmemmap_backing *vmemmap_list;
 
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE	0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-			pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
 #define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, (unsigned long)PUD)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -76,11 +45,9 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
-	pmd_set(pmd, (unsigned long)page_address(pte_page));
+	pmd_set(pmd, (unsigned long)pte_page);
 }
 
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
@@ -92,91 +59,9 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
 }
 
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-	struct page *page;
-	pte_t *pte;
-
-	pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
-	if (!pte)
-		return NULL;
-	page = virt_to_page(pte);
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
-	pgtable_page_dtor(ptepage);
-	__free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, int shift)
-{
-	if (!shift) {
-		pgtable_page_dtor(virt_to_page(table));
-		free_page((unsigned long)table);
-	} else {
-		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(shift), table);
-	}
-}
-
-#define get_hugepd_cache_index(x)	(x)
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
-	unsigned long pgf = (unsigned long)table;
-
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
-	tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
-	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
-	pgtable_free(table, shift);
-}
-
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
-	pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
-				  unsigned long address)
-{
-	tlb_flush_pgtable(tlb, address);
-	pgtable_free_tlb(tlb, page_address(table), 0);
-}
-
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
 	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
 #define __pud_free_tlb(tlb, pud, addr)		      \
 	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
 
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#define check_pgt_cache()	do { } while (0)
-
 #endif /* _ASM_POWERPC_PGALLOC_64_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index e77ed9761632..b9f66cf15c31 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -10,10 +10,6 @@
 #include <asm/barrier.h>
 #include <asm/asm-const.h>
 
-#ifdef CONFIG_PPC_64K_PAGES
-#error "Page size not supported"
-#endif
-
 #define FIRST_USER_ADDRESS	0UL
 
 /*
@@ -23,11 +19,7 @@
 			    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
 #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
-#else
 #define PMD_CACHE_INDEX	PMD_INDEX_SIZE
-#endif
 #define PUD_CACHE_INDEX	PUD_INDEX_SIZE
 
 /*
@@ -73,7 +65,6 @@
 
 #define VMALLOC_REGION_ID  (REGION_ID(VMALLOC_START))
 #define KERNEL_REGION_ID   (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID  (0xfUL)	/* Server only */
 #define USER_REGION_ID     (0UL)
 
 /*
@@ -205,7 +196,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
   (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte)			do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
 
 /* to find an entry in a kernel page-table-directory */
 /* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
deleted file mode 100644
index ad0d6e3cc1c5..000000000000
--- a/arch/powerpc/include/asm/nohash/64/slice.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
-#define _ASM_POWERPC_NOHASH_64_SLICE_H
-
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr)	MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr)	MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize)	do { BUG(); } while (0)
-
-#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h
new file mode 100644
index 000000000000..ecd8694cb229
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
+#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	if (WARN_ON(!hugepd_ok(hpd)))
+		return NULL;
+
+	return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+	return hpd_val(hpd) & HUGEPD_SHIFT_MASK;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+				    unsigned int pdshift)
+{
+	/*
+	 * On FSL BookE, we have multiple higher-level table entries that
+	 * point to the same hugepte.  Just use the first one since they're all
+	 * identical.  So for that case, idx=0.
+	 */
+	return hugepd_page(hpd);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+	/* We use the old format for PPC_FSL_BOOK3E */
+	*hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+	if (shift & 1)	/* Not a power of 4 */
+		return -EINVAL;
+
+	return shift_to_mmu_psize(shift);
+}
+
+#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */
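
check_and_get_huge_psize() rejects odd shifts because, per the comment, the supported sizes here must be powers of 4, and 2^shift is a power of 4 exactly when shift is even. A quick standalone check of that parity test:

	#include <assert.h>

	int main(void)
	{
		assert((22 & 1) == 0);	/* 4M = 2^22 = 4^11: even shift, accepted */
		assert((23 & 1) == 1);	/* 8M = 2^23: odd shift, -EINVAL */
		return 0;
	}
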
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index e20072972e35..4c9777d256fb 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -306,6 +306,8 @@ extern int book3e_htw_mode;
 
 #define mmu_cleanup_all NULL
 
+#define MAX_PHYSMEM_BITS        44
+
 #endif
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h
index a037cb1efb57..edc793e5f08f 100644
--- a/arch/powerpc/include/asm/nohash/mmu.h
+++ b/arch/powerpc/include/asm/nohash/mmu.h
@@ -2,10 +2,18 @@
 #ifndef _ASM_POWERPC_NOHASH_MMU_H_
 #define _ASM_POWERPC_NOHASH_MMU_H_
 
-#ifdef CONFIG_PPC64
-#include <asm/nohash/64/mmu.h>
-#else
-#include <asm/nohash/32/mmu.h>
+#if defined(CONFIG_40x)
+/* 40x-style software loaded TLB */
+#include <asm/nohash/32/mmu-40x.h>
+#elif defined(CONFIG_44x)
+/* 44x-style software loaded TLB */
+#include <asm/nohash/32/mmu-44x.h>
+#elif defined(CONFIG_PPC_BOOK3E_MMU)
+/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
+#include <asm/nohash/mmu-book3e.h>
+#elif defined (CONFIG_PPC_8xx)
+/* Motorola/Freescale 8xx software loaded TLB */
+#include <asm/nohash/32/mmu-8xx.h>
 #endif
 
 #endif /* _ASM_POWERPC_NOHASH_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 0634f2949438..332b13b4ecdb 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_NOHASH_PGALLOC_H
 
 #include <linux/mm.h>
+#include <linux/slab.h>
 
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 #ifdef CONFIG_PPC64
@@ -16,9 +17,64 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
 }
 #endif /* !CONFIG_PPC_BOOK3E */
 
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+			pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
 #ifdef CONFIG_PPC64
 #include <asm/nohash/64/pgalloc.h>
 #else
 #include <asm/nohash/32/pgalloc.h>
 #endif
+
+static inline void pgtable_free(void *table, int shift)
+{
+	if (!shift) {
+		pte_fragment_free((unsigned long *)table, 0);
+	} else {
+		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(shift), table);
+	}
+}
+
+#define get_hugepd_cache_index(x)	(x)
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
+}
+
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	tlb_flush_pgtable(tlb, address);
+	pgtable_free_tlb(tlb, table, 0);
+}
 #endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
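
pgtable_free_tlb() relies on the pointer-tagging trick documented with MAX_PGTABLE_INDEX_SIZE: since every page table is aligned to at least 16 bytes, the cache index can ride in bits 0-3 of the pointer queued for deferred freeing. A standalone illustration of the encode/decode (toy values, not kernel code):

	#include <stdint.h>
	#include <assert.h>

	int main(void)
	{
		uintptr_t table = 0x1000;	/* pretend page-table address */
		unsigned int shift = 0x9;	/* pretend PGT_CACHE index    */

		uintptr_t tagged = table | shift;	/* encode, as pgtable_free_tlb() does */
		assert((tagged & ~0xfUL) == table);	/* decode: the pointer  */
		assert((tagged &  0xfUL) == shift);	/* decode: the shift    */
		return 0;
	}
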
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index dd40d200f274..813918f40765 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -60,13 +60,8 @@
 #define _PAGE_SPECIAL	_PAGE_SW0
 
 /* Base page size */
-#ifdef CONFIG_PPC_64K_PAGES
-#define _PAGE_PSIZE	_PAGE_PSIZE_64K
-#define PTE_RPN_SHIFT	(28)
-#else
 #define _PAGE_PSIZE	_PAGE_PSIZE_4K
 #define	PTE_RPN_SHIFT	(24)
-#endif
 
 #define PTE_WIMGE_SHIFT (19)
 #define PTE_BAP_SHIFT (2)
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 870fb7b239ea..e1577cfa7186 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -186,8 +186,8 @@
 #define OPAL_XIVE_FREE_IRQ			140
 #define OPAL_XIVE_SYNC				141
 #define OPAL_XIVE_DUMP				142
-#define OPAL_XIVE_RESERVED3			143
-#define OPAL_XIVE_RESERVED4			144
+#define OPAL_XIVE_GET_QUEUE_STATE		143
+#define OPAL_XIVE_SET_QUEUE_STATE		144
 #define OPAL_SIGNAL_SYSTEM_RESET		145
 #define OPAL_NPU_INIT_CONTEXT			146
 #define OPAL_NPU_DESTROY_CONTEXT		147
@@ -209,8 +209,10 @@
 #define OPAL_SENSOR_GROUP_ENABLE		163
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
+#define OPAL_HANDLE_HMI2			166
 #define OPAL_NX_COPROC_INIT			167
-#define OPAL_LAST				167
+#define OPAL_XIVE_GET_VP_STATE			170
+#define OPAL_LAST				170
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
@@ -634,6 +636,15 @@ struct OpalHMIEvent {
 	} u;
 };
 
+/* OPAL_HANDLE_HMI2 out_flags */
+enum {
+	OPAL_HMI_FLAGS_TB_RESYNC	= (1ull << 0), /* Timebase has been resynced */
+	OPAL_HMI_FLAGS_DEC_LOST		= (1ull << 1), /* DEC lost, needs to be reprogrammed */
+	OPAL_HMI_FLAGS_HDEC_LOST	= (1ull << 2), /* HDEC lost, needs to be reprogrammed */
+	OPAL_HMI_FLAGS_TOD_TB_FAIL	= (1ull << 3), /* TOD/TB recovery failed. */
+	OPAL_HMI_FLAGS_NEW_EVENT	= (1ull << 63), /* An event has been created */
+};
+
 enum {
 	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
 	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
@@ -1118,6 +1129,7 @@ enum {
 enum {
 	OPAL_IMC_COUNTERS_NEST = 1,
 	OPAL_IMC_COUNTERS_CORE = 2,
+	OPAL_IMC_COUNTERS_TRACE = 3,
 };
 
 
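
The new OPAL_HANDLE_HMI2 call reports what was lost via out_flags, which ties into the merge's time-base recovery work. A hedged sketch of how a caller might react (the helper name and the specific reaction are invented; opal_handle_hmi2() is declared in opal.h below):

	static void sketch_handle_hmi2(void)
	{
		__be64 out_flags = 0;

		if (opal_handle_hmi2(&out_flags) != OPAL_SUCCESS)
			return;

		if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
			pr_emerg("TOD/TB recovery failed\n");	/* hypothetical reaction */
	}
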
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a55b01c90bb1..4cc37e708bc7 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
 int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
 int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
 int64_t opal_handle_hmi(void);
+int64_t opal_handle_hmi2(__be64 *out_flags);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
@@ -279,6 +280,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
 int64_t opal_xive_free_irq(uint32_t girq);
 int64_t opal_xive_sync(uint32_t type, uint32_t id);
 int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
+				  __be32 *out_qtoggle,
+				  __be32 *out_qindex);
+int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
+				  uint32_t qtoggle,
+				  uint32_t qindex);
+int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
 int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
 			uint64_t desc, uint16_t pe_number);
 
@@ -352,6 +360,7 @@ int opal_power_control_init(void);
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
 extern int opal_hmi_exception_early(struct pt_regs *regs);
+extern int opal_hmi_exception_early2(struct pt_regs *regs);
 extern int opal_handle_hmi_exception(struct pt_regs *regs);
 
 extern void opal_shutdown(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 134e912d403f..62f27e0aef7c 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -174,7 +174,6 @@ struct paca_struct {
 	u8 irq_soft_mask;		/* mask for irq soft masking */
 	u8 irq_happened;		/* irq happened while soft-disabled */
 	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */
-	u8 nap_state_lost;		/* NV GPR values lost in power7_idle */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	u8 pmcregs_in_use;		/* pseries puts this in lppaca */
 #endif
@@ -184,23 +183,28 @@ struct paca_struct {
 #endif
 
 #ifdef CONFIG_PPC_POWERNV
-	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
-	u32 *core_idle_state_ptr;
-	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP */
-	/* Mask to indicate thread id in core */
-	u8 thread_mask;
-	/* Mask to denote subcore sibling threads */
-	u8 subcore_sibling_mask;
-	/* Flag to request this thread not to stop */
-	atomic_t dont_stop;
-	/* The PSSCR value that the kernel requested before going to stop */
-	u64 requested_psscr;
+	/* PowerNV idle fields */
+	/* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
+	unsigned long idle_state;
+	union {
+		/* P7/P8 specific fields */
+		struct {
+			/* PNV_THREAD_RUNNING/NAP/SLEEP */
+			u8 thread_idle_state;
+			/* Mask to denote subcore sibling threads */
+			u8 subcore_sibling_mask;
+		};
 
-	/*
-	 * Save area for additional SPRs that need to be
-	 * saved/restored during cpuidle stop.
-	 */
-	struct stop_sprs stop_sprs;
+		/* P9 specific fields */
+		struct {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+			/* The PSSCR value that the kernel requested before going to stop */
+			u64 requested_psscr;
+			/* Flag to request this thread not to stop */
+			atomic_t dont_stop;
+#endif
+		};
+	};
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
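
The P7/P8 nap fields and the P9 stop fields are never live on the same machine, so overlaying them in an anonymous union shrinks the per-CPU paca. A toy model of that layout choice (plain C, nothing from the patch beyond the idea):

	#include <stdio.h>

	struct toy_paca {
		unsigned long idle_state;
		union {
			struct {	/* P7/P8 */
				unsigned char thread_idle_state;
				unsigned char subcore_sibling_mask;
			};
			struct {	/* P9 */
				unsigned long long requested_psscr;
				int dont_stop;
			};
		};
	};

	int main(void)
	{
		/* one overlay instead of two disjoint field sets */
		printf("%zu\n", sizeof(struct toy_paca));
		return 0;
	}
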
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ed870468ef6f..dbc8c0679480 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -28,11 +28,15 @@
 #define PAGE_SIZE		(ASM_CONST(1) << PAGE_SHIFT)
 
 #ifndef __ASSEMBLY__
-#ifdef CONFIG_HUGETLB_PAGE
-extern bool hugetlb_disabled;
-extern unsigned int HPAGE_SHIFT;
-#else
+#ifndef CONFIG_HUGETLB_PAGE
 #define HPAGE_SHIFT PAGE_SHIFT
+#elif defined(CONFIG_PPC_BOOK3S_64)
+extern unsigned int hpage_shift;
+#define HPAGE_SHIFT hpage_shift
+#elif defined(CONFIG_PPC_8xx)
+#define HPAGE_SHIFT		19	/* 512k pages */
+#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#define HPAGE_SHIFT		22	/* 4M pages */
 #endif
 #define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
@@ -132,18 +136,7 @@ static inline bool pfn_valid(unsigned long pfn)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * On hash the vmalloc and other regions alias to the kernel region when passed
- * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can
- * return true for some vmalloc addresses, which is incorrect. So explicitly
- * check that the address is in the kernel region.
- */
-#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \
-				pfn_valid(virt_to_pfn(kaddr)))
-#else
 #define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
-#endif
 
 /*
  * On Book-E parts we need __va to parse the device tree and we can't
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index e11f03007b57..2b2c60a1a66d 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -20,10 +20,61 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
 
 #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
 
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+	return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+	return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
+void pte_fragment_free(unsigned long *table, int kernel);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	pte_fragment_free((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation. For PTE pages, the allocation size will be
+ * (2^index_size * sizeof(pointer)) and allocations are drawn from
+ * the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer. In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value. This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE	0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) pgtable_cache[shift]
+
+static inline void check_pgt_cache(void) { }
+
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/book3s/pgalloc.h>
 #else
 #include <asm/nohash/pgalloc.h>
 #endif
 
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+	return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
 #endif /* _ASM_POWERPC_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index a89c67b62680..b169bbf95fcb 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -33,11 +33,7 @@ static inline __be64 pmd_raw(pmd_t x)
 	return x.pmd;
 }
 
-/*
- * 64 bit hash always use 4 level table. Everybody else use 4 level
- * only for 4K page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+/* 64 bit always use 4 level table. */
 typedef struct { __be64 pud; } pud_t;
 #define __pud(x)	((pud_t) { cpu_to_be64(x) })
 #define __pud_raw(x)	((pud_t) { (x) })
@@ -51,7 +47,6 @@ static inline __be64 pud_raw(pud_t x)
 	return x.pud;
 }
 
-#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 /* PGD level */
@@ -77,7 +72,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
  * With hash config 64k pages additionally define a bigger "real PTE" type that
  * gathers the "second half" part of the PTE for pseudo 64k pages
  */
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
+#ifdef CONFIG_PPC_64K_PAGES
 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
 #else
 typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 3b0edf041b2e..d11b4c61d686 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -23,18 +23,13 @@ static inline unsigned long pmd_val(pmd_t x)
 	return x.pmd;
 }
 
-/*
- * 64 bit hash always use 4 level table. Everybody else use 4 level
- * only for 4K page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+/* 64 bit always use 4 level table. */
 typedef struct { unsigned long pud; } pud_t;
 #define __pud(x)	((pud_t) { (x) })
 static inline unsigned long pud_val(pud_t x)
 {
 	return x.pud;
 }
-#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 /* PGD level */
@@ -54,7 +49,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
  * With hash config 64k pages additionally define a bigger "real PTE" type that
  * gathers the "second half" part of the PTE for pseudo 64k pages
  */
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
+#ifdef CONFIG_PPC_64K_PAGES
 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
 #else
 typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 505550fb2935..3f53be60fb01 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -89,9 +89,6 @@ extern void paging_init(void);
  */
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
 
-extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
-		       unsigned long end, int write,
-		       struct page **pages, int *nr);
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_large(pmd)		0
 #endif
@@ -108,6 +105,12 @@ void mark_initmem_nx(void);
 static inline void mark_initmem_nx(void) { }
 #endif
 
+#ifdef CONFIG_PPC_DEBUG_WX
+void ptdump_check_wx(void);
+#else
+static inline void ptdump_check_wx(void) { }
+#endif
+
 /*
  * When used, PTE_FRAG_NR is defined in subarch pgtable.h
  * so we are sure it is included when arriving here.
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 3351bcf42f2d..706ac5df546f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -164,6 +164,9 @@ struct thread_struct {
 	unsigned long	rtas_sp;	/* stack pointer for when in RTAS */
 #endif
 #endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+	unsigned long	kuap;		/* opened segments for user access */
+#endif
 	/* Debug Registers */
 	struct debug_reg debug;
 	struct thread_fp_state	fp_state;
@@ -411,14 +414,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
 }
 #endif
 
+/* asm stubs */
+extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
+extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
+extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
+
 extern unsigned long cpuidle_disable;
 enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
-extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
+
 extern void power7_idle_type(unsigned long type);
-extern unsigned long power9_idle_stop(unsigned long psscr_val);
-extern unsigned long power9_offline_stop(unsigned long psscr_val);
 extern void power9_idle_type(unsigned long stop_psscr_val,
 			     unsigned long stop_psscr_mask);
 
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 64271e562fed..6f047730e642 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -52,10 +52,17 @@ struct pt_regs
 		};
 	};
 
+	union {
+		struct {
 #ifdef CONFIG_PPC64
-	unsigned long ppr;
-	unsigned long __pad;	/* Maintain 16 byte interrupt stack alignment */
+			unsigned long ppr;
+#endif
+#ifdef CONFIG_PPC_KUAP
+			unsigned long kuap;
 #endif
+		};
+		unsigned long __pad[2];	/* Maintain 16 byte interrupt stack alignment */
+	};
 };
 #endif
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c5b2aff0ce8e..10caa145f98b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -168,6 +168,7 @@
 #define PSSCR_ESL		0x00200000 /* Enable State Loss */
 #define PSSCR_SD		0x00400000 /* Status Disable */
 #define PSSCR_PLS	0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_PLS_SHIFT	60
 #define PSSCR_GUEST_VIS	0xf0000000000003ffUL /* Guest-visible PSSCR fields */
 #define PSSCR_FAKE_SUSPEND	0x00000400 /* Fake-suspend bit (P9 DD2.2) */
 #define PSSCR_FAKE_SUSPEND_LG	10	   /* Fake-suspend bit position */
@@ -758,10 +759,9 @@
 #define SRR1_WAKERESET		0x00100000 /* System reset */
 #define SRR1_WAKEHDBELL		0x000c0000 /* Hypervisor doorbell on P8 */
 #define SRR1_WAKESTATE		0x00030000 /* Powersave exit mask [46:47] */
-#define SRR1_WS_DEEPEST		0x00030000 /* Some resources not maintained,
-					      may not be recoverable */
-#define SRR1_WS_DEEPER		0x00020000 /* Some resources not maintained */
-#define SRR1_WS_DEEP		0x00010000 /* All resources maintained */
+#define SRR1_WS_HVLOSS		0x00030000 /* HV resources not maintained */
+#define SRR1_WS_GPRLOSS		0x00020000 /* GPRs not maintained */
+#define SRR1_WS_NOLOSS		0x00010000 /* All resources maintained */
 #define SRR1_PROGTM		0x00200000 /* TM Bad Thing */
 #define SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
 #define SRR1_PROGILL		0x00080000 /* Illegal instruction */
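
The SRR1_WS_* renames describe what was lost across a powersave exit, which is what the merge's rewritten idle code keys its restore path on. A hedged sketch of decoding the wake state with the new names (the function is invented for illustration):

	static void sketch_decode_wake_state(unsigned long srr1)
	{
		switch (srr1 & SRR1_WAKESTATE) {
		case SRR1_WS_NOLOSS:	/* full state maintained: fast path */
			break;
		case SRR1_WS_GPRLOSS:	/* GPRs lost: restore non-volatile regs */
			break;
		case SRR1_WS_HVLOSS:	/* HV state lost: full restore needed */
			break;
		}
	}
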
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index eb2a33d5df26..e382bd6ede84 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -41,7 +41,7 @@
 #if defined(CONFIG_PPC_BOOK3E_64)
 #define MSR_64BIT	MSR_CM
 
-#define MSR_		(MSR_ME | MSR_CE)
+#define MSR_		(MSR_ME | MSR_RI | MSR_CE)
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
 #define MSR_USER64	(MSR_USER32 | MSR_64BIT)
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
index 44816cbc4198..c6f466f4c241 100644
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -4,9 +4,7 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/slice.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/nohash/64/slice.h>
-#elif defined(CONFIG_PPC_MMU_NOHASH)
+#elif defined(CONFIG_PPC_MMU_NOHASH_32)
 #include <asm/nohash/32/slice.h>
 #endif
 
@@ -38,6 +36,11 @@ void slice_setup_new_exec(void);
 
 static inline void slice_init_new_context_exec(struct mm_struct *mm) {}
 
+static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+	return 0;
+}
+
 #endif /* CONFIG_PPC_MM_SLICES */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 68da49320592..3192d454a733 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni
17extern int remove_section_mapping(unsigned long start, unsigned long end); 17extern int remove_section_mapping(unsigned long start, unsigned long end);
18 18
19#ifdef CONFIG_PPC_BOOK3S_64 19#ifdef CONFIG_PPC_BOOK3S_64
20extern void resize_hpt_for_hotplug(unsigned long new_mem_size); 20extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
21#else 21#else
22static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { } 22static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; }
23#endif 23#endif
24 24
25#ifdef CONFIG_NUMA 25#ifdef CONFIG_NUMA
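resize_hpt_for_hotplug() returning int lets memory hotplug fail cleanly when the hash page table cannot grow, rather than silently continuing with an undersized HPT. A hedged sketch of one plausible caller policy (the function below is hypothetical):

        static int example_memory_add(unsigned long new_mem_size)
        {
                int rc = resize_hpt_for_hotplug(new_mem_size);

                if (rc == -ENOSPC)
                        return rc;      /* HPT can't grow: abort hotplug */
                return 0;               /* other outcomes tolerated here */
        }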
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 1647de15a31e..9bf6dffb4090 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -4,14 +4,17 @@
4 4
5#ifdef __KERNEL__ 5#ifdef __KERNEL__
6 6
7#ifndef CONFIG_KASAN
7#define __HAVE_ARCH_STRNCPY 8#define __HAVE_ARCH_STRNCPY
8#define __HAVE_ARCH_STRNCMP 9#define __HAVE_ARCH_STRNCMP
10#define __HAVE_ARCH_MEMCHR
11#define __HAVE_ARCH_MEMCMP
12#define __HAVE_ARCH_MEMSET16
13#endif
14
9#define __HAVE_ARCH_MEMSET 15#define __HAVE_ARCH_MEMSET
10#define __HAVE_ARCH_MEMCPY 16#define __HAVE_ARCH_MEMCPY
11#define __HAVE_ARCH_MEMMOVE 17#define __HAVE_ARCH_MEMMOVE
12#define __HAVE_ARCH_MEMCMP
13#define __HAVE_ARCH_MEMCHR
14#define __HAVE_ARCH_MEMSET16
15#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 18#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
16 19
17extern char * strcpy(char *,const char *); 20extern char * strcpy(char *,const char *);
@@ -27,7 +30,27 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
27extern void * memchr(const void *,int,__kernel_size_t); 30extern void * memchr(const void *,int,__kernel_size_t);
28extern void * memcpy_flushcache(void *,const void *,__kernel_size_t); 31extern void * memcpy_flushcache(void *,const void *,__kernel_size_t);
29 32
33void *__memset(void *s, int c, __kernel_size_t count);
34void *__memcpy(void *to, const void *from, __kernel_size_t n);
35void *__memmove(void *to, const void *from, __kernel_size_t n);
36
37#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
38/*
39 * For files that are not instrumented (e.g. mm/slub.c) we
40 * should use the non-instrumented versions of the mem* functions.
41 */
42#define memcpy(dst, src, len) __memcpy(dst, src, len)
43#define memmove(dst, src, len) __memmove(dst, src, len)
44#define memset(s, c, n) __memset(s, c, n)
45
46#ifndef __NO_FORTIFY
47#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
48#endif
49
50#endif
51
30#ifdef CONFIG_PPC64 52#ifdef CONFIG_PPC64
53#ifndef CONFIG_KASAN
31#define __HAVE_ARCH_MEMSET32 54#define __HAVE_ARCH_MEMSET32
32#define __HAVE_ARCH_MEMSET64 55#define __HAVE_ARCH_MEMSET64
33 56
@@ -49,8 +72,11 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
49{ 72{
50 return __memset64(p, v, n * 8); 73 return __memset64(p, v, n * 8);
51} 74}
75#endif
52#else 76#else
77#ifndef CONFIG_KASAN
53#define __HAVE_ARCH_STRLEN 78#define __HAVE_ARCH_STRLEN
79#endif
54 80
55extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); 81extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
56#endif 82#endif
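Net effect of the CONFIG_KASAN conditionals above: with KASAN on, the optimised assembly string routines are not advertised, so the instrumented generic C versions run, while files built without instrumentation get the raw __mem* aliases via the #defines. A sketch of what such an uninstrumented file sees after preprocessing (the function is illustrative):

        void example_copy(void *dst, const void *src, __kernel_size_t n)
        {
                /* Compiled without __SANITIZE_ADDRESS__, this call becomes
                 * __memcpy(dst, src, n) per the #define above, skipping
                 * KASAN checks the file cannot satisfy.
                 */
                memcpy(dst, src, n);
        }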
diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
index eab4779f6b84..c993482237ed 100644
--- a/arch/powerpc/include/asm/task_size_64.h
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -20,7 +20,7 @@
20/* 20/*
21 * For now 512TB is only supported with book3s and 64K linux page size. 21 * For now 512TB is only supported with book3s and 64K linux page size.
22 */ 22 */
23#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES) 23#ifdef CONFIG_PPC_64K_PAGES
24/* 24/*
25 * Max value currently used: 25 * Max value currently used:
26 */ 26 */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 54bf7e68a7e1..57e968413d1e 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq;
36extern unsigned long ppc_tb_freq; 36extern unsigned long ppc_tb_freq;
37#define DEFAULT_TB_FREQ 125000000UL 37#define DEFAULT_TB_FREQ 125000000UL
38 38
39extern bool tb_invalid;
40
39struct div_result { 41struct div_result {
40 u64 result_high; 42 u64 result_high;
41 u64 result_low; 43 u64 result_low;
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 58ef8c43a89d..08cd60cd70b7 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -54,6 +54,22 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
54 TP_ARGS(regs) 54 TP_ARGS(regs)
55); 55);
56 56
57#ifdef CONFIG_PPC_DOORBELL
58DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry,
59
60 TP_PROTO(struct pt_regs *regs),
61
62 TP_ARGS(regs)
63);
64
65DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit,
66
67 TP_PROTO(struct pt_regs *regs),
68
69 TP_ARGS(regs)
70);
71#endif
72
57#ifdef CONFIG_PPC_PSERIES 73#ifdef CONFIG_PPC_PSERIES
58extern int hcall_tracepoint_regfunc(void); 74extern int hcall_tracepoint_regfunc(void);
59extern void hcall_tracepoint_unregfunc(void); 75extern void hcall_tracepoint_unregfunc(void);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4d6d905e9138..76f34346b642 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -6,6 +6,7 @@
6#include <asm/processor.h> 6#include <asm/processor.h>
7#include <asm/page.h> 7#include <asm/page.h>
8#include <asm/extable.h> 8#include <asm/extable.h>
9#include <asm/kup.h>
9 10
10/* 11/*
11 * The fs value determines whether argument validity checking should be 12 * The fs value determines whether argument validity checking should be
@@ -140,6 +141,7 @@ extern long __put_user_bad(void);
140#define __put_user_size(x, ptr, size, retval) \ 141#define __put_user_size(x, ptr, size, retval) \
141do { \ 142do { \
142 retval = 0; \ 143 retval = 0; \
144 allow_write_to_user(ptr, size); \
143 switch (size) { \ 145 switch (size) { \
144 case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ 146 case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
145 case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ 147 case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
@@ -147,6 +149,7 @@ do { \
147 case 8: __put_user_asm2(x, ptr, retval); break; \ 149 case 8: __put_user_asm2(x, ptr, retval); break; \
148 default: __put_user_bad(); \ 150 default: __put_user_bad(); \
149 } \ 151 } \
152 prevent_write_to_user(ptr, size); \
150} while (0) 153} while (0)
151 154
152#define __put_user_nocheck(x, ptr, size) \ 155#define __put_user_nocheck(x, ptr, size) \
@@ -239,6 +242,7 @@ do { \
239 __chk_user_ptr(ptr); \ 242 __chk_user_ptr(ptr); \
240 if (size > sizeof(x)) \ 243 if (size > sizeof(x)) \
241 (x) = __get_user_bad(); \ 244 (x) = __get_user_bad(); \
245 allow_read_from_user(ptr, size); \
242 switch (size) { \ 246 switch (size) { \
243 case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ 247 case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
244 case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ 248 case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
@@ -246,6 +250,7 @@ do { \
246 case 8: __get_user_asm2(x, ptr, retval); break; \ 250 case 8: __get_user_asm2(x, ptr, retval); break; \
247 default: (x) = __get_user_bad(); \ 251 default: (x) = __get_user_bad(); \
248 } \ 252 } \
253 prevent_read_from_user(ptr, size); \
249} while (0) 254} while (0)
250 255
251/* 256/*
@@ -305,15 +310,21 @@ extern unsigned long __copy_tofrom_user(void __user *to,
305static inline unsigned long 310static inline unsigned long
306raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) 311raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
307{ 312{
308 return __copy_tofrom_user(to, from, n); 313 unsigned long ret;
314
315 allow_user_access(to, from, n);
316 ret = __copy_tofrom_user(to, from, n);
317 prevent_user_access(to, from, n);
318 return ret;
309} 319}
310#endif /* __powerpc64__ */ 320#endif /* __powerpc64__ */
311 321
312static inline unsigned long raw_copy_from_user(void *to, 322static inline unsigned long raw_copy_from_user(void *to,
313 const void __user *from, unsigned long n) 323 const void __user *from, unsigned long n)
314{ 324{
325 unsigned long ret;
315 if (__builtin_constant_p(n) && (n <= 8)) { 326 if (__builtin_constant_p(n) && (n <= 8)) {
316 unsigned long ret = 1; 327 ret = 1;
317 328
318 switch (n) { 329 switch (n) {
319 case 1: 330 case 1:
@@ -338,14 +349,18 @@ static inline unsigned long raw_copy_from_user(void *to,
338 } 349 }
339 350
340 barrier_nospec(); 351 barrier_nospec();
341 return __copy_tofrom_user((__force void __user *)to, from, n); 352 allow_read_from_user(from, n);
353 ret = __copy_tofrom_user((__force void __user *)to, from, n);
354 prevent_read_from_user(from, n);
355 return ret;
342} 356}
343 357
344static inline unsigned long raw_copy_to_user(void __user *to, 358static inline unsigned long raw_copy_to_user(void __user *to,
345 const void *from, unsigned long n) 359 const void *from, unsigned long n)
346{ 360{
361 unsigned long ret;
347 if (__builtin_constant_p(n) && (n <= 8)) { 362 if (__builtin_constant_p(n) && (n <= 8)) {
348 unsigned long ret = 1; 363 ret = 1;
349 364
350 switch (n) { 365 switch (n) {
351 case 1: 366 case 1:
@@ -365,17 +380,24 @@ static inline unsigned long raw_copy_to_user(void __user *to,
365 return 0; 380 return 0;
366 } 381 }
367 382
368 return __copy_tofrom_user(to, (__force const void __user *)from, n); 383 allow_write_to_user(to, n);
384 ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
385 prevent_write_to_user(to, n);
386 return ret;
369} 387}
370 388
371extern unsigned long __clear_user(void __user *addr, unsigned long size); 389extern unsigned long __clear_user(void __user *addr, unsigned long size);
372 390
373static inline unsigned long clear_user(void __user *addr, unsigned long size) 391static inline unsigned long clear_user(void __user *addr, unsigned long size)
374{ 392{
393 unsigned long ret = size;
375 might_fault(); 394 might_fault();
376 if (likely(access_ok(addr, size))) 395 if (likely(access_ok(addr, size))) {
377 return __clear_user(addr, size); 396 allow_write_to_user(addr, size);
378 return size; 397 ret = __clear_user(addr, size);
398 prevent_write_to_user(addr, size);
399 }
400 return ret;
379} 401}
380 402
381extern long strncpy_from_user(char *dst, const char __user *src, long count); 403extern long strncpy_from_user(char *dst, const char __user *src, long count);
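Every primitive in this hunk now has the same shape: open the user-access window, perform the access, close the window, so with KUAP active the kernel can only touch userspace inside an explicit bracket. A minimal sketch of that pattern on a hypothetical helper (allow/prevent_* come from <asm/kup.h>):

        static inline unsigned long
        example_copy_from(void *to, const void __user *from, unsigned long n)
        {
                unsigned long ret;

                allow_read_from_user(from, n);          /* open the window  */
                ret = __copy_tofrom_user((__force void __user *)to, from, n);
                prevent_read_from_user(from, n);        /* close it again   */
                return ret;                             /* bytes not copied */
        }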
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3c704f5dd3ae..b579a943407b 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -109,12 +109,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
109extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); 109extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
110 110
111extern void xive_native_sync_source(u32 hw_irq); 111extern void xive_native_sync_source(u32 hw_irq);
112extern void xive_native_sync_queue(u32 hw_irq);
112extern bool is_xive_irq(struct irq_chip *chip); 113extern bool is_xive_irq(struct irq_chip *chip);
113extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); 114extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
114extern int xive_native_disable_vp(u32 vp_id); 115extern int xive_native_disable_vp(u32 vp_id);
115extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); 116extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
116extern bool xive_native_has_single_escalation(void); 117extern bool xive_native_has_single_escalation(void);
117 118
119extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
120 u64 *out_qpage,
121 u64 *out_qsize,
122 u64 *out_qeoi_page,
123 u32 *out_escalate_irq,
124 u64 *out_qflags);
125
126extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
127 u32 *qindex);
128extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
129 u32 qindex);
130extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
131
118#else 132#else
119 133
120static inline bool xive_enabled(void) { return false; } 134static inline bool xive_enabled(void) { return false; }
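The new queue-state accessors exported above exist so an external user such as KVM can snapshot an event queue and reinstate it, e.g. across migration. A hedged sketch of the round trip (function name hypothetical, error handling trimmed):

        static int example_migrate_queue(u32 vp_id, u32 prio)
        {
                u32 qtoggle, qindex;
                int rc;

                rc = xive_native_get_queue_state(vp_id, prio, &qtoggle, &qindex);
                if (rc)
                        return rc;
                /* ... transfer qtoggle/qindex to the destination ... */
                return xive_native_set_queue_state(vp_id, prio, qtoggle, qindex);
        }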
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cddadccf551d..0ea6c4aa3a20 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -31,6 +31,18 @@ CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
31CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE) 31CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
32endif 32endif
33 33
34KASAN_SANITIZE_early_32.o := n
35KASAN_SANITIZE_cputable.o := n
36KASAN_SANITIZE_prom_init.o := n
37KASAN_SANITIZE_btext.o := n
38
39ifdef CONFIG_KASAN
40CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
41CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
42CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
43CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
44endif
45
34obj-y := cputable.o ptrace.o syscalls.o \ 46obj-y := cputable.o ptrace.o syscalls.o \
35 irq.o align.o signal_32.o pmc.o vdso.o \ 47 irq.o align.o signal_32.o pmc.o vdso.o \
36 process.o systbl.o idle.o \ 48 process.o systbl.o idle.o \
@@ -93,7 +105,7 @@ extra-y += vmlinux.lds
93 105
94obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o 106obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
95 107
96obj-$(CONFIG_PPC32) += entry_32.o setup_32.o 108obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
97obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o 109obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
98obj-$(CONFIG_KGDB) += kgdb.o 110obj-$(CONFIG_KGDB) += kgdb.o
99obj-$(CONFIG_BOOTX_TEXT) += btext.o 111obj-$(CONFIG_BOOTX_TEXT) += btext.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 86a61e5f8285..8e02444e9d3d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -147,6 +147,9 @@ int main(void)
147#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) 147#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
148 OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); 148 OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
149#endif 149#endif
150#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
151 OFFSET(KUAP, thread_struct, kuap);
152#endif
150 153
151#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 154#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
152 OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); 155 OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -268,7 +271,6 @@ int main(void)
268 OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); 271 OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
269 OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); 272 OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
270 OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); 273 OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
271 OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost);
272 OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); 274 OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
273#else /* CONFIG_PPC64 */ 275#else /* CONFIG_PPC64 */
274#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 276#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
@@ -332,6 +334,10 @@ int main(void)
332 STACK_PT_REGS_OFFSET(_PPR, ppr); 334 STACK_PT_REGS_OFFSET(_PPR, ppr);
333#endif /* CONFIG_PPC64 */ 335#endif /* CONFIG_PPC64 */
334 336
337#ifdef CONFIG_PPC_KUAP
338 STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
339#endif
340
335#if defined(CONFIG_PPC32) 341#if defined(CONFIG_PPC32)
336#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 342#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
337 DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); 343 DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
@@ -766,23 +772,6 @@ int main(void)
766 OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl); 772 OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
767#endif 773#endif
768 774
769#ifdef CONFIG_PPC_POWERNV
770 OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr);
771 OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
772 OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
773 OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
774 OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
775 OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
776#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
777 STOP_SPR(STOP_PID, pid);
778 STOP_SPR(STOP_LDBAR, ldbar);
779 STOP_SPR(STOP_FSCR, fscr);
780 STOP_SPR(STOP_HFSCR, hfscr);
781 STOP_SPR(STOP_MMCR1, mmcr1);
782 STOP_SPR(STOP_MMCR2, mmcr2);
783 STOP_SPR(STOP_MMCRA, mmcra);
784#endif
785
786 DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); 775 DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
787 DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); 776 DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
788 777
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 53102764fd2f..f2ed3ef4b129 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -759,23 +759,22 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index,
759 759
760 index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); 760 index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
761 if (!index_dir) 761 if (!index_dir)
762 goto err; 762 return;
763 763
764 index_dir->cache = cache; 764 index_dir->cache = cache;
765 765
766 rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, 766 rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
767 cache_dir->kobj, "index%d", index); 767 cache_dir->kobj, "index%d", index);
768 if (rc) 768 if (rc) {
769 goto err; 769 kobject_put(&index_dir->kobj);
770 kfree(index_dir);
771 return;
772 }
770 773
771 index_dir->next = cache_dir->index; 774 index_dir->next = cache_dir->index;
772 cache_dir->index = index_dir; 775 cache_dir->index = index_dir;
773 776
774 cacheinfo_create_index_opt_attrs(index_dir); 777 cacheinfo_create_index_opt_attrs(index_dir);
775
776 return;
777err:
778 kfree(index_dir);
779} 778}
780 779
781static void cacheinfo_sysfs_populate(unsigned int cpu_id, 780static void cacheinfo_sysfs_populate(unsigned int cpu_id,
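The rule the cacheinfo fix applies: once kobject_init_and_add() has run, the kobject holds a reference that must be dropped with kobject_put() so the ktype's release (if any) can run; the old bare 'goto err'/kfree() bypassed that. A hedged sketch of the general error path, mirroring the hunk's assumption that the ktype has no release callback freeing the container (names are illustrative):

        static int example_add_kobj(struct kobject *parent,
                                    struct kobj_type *ktype)
        {
                struct example_dir {
                        struct kobject kobj;
                } *dir;
                int rc;

                dir = kzalloc(sizeof(*dir), GFP_KERNEL);
                if (!dir)
                        return -ENOMEM;

                rc = kobject_init_and_add(&dir->kobj, ktype, parent, "example");
                if (rc) {
                        kobject_put(&dir->kobj); /* drop the init reference  */
                        kfree(dir);              /* ktype frees nothing here */
                        return rc;
                }
                return 0;
        }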
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1eab54bc6ee9..cd12f362b61f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2147,7 +2147,11 @@ void __init set_cur_cpu_spec(struct cpu_spec *s)
2147 struct cpu_spec *t = &the_cpu_spec; 2147 struct cpu_spec *t = &the_cpu_spec;
2148 2148
2149 t = PTRRELOC(t); 2149 t = PTRRELOC(t);
2150 *t = *s; 2150 /*
2151 * use memcpy() instead of *t = *s so that GCC replaces it
2152 * by __memcpy() when KASAN is active
2153 */
2154 memcpy(t, s, sizeof(*t));
2151 2155
2152 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; 2156 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
2153} 2157}
@@ -2161,8 +2165,11 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
2161 t = PTRRELOC(t); 2165 t = PTRRELOC(t);
2162 old = *t; 2166 old = *t;
2163 2167
2164 /* Copy everything, then do fixups */ 2168 /*
2165 *t = *s; 2169 * Copy everything, then do fixups. Use memcpy() instead of *t = *s
2170 * so that GCC replaces it by __memcpy() when KASAN is active
2171 */
2172 memcpy(t, s, sizeof(*t));
2166 2173
2167 /* 2174 /*
2168 * If we are overriding a previous value derived from the real 2175 * If we are overriding a previous value derived from the real
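Background for the two conversions above: GCC may lower a struct assignment to a memcpy() libcall, which in a KASAN build is the instrumented version and unsafe this early. Writing memcpy() textually lets the remapping in asm/string.h substitute the raw __memcpy() in uninstrumented files. A sketch (hypothetical function):

        static void example_copy_spec(struct cpu_spec *t, const struct cpu_spec *s)
        {
                /* '*t = *s;' can be emitted as a call to memcpy(); the
                 * explicit call is remapped to __memcpy() by asm/string.h
                 * when KASAN is on and this file is not instrumented.
                 */
                memcpy(t, s, sizeof(*t));
        }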
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index b6fe883b1016..5ec3b3835925 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -18,6 +18,7 @@
18#include <asm/dbell.h> 18#include <asm/dbell.h>
19#include <asm/irq_regs.h> 19#include <asm/irq_regs.h>
20#include <asm/kvm_ppc.h> 20#include <asm/kvm_ppc.h>
21#include <asm/trace.h>
21 22
22#ifdef CONFIG_SMP 23#ifdef CONFIG_SMP
23 24
@@ -81,6 +82,7 @@ void doorbell_exception(struct pt_regs *regs)
81 struct pt_regs *old_regs = set_irq_regs(regs); 82 struct pt_regs *old_regs = set_irq_regs(regs);
82 83
83 irq_enter(); 84 irq_enter();
85 trace_doorbell_entry(regs);
84 86
85 ppc_msgsync(); 87 ppc_msgsync();
86 88
@@ -91,6 +93,7 @@ void doorbell_exception(struct pt_regs *regs)
91 93
92 smp_ipi_demux_relaxed(); /* already performed the barrier */ 94 smp_ipi_demux_relaxed(); /* already performed the barrier */
93 95
96 trace_doorbell_exit(regs);
94 irq_exit(); 97 irq_exit();
95 set_irq_regs(old_regs); 98 set_irq_regs(old_regs);
96} 99}
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
new file mode 100644
index 000000000000..3482118ffe76
--- /dev/null
+++ b/arch/powerpc/kernel/early_32.c
@@ -0,0 +1,36 @@
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Early init before relocation
5 */
6
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <asm/setup.h>
10#include <asm/sections.h>
11#include <asm/asm-prototypes.h>
12
13/*
14 * We're called here very early in the boot.
15 *
16 * Note that the kernel may be running at an address which is different
17 * from the address that it was linked at, so we must use RELOC/PTRRELOC
18 * to access static data (including strings). -- paulus
19 */
20notrace unsigned long __init early_init(unsigned long dt_ptr)
21{
22 unsigned long offset = reloc_offset();
23
24 /* First zero the BSS */
25 memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
26
27 /*
28 * Identify the CPU type and fix up code sections
29 * that depend on which cpu we have.
30 */
31 identify_cpu(offset, mfspr(SPRN_PVR));
32
33 apply_feature_fixups();
34
35 return KERNELBASE + offset;
36}
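early_init() runs before relocation, so every static address must be adjusted by reloc_offset(); that is what PTRRELOC() does for &__bss_start above. A hedged illustration of the idea, with a macro body that is illustrative rather than copied from the headers:

        /* Conceptually: turn a link-time address into the address the
         * data occupies while the kernel is still running unrelocated.
         */
        #define EXAMPLE_PTRRELOC(x, offset) \
                ((typeof(x))((unsigned long)(x) + (offset)))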
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index b61cfd29c76f..c18f3490a77e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -36,15 +36,10 @@
36#include <asm/asm-405.h> 36#include <asm/asm-405.h>
37#include <asm/feature-fixups.h> 37#include <asm/feature-fixups.h>
38#include <asm/barrier.h> 38#include <asm/barrier.h>
39#include <asm/kup.h>
40#include <asm/bug.h>
39 41
40/* 42#include "head_32.h"
41 * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
42 */
43#if MSR_KERNEL >= 0x10000
44#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
45#else
46#define LOAD_MSR_KERNEL(r, x) li r,(x)
47#endif
48 43
49/* 44/*
50 * Align to 4k in order to ensure that all functions modifying srr0/srr1 45 * Align to 4k in order to ensure that all functions modifying srr0/srr1
@@ -150,8 +145,8 @@ transfer_to_handler:
150 stw r12,_CTR(r11) 145 stw r12,_CTR(r11)
151 stw r2,_XER(r11) 146 stw r2,_XER(r11)
152 mfspr r12,SPRN_SPRG_THREAD 147 mfspr r12,SPRN_SPRG_THREAD
153 addi r2,r12,-THREAD
154 beq 2f /* if from user, fix up THREAD.regs */ 148 beq 2f /* if from user, fix up THREAD.regs */
149 addi r2, r12, -THREAD
155 addi r11,r1,STACK_FRAME_OVERHEAD 150 addi r11,r1,STACK_FRAME_OVERHEAD
156 stw r11,PT_REGS(r12) 151 stw r11,PT_REGS(r12)
157#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 152#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
@@ -161,6 +156,9 @@ transfer_to_handler:
161 andis. r12,r12,DBCR0_IDM@h 156 andis. r12,r12,DBCR0_IDM@h
162#endif 157#endif
163 ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) 158 ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
159#ifdef CONFIG_PPC_BOOK3S_32
160 kuep_lock r11, r12
161#endif
164#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 162#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
165 beq+ 3f 163 beq+ 3f
166 /* From user and task is ptraced - load up global dbcr0 */ 164 /* From user and task is ptraced - load up global dbcr0 */
@@ -186,6 +184,8 @@ transfer_to_handler:
1862: /* if from kernel, check interrupted DOZE/NAP mode and 1842: /* if from kernel, check interrupted DOZE/NAP mode and
187 * check for stack overflow 185 * check for stack overflow
188 */ 186 */
187 kuap_save_and_lock r11, r12, r9, r2, r0
188 addi r2, r12, -THREAD
189 lwz r9,KSP_LIMIT(r12) 189 lwz r9,KSP_LIMIT(r12)
190 cmplw r1,r9 /* if r1 <= ksp_limit */ 190 cmplw r1,r9 /* if r1 <= ksp_limit */
191 ble- stack_ovf /* then the kernel stack overflowed */ 191 ble- stack_ovf /* then the kernel stack overflowed */
@@ -207,26 +207,43 @@ transfer_to_handler_cont:
207 mtspr SPRN_NRI, r0 207 mtspr SPRN_NRI, r0
208#endif 208#endif
209#ifdef CONFIG_TRACE_IRQFLAGS 209#ifdef CONFIG_TRACE_IRQFLAGS
210 /*
211 * When tracing IRQ state (lockdep) we enable the MMU before we call
212 * the IRQ tracing functions as they might access vmalloc space or
213 * perform I/O for console output.
214 *
215 * To speed up the syscall path where interrupts stay on, let's check
216 * first if we are changing the MSR value at all.
217 */
218 tophys(r12, r1)
219 lwz r12,_MSR(r12)
220 andi. r12,r12,MSR_EE
221 bne 1f
222
223 /* MSR isn't changing, just transition directly */
224#endif
225 mtspr SPRN_SRR0,r11
226 mtspr SPRN_SRR1,r10
227 mtlr r9
228 SYNC
229 RFI /* jump to handler, enable MMU */
230
231#ifdef CONFIG_TRACE_IRQFLAGS
2321: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
233 * keep interrupts disabled at this point otherwise we might risk
234 * taking an interrupt before we tell lockdep they are enabled.
235 */
210 lis r12,reenable_mmu@h 236 lis r12,reenable_mmu@h
211 ori r12,r12,reenable_mmu@l 237 ori r12,r12,reenable_mmu@l
238 LOAD_MSR_KERNEL(r0, MSR_KERNEL)
212 mtspr SPRN_SRR0,r12 239 mtspr SPRN_SRR0,r12
213 mtspr SPRN_SRR1,r10 240 mtspr SPRN_SRR1,r0
214 SYNC 241 SYNC
215 RFI 242 RFI
216reenable_mmu: /* re-enable mmu so we can */
217 mfmsr r10
218 lwz r12,_MSR(r1)
219 xor r10,r10,r12
220 andi. r10,r10,MSR_EE /* Did EE change? */
221 beq 1f
222 243
244reenable_mmu:
223 /* 245 /*
224 * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. 246 * We save a bunch of GPRs,
225 * If from user mode there is only one stack frame on the stack, and
226 * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
227 * stack frame to make trace_hardirqs_off happy.
228 *
229 * This is handy because we also need to save a bunch of GPRs,
230 * r3 can be different from GPR3(r1) at this point, r9 and r11 247 * r3 can be different from GPR3(r1) at this point, r9 and r11
231 * contain the old MSR and handler address respectively, 248
232 * r4 & r5 can contain page fault arguments that need to be passed 249 * r4 & r5 can contain page fault arguments that need to be passed
@@ -234,14 +251,19 @@ reenable_mmu: /* re-enable mmu so we can */
234 * they aren't useful past this point (aren't syscall arguments), 251 * they aren't useful past this point (aren't syscall arguments),
235 * the rest is restored from the exception frame. 252 * the rest is restored from the exception frame.
236 */ 253 */
254
237 stwu r1,-32(r1) 255 stwu r1,-32(r1)
238 stw r9,8(r1) 256 stw r9,8(r1)
239 stw r11,12(r1) 257 stw r11,12(r1)
240 stw r3,16(r1) 258 stw r3,16(r1)
241 stw r4,20(r1) 259 stw r4,20(r1)
242 stw r5,24(r1) 260 stw r5,24(r1)
243 bl trace_hardirqs_off 261
244 lwz r5,24(r1) 262 /* If we are disabling interrupts (normal case), simply log it with
263 * lockdep
264 */
2651: bl trace_hardirqs_off
2662: lwz r5,24(r1)
245 lwz r4,20(r1) 267 lwz r4,20(r1)
246 lwz r3,16(r1) 268 lwz r3,16(r1)
247 lwz r11,12(r1) 269 lwz r11,12(r1)
@@ -251,15 +273,9 @@ reenable_mmu: /* re-enable mmu so we can */
251 lwz r6,GPR6(r1) 273 lwz r6,GPR6(r1)
252 lwz r7,GPR7(r1) 274 lwz r7,GPR7(r1)
253 lwz r8,GPR8(r1) 275 lwz r8,GPR8(r1)
2541: mtctr r11 276 mtctr r11
255 mtlr r9 277 mtlr r9
256 bctr /* jump to handler */ 278 bctr /* jump to handler */
257#else /* CONFIG_TRACE_IRQFLAGS */
258 mtspr SPRN_SRR0,r11
259 mtspr SPRN_SRR1,r10
260 mtlr r9
261 SYNC
262 RFI /* jump to handler, enable MMU */
263#endif /* CONFIG_TRACE_IRQFLAGS */ 279#endif /* CONFIG_TRACE_IRQFLAGS */
264 280
265#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 281#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -272,6 +288,7 @@ reenable_mmu: /* re-enable mmu so we can */
272 lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ 288 lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
273 rlwinm r9,r9,0,~MSR_EE 289 rlwinm r9,r9,0,~MSR_EE
274 lwz r12,_LINK(r11) /* and return to address in LR */ 290 lwz r12,_LINK(r11) /* and return to address in LR */
291 kuap_restore r11, r2, r3, r4, r5
275 b fast_exception_return 292 b fast_exception_return
276#endif 293#endif
277 294
@@ -301,6 +318,33 @@ stack_ovf:
301 SYNC 318 SYNC
302 RFI 319 RFI
303 320
321#ifdef CONFIG_TRACE_IRQFLAGS
322trace_syscall_entry_irq_off:
323 /*
324 * A syscall shouldn't happen while interrupts are disabled,
325 * so warn here.
326 */
3270: trap
328 EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
329 bl trace_hardirqs_on
330
331 /* Now enable for real */
332 LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
333 mtmsr r10
334
335 REST_GPR(0, r1)
336 REST_4GPRS(3, r1)
337 REST_2GPRS(7, r1)
338 b DoSyscall
339#endif /* CONFIG_TRACE_IRQFLAGS */
340
341 .globl transfer_to_syscall
342transfer_to_syscall:
343#ifdef CONFIG_TRACE_IRQFLAGS
344 andi. r12,r9,MSR_EE
345 beq- trace_syscall_entry_irq_off
346#endif /* CONFIG_TRACE_IRQFLAGS */
347
304/* 348/*
305 * Handle a system call. 349 * Handle a system call.
306 */ 350 */
@@ -312,33 +356,14 @@ _GLOBAL(DoSyscall)
312 stw r3,ORIG_GPR3(r1) 356 stw r3,ORIG_GPR3(r1)
313 li r12,0 357 li r12,0
314 stw r12,RESULT(r1) 358 stw r12,RESULT(r1)
315 lwz r11,_CCR(r1) /* Clear SO bit in CR */
316 rlwinm r11,r11,0,4,2
317 stw r11,_CCR(r1)
318#ifdef CONFIG_TRACE_IRQFLAGS 359#ifdef CONFIG_TRACE_IRQFLAGS
319 /* Return from syscalls can (and generally will) hard enable 360 /* Make sure interrupts are enabled */
320 * interrupts. You aren't supposed to call a syscall with
321 * interrupts disabled in the first place. However, to ensure
322 * that we get it right vs. lockdep if it happens, we force
323 * that hard enable here with appropriate tracing if we see
324 * that we have been called with interrupts off
325 */
326 mfmsr r11 361 mfmsr r11
327 andi. r12,r11,MSR_EE 362 andi. r12,r11,MSR_EE
328 bne+ 1f 363 /* We came in with interrupts disabled, so WARN and mark them enabled
329 /* We came in with interrupts disabled, we enable them now */ 364 * for lockdep now */
330 bl trace_hardirqs_on 3650: tweqi r12, 0
331 mfmsr r11 366 EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
332 lwz r0,GPR0(r1)
333 lwz r3,GPR3(r1)
334 lwz r4,GPR4(r1)
335 ori r11,r11,MSR_EE
336 lwz r5,GPR5(r1)
337 lwz r6,GPR6(r1)
338 lwz r7,GPR7(r1)
339 lwz r8,GPR8(r1)
340 mtmsr r11
3411:
342#endif /* CONFIG_TRACE_IRQFLAGS */ 367#endif /* CONFIG_TRACE_IRQFLAGS */
343 lwz r11,TI_FLAGS(r2) 368 lwz r11,TI_FLAGS(r2)
344 andi. r11,r11,_TIF_SYSCALL_DOTRACE 369 andi. r11,r11,_TIF_SYSCALL_DOTRACE
@@ -392,8 +417,7 @@ syscall_exit_cont:
392 lwz r8,_MSR(r1) 417 lwz r8,_MSR(r1)
393#ifdef CONFIG_TRACE_IRQFLAGS 418#ifdef CONFIG_TRACE_IRQFLAGS
394 /* If we are going to return from the syscall with interrupts 419 /* If we are going to return from the syscall with interrupts
395 * off, we trace that here. It shouldn't happen though but we 420 * off, we trace that here. It shouldn't normally happen.
396 * want to catch the bugger if it does right ?
397 */ 421 */
398 andi. r10,r8,MSR_EE 422 andi. r10,r8,MSR_EE
399 bne+ 1f 423 bne+ 1f
@@ -422,12 +446,11 @@ BEGIN_FTR_SECTION
422 lwarx r7,0,r1 446 lwarx r7,0,r1
423END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) 447END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
424 stwcx. r0,0,r1 /* to clear the reservation */ 448 stwcx. r0,0,r1 /* to clear the reservation */
425#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
426 andi. r4,r8,MSR_PR
427 beq 3f
428 ACCOUNT_CPU_USER_EXIT(r2, r5, r7) 449 ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
4293: 450#ifdef CONFIG_PPC_BOOK3S_32
451 kuep_unlock r5, r7
430#endif 452#endif
453 kuap_check r2, r4
431 lwz r4,_LINK(r1) 454 lwz r4,_LINK(r1)
432 lwz r5,_CCR(r1) 455 lwz r5,_CCR(r1)
433 mtlr r4 456 mtlr r4
@@ -678,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
678 stw r10,_CCR(r1) 701 stw r10,_CCR(r1)
679 stw r1,KSP(r3) /* Set old stack pointer */ 702 stw r1,KSP(r3) /* Set old stack pointer */
680 703
704 kuap_check r2, r4
681#ifdef CONFIG_SMP 705#ifdef CONFIG_SMP
682 /* We need a sync somewhere here to make sure that if the 706 /* We need a sync somewhere here to make sure that if the
683 * previous task gets rescheduled on another CPU, it sees all 707 * previous task gets rescheduled on another CPU, it sees all
@@ -820,6 +844,9 @@ restore_user:
820 bnel- load_dbcr0 844 bnel- load_dbcr0
821#endif 845#endif
822 ACCOUNT_CPU_USER_EXIT(r2, r10, r11) 846 ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
847#ifdef CONFIG_PPC_BOOK3S_32
848 kuep_unlock r10, r11
849#endif
823 850
824 b restore 851 b restore
825 852
@@ -866,12 +893,12 @@ resume_kernel:
866 /* check current_thread_info->preempt_count */ 893 /* check current_thread_info->preempt_count */
867 lwz r0,TI_PREEMPT(r2) 894 lwz r0,TI_PREEMPT(r2)
868 cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ 895 cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
869 bne restore 896 bne restore_kuap
870 andi. r8,r8,_TIF_NEED_RESCHED 897 andi. r8,r8,_TIF_NEED_RESCHED
871 beq+ restore 898 beq+ restore_kuap
872 lwz r3,_MSR(r1) 899 lwz r3,_MSR(r1)
873 andi. r0,r3,MSR_EE /* interrupts off? */ 900 andi. r0,r3,MSR_EE /* interrupts off? */
874 beq restore /* don't schedule if so */ 901 beq restore_kuap /* don't schedule if so */
875#ifdef CONFIG_TRACE_IRQFLAGS 902#ifdef CONFIG_TRACE_IRQFLAGS
876 /* Lockdep thinks irqs are enabled, we need to call 903 /* Lockdep thinks irqs are enabled, we need to call
877 * preempt_schedule_irq with IRQs off, so we inform lockdep 904 * preempt_schedule_irq with IRQs off, so we inform lockdep
@@ -879,10 +906,7 @@ resume_kernel:
879 */ 906 */
880 bl trace_hardirqs_off 907 bl trace_hardirqs_off
881#endif 908#endif
8821: bl preempt_schedule_irq 909 bl preempt_schedule_irq
883 lwz r3,TI_FLAGS(r2)
884 andi. r0,r3,_TIF_NEED_RESCHED
885 bne- 1b
886#ifdef CONFIG_TRACE_IRQFLAGS 910#ifdef CONFIG_TRACE_IRQFLAGS
887 /* And now, to properly rebalance the above, we tell lockdep they 911 /* And now, to properly rebalance the above, we tell lockdep they
888 * are being turned back on, which will happen when we return 912 * are being turned back on, which will happen when we return
@@ -890,6 +914,8 @@ resume_kernel:
890 bl trace_hardirqs_on 914 bl trace_hardirqs_on
891#endif 915#endif
892#endif /* CONFIG_PREEMPT */ 916#endif /* CONFIG_PREEMPT */
917restore_kuap:
918 kuap_restore r1, r2, r9, r10, r0
893 919
894 /* interrupts are hard-disabled at this point */ 920 /* interrupts are hard-disabled at this point */
895restore: 921restore:
@@ -913,28 +939,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
913 * off in this assembly code while peeking at TI_FLAGS() and such. However 939 * off in this assembly code while peeking at TI_FLAGS() and such. However
914 * we need to inform it if the exception turned interrupts off, and we 940 * we need to inform it if the exception turned interrupts off, and we
915 * are about to turn them back on. 941 * are about to turn them back on.
916 *
917 * The problem here sadly is that we don't know whether the exceptions was
918 * one that turned interrupts off or not. So we always tell lockdep about
919 * turning them on here when we go back to wherever we came from with EE
920 * on, even if that may meen some redudant calls being tracked. Maybe later
921 * we could encode what the exception did somewhere or test the exception
922 * type in the pt_regs but that sounds overkill
923 */ 942 */
924 andi. r10,r9,MSR_EE 943 andi. r10,r9,MSR_EE
925 beq 1f 944 beq 1f
926 /*
927 * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
928 * which is the stack frame here, we need to force a stack frame
929 * in case we came from user space.
930 */
931 stwu r1,-32(r1) 945 stwu r1,-32(r1)
932 mflr r0 946 mflr r0
933 stw r0,4(r1) 947 stw r0,4(r1)
934 stwu r1,-32(r1)
935 bl trace_hardirqs_on 948 bl trace_hardirqs_on
936 lwz r1,0(r1) 949 addi r1, r1, 32
937 lwz r1,0(r1)
938 lwz r9,_MSR(r1) 950 lwz r9,_MSR(r1)
9391: 9511:
940#endif /* CONFIG_TRACE_IRQFLAGS */ 952#endif /* CONFIG_TRACE_IRQFLAGS */
@@ -1197,6 +1209,7 @@ load_dbcr0:
1197 1209
1198 .section .bss 1210 .section .bss
1199 .align 4 1211 .align 4
1212 .global global_dbcr0
1200global_dbcr0: 1213global_dbcr0:
1201 .space 8*NR_CPUS 1214 .space 8*NR_CPUS
1202 .previous 1215 .previous
@@ -1207,9 +1220,10 @@ do_work: /* r10 contains MSR_KERNEL here */
1207 beq do_user_signal 1220 beq do_user_signal
1208 1221
1209do_resched: /* r10 contains MSR_KERNEL here */ 1222do_resched: /* r10 contains MSR_KERNEL here */
1210 /* Note: We don't need to inform lockdep that we are enabling 1223#ifdef CONFIG_TRACE_IRQFLAGS
1211 * interrupts here. As far as it knows, they are already enabled 1224 bl trace_hardirqs_on
1212 */ 1225 mfmsr r10
1226#endif
1213 ori r10,r10,MSR_EE 1227 ori r10,r10,MSR_EE
1214 SYNC 1228 SYNC
1215 MTMSRD(r10) /* hard-enable interrupts */ 1229 MTMSRD(r10) /* hard-enable interrupts */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 15c67d2c0534..d978af78bf2a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -46,6 +46,7 @@
46#include <asm/exception-64e.h> 46#include <asm/exception-64e.h>
47#endif 47#endif
48#include <asm/feature-fixups.h> 48#include <asm/feature-fixups.h>
49#include <asm/kup.h>
49 50
50/* 51/*
51 * System calls. 52 * System calls.
@@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION
120 addi r9,r1,STACK_FRAME_OVERHEAD 121 addi r9,r1,STACK_FRAME_OVERHEAD
121 ld r11,exception_marker@toc(r2) 122 ld r11,exception_marker@toc(r2)
122 std r11,-16(r9) /* "regshere" marker */ 123 std r11,-16(r9) /* "regshere" marker */
124
125 kuap_check_amr r10, r11
126
123#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) 127#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
124BEGIN_FW_FTR_SECTION 128BEGIN_FW_FTR_SECTION
125 beq 33f 129 beq 33f
@@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
275 andi. r6,r8,MSR_PR 279 andi. r6,r8,MSR_PR
276 ld r4,_LINK(r1) 280 ld r4,_LINK(r1)
277 281
282 kuap_check_amr r10, r11
283
278#ifdef CONFIG_PPC_BOOK3S 284#ifdef CONFIG_PPC_BOOK3S
279 /* 285 /*
280 * Clear MSR_RI, MSR_EE is already and remains disabled. We could do 286 * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
@@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
296 std r8, PACATMSCRATCH(r13) 302 std r8, PACATMSCRATCH(r13)
297#endif 303#endif
298 304
305 /*
306 * We don't need to restore AMR on the way back to userspace for KUAP.
307 * The value of AMR only matters while we're in the kernel.
308 */
299 ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 309 ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
300 ld r2,GPR2(r1) 310 ld r2,GPR2(r1)
301 ld r1,GPR1(r1) 311 ld r1,GPR1(r1)
@@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
306 RFI_TO_USER 316 RFI_TO_USER
307 b . /* prevent speculative execution */ 317 b . /* prevent speculative execution */
308 318
309 /* exit to kernel */ 3191: /* exit to kernel */
3101: ld r2,GPR2(r1) 320 kuap_restore_amr r2
321
322 ld r2,GPR2(r1)
311 ld r1,GPR1(r1) 323 ld r1,GPR1(r1)
312 mtlr r4 324 mtlr r4
313 mtcr r5 325 mtcr r5
@@ -594,6 +606,8 @@ _GLOBAL(_switch)
594 std r23,_CCR(r1) 606 std r23,_CCR(r1)
595 std r1,KSP(r3) /* Set old stack pointer */ 607 std r1,KSP(r3) /* Set old stack pointer */
596 608
609 kuap_check_amr r9, r10
610
597 FLUSH_COUNT_CACHE 611 FLUSH_COUNT_CACHE
598 612
599 /* 613 /*
@@ -851,13 +865,7 @@ resume_kernel:
851 * sure we are soft-disabled first and reconcile irq state. 865 * sure we are soft-disabled first and reconcile irq state.
852 */ 866 */
853 RECONCILE_IRQ_STATE(r3,r4) 867 RECONCILE_IRQ_STATE(r3,r4)
8541: bl preempt_schedule_irq 868 bl preempt_schedule_irq
855
856 /* Re-test flags and eventually loop */
857 ld r9, PACA_THREAD_INFO(r13)
858 ld r4,TI_FLAGS(r9)
859 andi. r0,r4,_TIF_NEED_RESCHED
860 bne 1b
861 869
862 /* 870 /*
863 * arch_local_irq_restore() from preempt_schedule_irq above may 871 * arch_local_irq_restore() from preempt_schedule_irq above may
@@ -942,6 +950,8 @@ fast_exception_return:
942 ld r4,_XER(r1) 950 ld r4,_XER(r1)
943 mtspr SPRN_XER,r4 951 mtspr SPRN_XER,r4
944 952
953 kuap_check_amr r5, r6
954
945 REST_8GPRS(5, r1) 955 REST_8GPRS(5, r1)
946 956
947 andi. r0,r3,MSR_RI 957 andi. r0,r3,MSR_RI
@@ -974,6 +984,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
974 ACCOUNT_CPU_USER_EXIT(r13, r2, r4) 984 ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
975 REST_GPR(13, r1) 985 REST_GPR(13, r1)
976 986
987 /*
988 * We don't need to restore AMR on the way back to userspace for KUAP.
989 * The value of AMR only matters while we're in the kernel.
990 */
977 mtspr SPRN_SRR1,r3 991 mtspr SPRN_SRR1,r3
978 992
979 ld r2,_CCR(r1) 993 ld r2,_CCR(r1)
@@ -1006,6 +1020,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1006 ld r0,GPR0(r1) 1020 ld r0,GPR0(r1)
1007 ld r2,GPR2(r1) 1021 ld r2,GPR2(r1)
1008 ld r3,GPR3(r1) 1022 ld r3,GPR3(r1)
1023
1024 kuap_restore_amr r4
1025
1009 ld r4,GPR4(r1) 1026 ld r4,GPR4(r1)
1010 ld r1,GPR1(r1) 1027 ld r1,GPR1(r1)
1011 RFI_TO_KERNEL 1028 RFI_TO_KERNEL
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9481a117e242..6b86055e5251 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,6 +19,7 @@
19#include <asm/cpuidle.h> 19#include <asm/cpuidle.h>
20#include <asm/head-64.h> 20#include <asm/head-64.h>
21#include <asm/feature-fixups.h> 21#include <asm/feature-fixups.h>
22#include <asm/kup.h>
22 23
23/* 24/*
24 * There are a few constraints to be concerned with. 25 * There are a few constraints to be concerned with.
@@ -120,7 +121,9 @@ EXC_VIRT_NONE(0x4000, 0x100)
120 mfspr r10,SPRN_SRR1 ; \ 121 mfspr r10,SPRN_SRR1 ; \
121 rlwinm. r10,r10,47-31,30,31 ; \ 122 rlwinm. r10,r10,47-31,30,31 ; \
122 beq- 1f ; \ 123 beq- 1f ; \
123 cmpwi cr3,r10,2 ; \ 124 cmpwi cr1,r10,2 ; \
125 mfspr r3,SPRN_SRR1 ; \
126 bltlr cr1 ; /* no state loss, return to idle caller */ \
124 BRANCH_TO_C000(r10, system_reset_idle_common) ; \ 127 BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1251: \ 1281: \
126 KVMTEST_PR(n) ; \ 129 KVMTEST_PR(n) ; \
@@ -144,8 +147,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100)
144 147
145#ifdef CONFIG_PPC_P7_NAP 148#ifdef CONFIG_PPC_P7_NAP
146EXC_COMMON_BEGIN(system_reset_idle_common) 149EXC_COMMON_BEGIN(system_reset_idle_common)
147 mfspr r12,SPRN_SRR1 150 /*
148 b pnv_powersave_wakeup 151 * This must be a direct branch (without linker branch stub) because
152 * we can not use TOC at this point as r2 may not be restored yet.
153 */
154 b idle_return_gpr_loss
149#endif 155#endif
150 156
151/* 157/*
@@ -309,6 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
309 mfspr r11,SPRN_DSISR /* Save DSISR */ 315 mfspr r11,SPRN_DSISR /* Save DSISR */
310 std r11,_DSISR(r1) 316 std r11,_DSISR(r1)
311 std r9,_CCR(r1) /* Save CR in stackframe */ 317 std r9,_CCR(r1) /* Save CR in stackframe */
318 kuap_save_amr_and_lock r9, r10, cr1
312 /* Save r9 through r13 from EXMC save area to stack frame. */ 319 /* Save r9 through r13 from EXMC save area to stack frame. */
313 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) 320 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
314 mfmsr r11 /* get MSR value */ 321 mfmsr r11 /* get MSR value */
@@ -427,17 +434,17 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
427 * Then decrement MCE nesting after finishing with the stack. 434 * Then decrement MCE nesting after finishing with the stack.
428 */ 435 */
429 ld r3,_MSR(r1) 436 ld r3,_MSR(r1)
437 ld r4,_LINK(r1)
430 438
431 lhz r11,PACA_IN_MCE(r13) 439 lhz r11,PACA_IN_MCE(r13)
432 subi r11,r11,1 440 subi r11,r11,1
433 sth r11,PACA_IN_MCE(r13) 441 sth r11,PACA_IN_MCE(r13)
434 442
435 /* Turn off the RI bit because SRR1 is used by idle wakeup code. */ 443 mtlr r4
436 /* Recoverability could be improved by reducing the use of SRR1. */ 444 rlwinm r10,r3,47-31,30,31
437 li r11,0 445 cmpwi cr1,r10,2
438 mtmsrd r11,1 446 bltlr cr1 /* no state loss, return to idle caller */
439 447 b idle_return_gpr_loss
440 b pnv_powersave_wakeup_mce
441#endif 448#endif
442 /* 449 /*
443 * Handle machine check early in real mode. We come here with 450 * Handle machine check early in real mode. We come here with
@@ -1109,6 +1116,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
1109 mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ 1116 mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
1110 mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ 1117 mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
1111 EXCEPTION_PROLOG_COMMON_1() 1118 EXCEPTION_PROLOG_COMMON_1()
1119 /* We don't touch AMR here, we never go to virtual mode */
1112 EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) 1120 EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
1113 EXCEPTION_PROLOG_COMMON_3(0xe60) 1121 EXCEPTION_PROLOG_COMMON_3(0xe60)
1114 addi r3,r1,STACK_FRAME_OVERHEAD 1122 addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 45a8d0be1c96..25f063f56ec5 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -36,6 +36,7 @@
36#include <linux/sysfs.h> 36#include <linux/sysfs.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/cma.h> 38#include <linux/cma.h>
39#include <linux/hugetlb.h>
39 40
40#include <asm/debugfs.h> 41#include <asm/debugfs.h>
41#include <asm/page.h> 42#include <asm/page.h>
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 529dcc21c3f9..cecd57e1d046 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -63,6 +63,7 @@ _GLOBAL(load_fp_state)
63 REST_32FPVSRS(0, R4, R3) 63 REST_32FPVSRS(0, R4, R3)
64 blr 64 blr
65EXPORT_SYMBOL(load_fp_state) 65EXPORT_SYMBOL(load_fp_state)
66_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */
66 67
67/* 68/*
68 * Store FP state into memory, including FPSCR 69 * Store FP state into memory, including FPSCR
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e25b615e9f9e..755fab9641d6 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -37,6 +37,8 @@
37#include <asm/export.h> 37#include <asm/export.h>
38#include <asm/feature-fixups.h> 38#include <asm/feature-fixups.h>
39 39
40#include "head_32.h"
41
40/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ 42/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
41#define LOAD_BAT(n, reg, RA, RB) \ 43#define LOAD_BAT(n, reg, RA, RB) \
42 /* see the comment for clear_bats() -- Cort */ \ 44 /* see the comment for clear_bats() -- Cort */ \
@@ -160,6 +162,10 @@ __after_mmu_off:
160 bl flush_tlbs 162 bl flush_tlbs
161 163
162 bl initial_bats 164 bl initial_bats
165 bl load_segment_registers
166#ifdef CONFIG_KASAN
167 bl early_hash_table
168#endif
163#if defined(CONFIG_BOOTX_TEXT) 169#if defined(CONFIG_BOOTX_TEXT)
164 bl setup_disp_bat 170 bl setup_disp_bat
165#endif 171#endif
@@ -205,7 +211,7 @@ __after_mmu_off:
205 */ 211 */
206turn_on_mmu: 212turn_on_mmu:
207 mfmsr r0 213 mfmsr r0
208 ori r0,r0,MSR_DR|MSR_IR 214 ori r0,r0,MSR_DR|MSR_IR|MSR_RI
209 mtspr SPRN_SRR1,r0 215 mtspr SPRN_SRR1,r0
210 lis r0,start_here@h 216 lis r0,start_here@h
211 ori r0,r0,start_here@l 217 ori r0,r0,start_here@l
@@ -242,103 +248,6 @@ __secondary_hold_spinloop:
242__secondary_hold_acknowledge: 248__secondary_hold_acknowledge:
243 .long -1 249 .long -1
244 250
245/*
246 * Exception entry code. This code runs with address translation
247 * turned off, i.e. using physical addresses.
248 * We assume sprg3 has the physical address of the current
249 * task's thread_struct.
250 */
251#define EXCEPTION_PROLOG \
252 mtspr SPRN_SPRG_SCRATCH0,r10; \
253 mtspr SPRN_SPRG_SCRATCH1,r11; \
254 mfcr r10; \
255 EXCEPTION_PROLOG_1; \
256 EXCEPTION_PROLOG_2
257
258#define EXCEPTION_PROLOG_1 \
259 mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
260 andi. r11,r11,MSR_PR; \
261 tophys(r11,r1); /* use tophys(r1) if kernel */ \
262 beq 1f; \
263 mfspr r11,SPRN_SPRG_THREAD; \
264 lwz r11,TASK_STACK-THREAD(r11); \
265 addi r11,r11,THREAD_SIZE; \
266 tophys(r11,r11); \
2671: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
268
269
270#define EXCEPTION_PROLOG_2 \
271 stw r10,_CCR(r11); /* save registers */ \
272 stw r12,GPR12(r11); \
273 stw r9,GPR9(r11); \
274 mfspr r10,SPRN_SPRG_SCRATCH0; \
275 stw r10,GPR10(r11); \
276 mfspr r12,SPRN_SPRG_SCRATCH1; \
277 stw r12,GPR11(r11); \
278 mflr r10; \
279 stw r10,_LINK(r11); \
280 mfspr r12,SPRN_SRR0; \
281 mfspr r9,SPRN_SRR1; \
282 stw r1,GPR1(r11); \
283 stw r1,0(r11); \
284 tovirt(r1,r11); /* set new kernel sp */ \
285 li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
286 MTMSRD(r10); /* (except for mach check in rtas) */ \
287 stw r0,GPR0(r11); \
288 lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
289 addi r10,r10,STACK_FRAME_REGS_MARKER@l; \
290 stw r10,8(r11); \
291 SAVE_4GPRS(3, r11); \
292 SAVE_2GPRS(7, r11)
293
294/*
295 * Note: code which follows this uses cr0.eq (set if from kernel),
296 * r11, r12 (SRR0), and r9 (SRR1).
297 *
298 * Note2: once we have set r1 we are in a position to take exceptions
299 * again, and we could thus set MSR:RI at that point.
300 */
301
302/*
303 * Exception vectors.
304 */
305#define EXCEPTION(n, label, hdlr, xfer) \
306 . = n; \
307 DO_KVM n; \
308label: \
309 EXCEPTION_PROLOG; \
310 addi r3,r1,STACK_FRAME_OVERHEAD; \
311 xfer(n, hdlr)
312
313#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
314 li r10,trap; \
315 stw r10,_TRAP(r11); \
316 li r10,MSR_KERNEL; \
317 copyee(r10, r9); \
318 bl tfer; \
319i##n: \
320 .long hdlr; \
321 .long ret
322
323#define COPY_EE(d, s) rlwimi d,s,0,16,16
324#define NOCOPY(d, s)
325
326#define EXC_XFER_STD(n, hdlr) \
327 EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
328 ret_from_except_full)
329
330#define EXC_XFER_LITE(n, hdlr) \
331 EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
332 ret_from_except)
333
334#define EXC_XFER_EE(n, hdlr) \
335 EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
336 ret_from_except_full)
337
338#define EXC_XFER_EE_LITE(n, hdlr) \
339 EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
340 ret_from_except)
341
342/* System reset */ 251/* System reset */
343/* core99 pmac starts the secondary here by changing the vector, and 252/* core99 pmac starts the secondary here by changing the vector, and
344 putting it back to what it was (unknown_exception) when done. */ 253 putting it back to what it was (unknown_exception) when done. */
@@ -387,7 +296,11 @@ DataAccess:
387 EXCEPTION_PROLOG 296 EXCEPTION_PROLOG
388 mfspr r10,SPRN_DSISR 297 mfspr r10,SPRN_DSISR
389 stw r10,_DSISR(r11) 298 stw r10,_DSISR(r11)
299#ifdef CONFIG_PPC_KUAP
300 andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
301#else
390 andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h 302 andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
303#endif
391 bne 1f /* if not, try to put a PTE */ 304 bne 1f /* if not, try to put a PTE */
392 mfspr r4,SPRN_DAR /* into the hash table */ 305 mfspr r4,SPRN_DAR /* into the hash table */
393 rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ 306 rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
@@ -428,7 +341,7 @@ Alignment:
428 mfspr r5,SPRN_DSISR 341 mfspr r5,SPRN_DSISR
429 stw r5,_DSISR(r11) 342 stw r5,_DSISR(r11)
430 addi r3,r1,STACK_FRAME_OVERHEAD 343 addi r3,r1,STACK_FRAME_OVERHEAD
431 EXC_XFER_EE(0x600, alignment_exception) 344 EXC_XFER_STD(0x600, alignment_exception)
432 345
433/* Program check exception */ 346/* Program check exception */
434 EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) 347 EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -449,24 +362,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
449 bl load_up_fpu /* if from user, just load it up */ 362 bl load_up_fpu /* if from user, just load it up */
450 b fast_exception_return 363 b fast_exception_return
4511: addi r3,r1,STACK_FRAME_OVERHEAD 3641: addi r3,r1,STACK_FRAME_OVERHEAD
452 EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) 365 EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
453 366
454/* Decrementer */ 367/* Decrementer */
455 EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) 368 EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
456 369
457 EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) 370 EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
458 EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) 371 EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
459 372
460/* System call */ 373/* System call */
461 . = 0xc00 374 . = 0xc00
462 DO_KVM 0xc00 375 DO_KVM 0xc00
463SystemCall: 376SystemCall:
464 EXCEPTION_PROLOG 377 SYSCALL_ENTRY 0xc00
465 EXC_XFER_EE_LITE(0xc00, DoSyscall)
466 378
467/* Single step - not used on 601 */ 379/* Single step - not used on 601 */
468 EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) 380 EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
469 EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) 381 EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
470 382
471/* 383/*
472 * The Altivec unavailable trap is at 0x0f20. Foo. 384 * The Altivec unavailable trap is at 0x0f20. Foo.
@@ -522,9 +434,9 @@ InstructionTLBMiss:
522 andc. r1,r1,r0 /* check access & ~permission */ 434 andc. r1,r1,r0 /* check access & ~permission */
523 bne- InstructionAddressInvalid /* return if access not permitted */ 435 bne- InstructionAddressInvalid /* return if access not permitted */
524 /* Convert linux-style PTE to low word of PPC-style PTE */ 436 /* Convert linux-style PTE to low word of PPC-style PTE */
525 rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ 437 rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
526 ori r1, r1, 0xe05 /* clear out reserved bits */ 438 ori r1, r1, 0xe06 /* clear out reserved bits */
527 andc r1, r0, r1 /* PP = user? 2 : 0 */ 439 andc r1, r0, r1 /* PP = user? 1 : 0 */
528BEGIN_FTR_SECTION 440BEGIN_FTR_SECTION
529 rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ 441 rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
530END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 442END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -590,11 +502,11 @@ DataLoadTLBMiss:
590 * we would need to update the pte atomically with lwarx/stwcx. 502 * we would need to update the pte atomically with lwarx/stwcx.
591 */ 503 */
592 /* Convert linux-style PTE to low word of PPC-style PTE */ 504 /* Convert linux-style PTE to low word of PPC-style PTE */
593 rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ 505 rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
594 rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ 506 rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
595 rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ 507 rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
596 ori r1,r1,0xe04 /* clear out reserved bits */ 508 ori r1,r1,0xe04 /* clear out reserved bits */
597 andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */ 509 andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
598BEGIN_FTR_SECTION 510BEGIN_FTR_SECTION
599 rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ 511 rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
600END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 512END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -670,9 +582,9 @@ DataStoreTLBMiss:
670 * we would need to update the pte atomically with lwarx/stwcx. 582 * we would need to update the pte atomically with lwarx/stwcx.
671 */ 583 */
672 /* Convert linux-style PTE to low word of PPC-style PTE */ 584 /* Convert linux-style PTE to low word of PPC-style PTE */
673 rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ 585 rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
674 li r1,0xe05 /* clear out reserved bits & PP lsb */ 586 li r1,0xe06 /* clear out reserved bits & PP msb */
675 andc r1,r0,r1 /* PP = user? 2: 0 */ 587 andc r1,r0,r1 /* PP = user? 1: 0 */
676BEGIN_FTR_SECTION 588BEGIN_FTR_SECTION
677 rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ 589 rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
678END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) 590END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -698,35 +610,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
698#define altivec_assist_exception unknown_exception 610#define altivec_assist_exception unknown_exception
699#endif 611#endif
700 612
701 EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) 613 EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
702 EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) 614 EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
703 EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) 615 EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
704 EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) 616 EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
705 EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) 617 EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
706 EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) 618 EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
707 EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) 619 EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
708 EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) 620 EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
709 EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) 621 EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
710 EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) 622 EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
711 EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) 623 EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
712 EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) 624 EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
713 EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) 625 EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
714 EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) 626 EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
715 EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) 627 EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
716 EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) 628 EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
717 EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) 629 EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
718 EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) 630 EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
719 EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) 631 EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
720 EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) 632 EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
721 EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) 633 EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
722 EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) 634 EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
723 EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) 635 EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
724 EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) 636 EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
725 EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) 637 EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
726 EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) 638 EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
727 EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) 639 EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
728 EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) 640 EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
729 EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE) 641 EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
730 642
731 . = 0x3000 643 . = 0x3000
732 644
@@ -738,7 +650,7 @@ AltiVecUnavailable:
738 b fast_exception_return 650 b fast_exception_return
739#endif /* CONFIG_ALTIVEC */ 651#endif /* CONFIG_ALTIVEC */
7401: addi r3,r1,STACK_FRAME_OVERHEAD 6521: addi r3,r1,STACK_FRAME_OVERHEAD
741 EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) 653 EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
742 654
743PerformanceMonitor: 655PerformanceMonitor:
744 EXCEPTION_PROLOG 656 EXCEPTION_PROLOG
@@ -880,11 +792,24 @@ _ENTRY(__restore_cpu_setup)
880 blr 792 blr
881#endif /* !defined(CONFIG_PPC_BOOK3S_32) */ 793#endif /* !defined(CONFIG_PPC_BOOK3S_32) */
882 794
883
884/* 795/*
885 * Load stuff into the MMU. Intended to be called with 796 * Load stuff into the MMU. Intended to be called with
886 * IR=0 and DR=0. 797 * IR=0 and DR=0.
887 */ 798 */
799#ifdef CONFIG_KASAN
800early_hash_table:
801 sync /* Force all PTE updates to finish */
802 isync
803 tlbia /* Clear all TLB entries */
804 sync /* wait for tlbia/tlbie to finish */
805 TLBSYNC /* ... on all CPUs */
806 /* Load the SDR1 register (hash table base & size) */
807 lis r6, early_hash - PAGE_OFFSET@h
808 ori r6, r6, 3 /* 256kB table */
809 mtspr SPRN_SDR1, r6
810 blr
811#endif
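The KASAN-only early_hash_table block above installs a static 256kB hash table before MMU_init runs, so hash faults taken by instrumented early code can already be serviced (that is my reading of the surrounding series; the hunk itself only shows the mechanics). The ori with 3 is the SDR1 size encoding: on 32-bit hash MMUs, SDR1 combines the table's physical base with HTABMASK = size/64kB - 1. A sketch of that encoding, with a made-up helper name:

	#include <stdint.h>

	/* sketch: SDR1 = HTABORG (physical base) | HTABMASK, where HTABMASK
	 * is size/64kB - 1 and size must be a power of two >= 64kB;
	 * 256kB -> 3, matching the 'ori r6, r6, 3' above.  Hypothetical
	 * helper, not kernel code.
	 */
	static inline uint32_t sdr1_encode(uint32_t htab_phys, uint32_t size)
	{
		return htab_phys | (size / 0x10000 - 1);
	}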
812
888load_up_mmu: 813load_up_mmu:
889 sync /* Force all PTE updates to finish */ 814 sync /* Force all PTE updates to finish */
890 isync 815 isync
@@ -896,14 +821,6 @@ load_up_mmu:
896 tophys(r6,r6) 821 tophys(r6,r6)
897 lwz r6,_SDR1@l(r6) 822 lwz r6,_SDR1@l(r6)
898 mtspr SPRN_SDR1,r6 823 mtspr SPRN_SDR1,r6
899 li r0,16 /* load up segment register values */
900 mtctr r0 /* for context 0 */
901 lis r3,0x2000 /* Ku = 1, VSID = 0 */
902 li r4,0
9033: mtsrin r3,r4
904 addi r3,r3,0x111 /* increment VSID */
905 addis r4,r4,0x1000 /* address of next segment */
906 bdnz 3b
907 824
908/* Load the BAT registers with the values set up by MMU_init. 825/* Load the BAT registers with the values set up by MMU_init.
909 MMU_init takes care of whether we're on a 601 or not. */ 826 MMU_init takes care of whether we're on a 601 or not. */
@@ -925,6 +842,32 @@ BEGIN_MMU_FTR_SECTION
925END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) 842END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
926 blr 843 blr
927 844
845load_segment_registers:
846 li r0, NUM_USER_SEGMENTS /* load up user segment register values */
847 mtctr r0 /* for context 0 */
848 li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
849#ifdef CONFIG_PPC_KUEP
850 oris r3, r3, SR_NX@h /* Set Nx */
851#endif
852#ifdef CONFIG_PPC_KUAP
853 oris r3, r3, SR_KS@h /* Set Ks */
854#endif
855 li r4, 0
8563: mtsrin r3, r4
857 addi r3, r3, 0x111 /* increment VSID */
858 addis r4, r4, 0x1000 /* address of next segment */
859 bdnz 3b
860 li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */
861 mtctr r0 /* for context 0 */
862 rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */
863 rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */
864 oris r3, r3, SR_KP@h /* Kp = 1 */
8653: mtsrin r3, r4
866 addi r3, r3, 0x111 /* increment VSID */
867 addis r4, r4, 0x1000 /* address of next segment */
868 bdnz 3b
869 blr
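load_segment_registers now seeds user segments with Ks (under CONFIG_PPC_KUAP) and Nx (under CONFIG_PPC_KUEP) while kernel segments get Kp only; the same seeding appears again in switch_mmu_context below. The effect falls out of the architecture's key/PP table: the access key is Kp for user-mode accesses and Ks for kernel-mode ones, and the PTE's PP bits then select the rights. A C model of that table, a sketch of the architected behaviour rather than kernel code:

	enum rights { NO_ACCESS, READ_ONLY, READ_WRITE };

	/* classic 32-bit hash protection: key = Kp when MSR[PR]=1 (user),
	 * Ks when MSR[PR]=0 (kernel); PP then picks the rights
	 */
	static enum rights hash32_rights(int msr_pr, int ks, int kp, unsigned int pp)
	{
		int key = msr_pr ? kp : ks;

		if (!key)
			return pp == 3 ? READ_ONLY : READ_WRITE;
		switch (pp) {
		case 0:	return NO_ACCESS;
		case 1:	return READ_ONLY;
		case 2:	return READ_WRITE;
		default: return READ_ONLY;	/* pp == 3 */
		}
	}

With user pages now at PP=01 (see the TLB-miss hunks earlier), hash32_rights(0, 1, 0, 1) is READ_ONLY: a KUAP-locked kernel can still read but not write user memory, and the uaccess paths presumably clear Ks again around legitimate copy_to/from_user() windows. Since the segment N bit blocks instruction fetch for user mode too, the KUEP entry/exit code elsewhere in the series apparently toggles Nx; what is seeded here is the locked, in-kernel state.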
870
928/* 871/*
929 * This is where the main kernel code starts. 872 * This is where the main kernel code starts.
930 */ 873 */
@@ -950,11 +893,17 @@ start_here:
950 * Do early platform-specific initialization, 893 * Do early platform-specific initialization,
951 * and set up the MMU. 894 * and set up the MMU.
952 */ 895 */
896#ifdef CONFIG_KASAN
897 bl kasan_early_init
898#endif
953 li r3,0 899 li r3,0
954 mr r4,r31 900 mr r4,r31
955 bl machine_init 901 bl machine_init
956 bl __save_cpu_setup 902 bl __save_cpu_setup
957 bl MMU_init 903 bl MMU_init
904BEGIN_MMU_FTR_SECTION
905 bl MMU_init_hw_patch
906END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
958 907
959/* 908/*
960 * Go back to running unmapped so we can load up new values 909 * Go back to running unmapped so we can load up new values
@@ -1006,7 +955,12 @@ _ENTRY(switch_mmu_context)
1006 blt- 4f 955 blt- 4f
1007 mulli r3,r3,897 /* multiply context by skew factor */ 956 mulli r3,r3,897 /* multiply context by skew factor */
1008 rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ 957 rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
1009 addis r3,r3,0x6000 /* Set Ks, Ku bits */ 958#ifdef CONFIG_PPC_KUEP
959 oris r3, r3, SR_NX@h /* Set Nx */
960#endif
961#ifdef CONFIG_PPC_KUAP
962 oris r3, r3, SR_KS@h /* Set Ks */
963#endif
1010 li r0,NUM_USER_SEGMENTS 964 li r0,NUM_USER_SEGMENTS
1011 mtctr r0 965 mtctr r0
1012 966
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
new file mode 100644
index 000000000000..4a692553651f
--- /dev/null
+++ b/arch/powerpc/kernel/head_32.h
@@ -0,0 +1,203 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __HEAD_32_H__
3#define __HEAD_32_H__
4
5#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
6
7/*
 8 * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it includes MSR_CE.
9 */
10.macro __LOAD_MSR_KERNEL r, x
11.if \x >= 0x8000
12 lis \r, (\x)@h
13 ori \r, \r, (\x)@l
14.else
15 li \r, (\x)
16.endif
17.endm
18#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
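The 0x8000 threshold exists because li sign-extends its 16-bit immediate, so any MSR value with bit 0x8000 set (MSR_CE on 4xx/Book-E kernels pushes MSR_KERNEL over that line) must be built with the lis/ori pair instead. A host-side demonstration of the difference, plain C and illustrative only:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* what 'li rD,0x8000' would leave in the register: the
		 * 16-bit immediate is sign-extended
		 */
		uint32_t via_li = (uint32_t)(int32_t)(int16_t)0x8000;
		/* what the lis/ori pair in __LOAD_MSR_KERNEL produces */
		uint32_t via_lis_ori = (0x8000u & 0xffff0000) | (0x8000u & 0xffff);

		printf("li      -> %#010x (wrong)\n", via_li);	/* 0xffff8000 */
		printf("lis/ori -> %#010x\n", via_lis_ori);	/* 0x00008000 */
		return 0;
	}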
19
20/*
21 * Exception entry code. This code runs with address translation
22 * turned off, i.e. using physical addresses.
23 * We assume sprg3 has the physical address of the current
24 * task's thread_struct.
25 */
26
27.macro EXCEPTION_PROLOG
28 mtspr SPRN_SPRG_SCRATCH0,r10
29 mtspr SPRN_SPRG_SCRATCH1,r11
30 mfcr r10
31 EXCEPTION_PROLOG_1
32 EXCEPTION_PROLOG_2
33.endm
34
35.macro EXCEPTION_PROLOG_1
36 mfspr r11,SPRN_SRR1 /* check whether user or kernel */
37 andi. r11,r11,MSR_PR
38 tophys(r11,r1) /* use tophys(r1) if kernel */
39 beq 1f
40 mfspr r11,SPRN_SPRG_THREAD
41 lwz r11,TASK_STACK-THREAD(r11)
42 addi r11,r11,THREAD_SIZE
43 tophys(r11,r11)
441: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
45.endm
46
47.macro EXCEPTION_PROLOG_2
48 stw r10,_CCR(r11) /* save registers */
49 stw r12,GPR12(r11)
50 stw r9,GPR9(r11)
51 mfspr r10,SPRN_SPRG_SCRATCH0
52 stw r10,GPR10(r11)
53 mfspr r12,SPRN_SPRG_SCRATCH1
54 stw r12,GPR11(r11)
55 mflr r10
56 stw r10,_LINK(r11)
57 mfspr r12,SPRN_SRR0
58 mfspr r9,SPRN_SRR1
59 stw r1,GPR1(r11)
60 stw r1,0(r11)
61 tovirt(r1,r11) /* set new kernel sp */
62#ifdef CONFIG_40x
63 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
64#else
65 li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
66 MTMSRD(r10) /* (except for mach check in rtas) */
67#endif
68 stw r0,GPR0(r11)
69 lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
70 addi r10,r10,STACK_FRAME_REGS_MARKER@l
71 stw r10,8(r11)
72 SAVE_4GPRS(3, r11)
73 SAVE_2GPRS(7, r11)
74.endm
75
76.macro SYSCALL_ENTRY trapno
77 mfspr r12,SPRN_SPRG_THREAD
78 mfcr r10
79 lwz r11,TASK_STACK-THREAD(r12)
80 mflr r9
81 addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
82 rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
83 tophys(r11,r11)
84 stw r10,_CCR(r11) /* save registers */
85 mfspr r10,SPRN_SRR0
86 stw r9,_LINK(r11)
87 mfspr r9,SPRN_SRR1
88 stw r1,GPR1(r11)
89 stw r1,0(r11)
90 tovirt(r1,r11) /* set new kernel sp */
91 stw r10,_NIP(r11)
92#ifdef CONFIG_40x
93 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
94#else
95 LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
96 MTMSRD(r10) /* (except for mach check in rtas) */
97#endif
98 lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
99 stw r2,GPR2(r11)
100 addi r10,r10,STACK_FRAME_REGS_MARKER@l
101 stw r9,_MSR(r11)
102 li r2, \trapno + 1
103 stw r10,8(r11)
104 stw r2,_TRAP(r11)
105 SAVE_GPR(0, r11)
106 SAVE_4GPRS(3, r11)
107 SAVE_2GPRS(7, r11)
108 addi r11,r1,STACK_FRAME_OVERHEAD
109 addi r2,r12,-THREAD
110 stw r11,PT_REGS(r12)
111#if defined(CONFIG_40x)
112 /* Check to see if the dbcr0 register is set up to debug. Use the
113 internal debug mode bit to do this. */
114 lwz r12,THREAD_DBCR0(r12)
115 andis. r12,r12,DBCR0_IDM@h
116#endif
117 ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
118#if defined(CONFIG_40x)
119 beq+ 3f
120 /* From user and task is ptraced - load up global dbcr0 */
121 li r12,-1 /* clear all pending debug events */
122 mtspr SPRN_DBSR,r12
123 lis r11,global_dbcr0@ha
124 tophys(r11,r11)
125 addi r11,r11,global_dbcr0@l
126 lwz r12,0(r11)
127 mtspr SPRN_DBCR0,r12
128 lwz r12,4(r11)
129 addi r12,r12,-1
130 stw r12,4(r11)
131#endif
132
1333:
134 tovirt(r2, r2) /* set r2 to current */
135 lis r11, transfer_to_syscall@h
136 ori r11, r11, transfer_to_syscall@l
137#ifdef CONFIG_TRACE_IRQFLAGS
138 /*
139 * If MSR is changing, we need to keep interrupts disabled at this point;
140 * otherwise we might risk taking an interrupt before we tell lockdep
141 * they are enabled.
142 */
143 LOAD_MSR_KERNEL(r10, MSR_KERNEL)
144 rlwimi r10, r9, 0, MSR_EE
145#else
146 LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
147#endif
148#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
149 mtspr SPRN_NRI, r0
150#endif
151 mtspr SPRN_SRR1,r10
152 mtspr SPRN_SRR0,r11
153 SYNC
154 RFI /* jump to handler, enable MMU */
155.endm
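Note the li r2, \trapno + 1 above: on 32-bit powerpc the low bit of the trap word flags a frame whose non-volatile GPRs were not saved. That is why the syscall path and EXC_XFER_LITE below use n+1, while EXC_XFER_STD, which goes through transfer_to_handler_full and its SAVE_NVGPRS, uses the plain number. Generic code tests the flag with something like the following (the real macro lives in asm/ptrace.h; quoted from memory, so treat it as an approximation):

	/* 32-bit: low bit of pt_regs->trap set => GPR14-31 not saved */
	#define FULL_REGS(regs)	(((regs)->trap & 1) == 0)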
156
157/*
158 * Note: code which follows this uses cr0.eq (set if from kernel),
159 * r11, r12 (SRR0), and r9 (SRR1).
160 *
161 * Note2: once we have set r1 we are in a position to take exceptions
162 * again, and we could thus set MSR:RI at that point.
163 */
164
165/*
166 * Exception vectors.
167 */
168#ifdef CONFIG_PPC_BOOK3S
169#define START_EXCEPTION(n, label) \
170 . = n; \
171 DO_KVM n; \
172label:
173
174#else
175#define START_EXCEPTION(n, label) \
176 . = n; \
177label:
178
179#endif
180
181#define EXCEPTION(n, label, hdlr, xfer) \
182 START_EXCEPTION(n, label) \
183 EXCEPTION_PROLOG; \
184 addi r3,r1,STACK_FRAME_OVERHEAD; \
185 xfer(n, hdlr)
186
187#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
188 li r10,trap; \
189 stw r10,_TRAP(r11); \
190 LOAD_MSR_KERNEL(r10, msr); \
191 bl tfer; \
192 .long hdlr; \
193 .long ret
194
195#define EXC_XFER_STD(n, hdlr) \
196 EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
197 ret_from_except_full)
198
199#define EXC_XFER_LITE(n, hdlr) \
200 EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
201 ret_from_except)
202
203#endif /* __HEAD_32_H__ */
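One convention in this header deserves a gloss: EXC_XFER_TEMPLATE emits the handler and return addresses as data words directly after the bl, and the transfer routine recovers them through the link register (roughly: mflr, load the two words, and rfi to the first). A C-level model of those two words, purely to show the layout; none of this is real kernel code:

	struct pt_regs;	/* opaque here */

	/* the '.long hdlr' / '.long ret' pair that follows 'bl tfer' */
	struct xfer_words {
		void (*hdlr)(struct pt_regs *regs);	/* where tfer jumps */
		void (*ret)(void);			/* where the handler returns */
	};

	/* after the bl, LR points at the first word, so conceptually: */
	static inline struct xfer_words xfer_from_lr(unsigned long lr)
	{
		return *(const struct xfer_words *)lr;
	}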
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index a9c934f2319b..cf54b784100d 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -44,6 +44,8 @@
44#include <asm/export.h> 44#include <asm/export.h>
45#include <asm/asm-405.h> 45#include <asm/asm-405.h>
46 46
47#include "head_32.h"
48
47/* As with the other PowerPC ports, it is expected that when code 49/* As with the other PowerPC ports, it is expected that when code
48 * execution begins here, the following registers contain valid, yet 50 * execution begins here, the following registers contain valid, yet
49 * optional, information: 51 * optional, information:
@@ -99,46 +101,6 @@ _ENTRY(saved_ksp_limit)
99 .space 4 101 .space 4
100 102
101/* 103/*
102 * Exception vector entry code. This code runs with address translation
103 * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
104 * the physical address of the current task thread_struct.
105 * Note that we have to have decremented r1 before we write to any fields
106 * of the exception frame, since a critical interrupt could occur at any
107 * time, and it will write to the area immediately below the current r1.
108 */
109#define NORMAL_EXCEPTION_PROLOG \
110 mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
111 mtspr SPRN_SPRG_SCRATCH1,r11; \
112 mtspr SPRN_SPRG_SCRATCH2,r1; \
113 mfcr r10; /* save CR in r10 for now */\
114 mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
115 andi. r11,r11,MSR_PR; \
116 beq 1f; \
117 mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
118 lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\
119 addi r1,r1,THREAD_SIZE; \
1201: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
121 tophys(r11,r1); \
122 stw r10,_CCR(r11); /* save various registers */\
123 stw r12,GPR12(r11); \
124 stw r9,GPR9(r11); \
125 mfspr r10,SPRN_SPRG_SCRATCH0; \
126 stw r10,GPR10(r11); \
127 mfspr r12,SPRN_SPRG_SCRATCH1; \
128 stw r12,GPR11(r11); \
129 mflr r10; \
130 stw r10,_LINK(r11); \
131 mfspr r10,SPRN_SPRG_SCRATCH2; \
132 mfspr r12,SPRN_SRR0; \
133 stw r10,GPR1(r11); \
134 mfspr r9,SPRN_SRR1; \
135 stw r10,0(r11); \
136 rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
137 stw r0,GPR0(r11); \
138 SAVE_4GPRS(3, r11); \
139 SAVE_2GPRS(7, r11)
140
141/*
142 * Exception prolog for critical exceptions. This is a little different 104 * Exception prolog for critical exceptions. This is a little different
143 * from the normal exception prolog above since a critical exception 105 * from the normal exception prolog above since a critical exception
144 * can potentially occur at any point during normal exception processing. 106 * can potentially occur at any point during normal exception processing.
@@ -177,6 +139,9 @@ _ENTRY(saved_ksp_limit)
177 tovirt(r1,r11); \ 139 tovirt(r1,r11); \
178 rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ 140 rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
179 stw r0,GPR0(r11); \ 141 stw r0,GPR0(r11); \
142 lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
143 addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
144 stw r10, 8(r11); \
180 SAVE_4GPRS(3, r11); \ 145 SAVE_4GPRS(3, r11); \
181 SAVE_2GPRS(7, r11) 146 SAVE_2GPRS(7, r11)
182 147
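The three added lines store STACK_FRAME_REGS_MARKER at offset 8 of the critical-exception frame, matching what EXCEPTION_PROLOG_2 in the new head_32.h already does, so stack dumpers can recognise a pt_regs boundary. On 32-bit the marker is, assuming the usual definition, just ASCII:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t marker = 0x72656773;	/* assumed STACK_FRAME_REGS_MARKER */

		for (int shift = 24; shift >= 0; shift -= 8)
			putchar((marker >> shift) & 0xff);
		putchar('\n');			/* prints: regs */
		return 0;
	}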
@@ -196,53 +161,12 @@ _ENTRY(saved_ksp_limit)
196/* 161/*
197 * Exception vectors. 162 * Exception vectors.
198 */ 163 */
199#define START_EXCEPTION(n, label) \
200 . = n; \
201label:
202
203#define EXCEPTION(n, label, hdlr, xfer) \
204 START_EXCEPTION(n, label); \
205 NORMAL_EXCEPTION_PROLOG; \
206 addi r3,r1,STACK_FRAME_OVERHEAD; \
207 xfer(n, hdlr)
208
209#define CRITICAL_EXCEPTION(n, label, hdlr) \ 164#define CRITICAL_EXCEPTION(n, label, hdlr) \
210 START_EXCEPTION(n, label); \ 165 START_EXCEPTION(n, label); \
211 CRITICAL_EXCEPTION_PROLOG; \ 166 CRITICAL_EXCEPTION_PROLOG; \
212 addi r3,r1,STACK_FRAME_OVERHEAD; \ 167 addi r3,r1,STACK_FRAME_OVERHEAD; \
213 EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ 168 EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
214 NOCOPY, crit_transfer_to_handler, \ 169 crit_transfer_to_handler, ret_from_crit_exc)
215 ret_from_crit_exc)
216
217#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
218 li r10,trap; \
219 stw r10,_TRAP(r11); \
220 lis r10,msr@h; \
221 ori r10,r10,msr@l; \
222 copyee(r10, r9); \
223 bl tfer; \
224 .long hdlr; \
225 .long ret
226
227#define COPY_EE(d, s) rlwimi d,s,0,16,16
228#define NOCOPY(d, s)
229
230#define EXC_XFER_STD(n, hdlr) \
231 EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
232 ret_from_except_full)
233
234#define EXC_XFER_LITE(n, hdlr) \
235 EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
236 ret_from_except)
237
238#define EXC_XFER_EE(n, hdlr) \
239 EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
240 ret_from_except_full)
241
242#define EXC_XFER_EE_LITE(n, hdlr) \
243 EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
244 ret_from_except)
245
246 170
247/* 171/*
248 * 0x0100 - Critical Interrupt Exception 172 * 0x0100 - Critical Interrupt Exception
@@ -393,7 +317,7 @@ label:
393 * This is caused by a fetch from non-execute or guarded pages. 317 * This is caused by a fetch from non-execute or guarded pages.
394 */ 318 */
395 START_EXCEPTION(0x0400, InstructionAccess) 319 START_EXCEPTION(0x0400, InstructionAccess)
396 NORMAL_EXCEPTION_PROLOG 320 EXCEPTION_PROLOG
397 mr r4,r12 /* Pass SRR0 as arg2 */ 321 mr r4,r12 /* Pass SRR0 as arg2 */
398 li r5,0 /* Pass zero as arg3 */ 322 li r5,0 /* Pass zero as arg3 */
399 EXC_XFER_LITE(0x400, handle_page_fault) 323 EXC_XFER_LITE(0x400, handle_page_fault)
@@ -403,33 +327,32 @@ label:
403 327
404/* 0x0600 - Alignment Exception */ 328/* 0x0600 - Alignment Exception */
405 START_EXCEPTION(0x0600, Alignment) 329 START_EXCEPTION(0x0600, Alignment)
406 NORMAL_EXCEPTION_PROLOG 330 EXCEPTION_PROLOG
407 mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ 331 mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
408 stw r4,_DEAR(r11) 332 stw r4,_DEAR(r11)
409 addi r3,r1,STACK_FRAME_OVERHEAD 333 addi r3,r1,STACK_FRAME_OVERHEAD
410 EXC_XFER_EE(0x600, alignment_exception) 334 EXC_XFER_STD(0x600, alignment_exception)
411 335
412/* 0x0700 - Program Exception */ 336/* 0x0700 - Program Exception */
413 START_EXCEPTION(0x0700, ProgramCheck) 337 START_EXCEPTION(0x0700, ProgramCheck)
414 NORMAL_EXCEPTION_PROLOG 338 EXCEPTION_PROLOG
415 mfspr r4,SPRN_ESR /* Grab the ESR and save it */ 339 mfspr r4,SPRN_ESR /* Grab the ESR and save it */
416 stw r4,_ESR(r11) 340 stw r4,_ESR(r11)
417 addi r3,r1,STACK_FRAME_OVERHEAD 341 addi r3,r1,STACK_FRAME_OVERHEAD
418 EXC_XFER_STD(0x700, program_check_exception) 342 EXC_XFER_STD(0x700, program_check_exception)
419 343
420 EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) 344 EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
421 EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) 345 EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
422 EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) 346 EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
423 EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) 347 EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
424 348
425/* 0x0C00 - System Call Exception */ 349/* 0x0C00 - System Call Exception */
426 START_EXCEPTION(0x0C00, SystemCall) 350 START_EXCEPTION(0x0C00, SystemCall)
427 NORMAL_EXCEPTION_PROLOG 351 SYSCALL_ENTRY 0xc00
428 EXC_XFER_EE_LITE(0xc00, DoSyscall)
429 352
430 EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) 353 EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
431 EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) 354 EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
432 EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) 355 EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
433 356
434/* 0x1000 - Programmable Interval Timer (PIT) Exception */ 357/* 0x1000 - Programmable Interval Timer (PIT) Exception */
435 . = 0x1000 358 . = 0x1000
@@ -646,25 +569,25 @@ label:
646 mfspr r10, SPRN_SPRG_SCRATCH0 569 mfspr r10, SPRN_SPRG_SCRATCH0
647 b InstructionAccess 570 b InstructionAccess
648 571
649 EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) 572 EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
650 EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) 573 EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
651 EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) 574 EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
652 EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) 575 EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
653#ifdef CONFIG_IBM405_ERR51 576#ifdef CONFIG_IBM405_ERR51
654 /* 405GP errata 51 */ 577 /* 405GP errata 51 */
655 START_EXCEPTION(0x1700, Trap_17) 578 START_EXCEPTION(0x1700, Trap_17)
656 b DTLBMiss 579 b DTLBMiss
657#else 580#else
658 EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) 581 EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
659#endif 582#endif
660 EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) 583 EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
661 EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) 584 EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
662 EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) 585 EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
663 EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) 586 EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
664 EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) 587 EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
665 EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) 588 EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
666 EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) 589 EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
667 EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) 590 EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
668 591
669/* Check for a single step debug exception while in an exception 592/* Check for a single step debug exception while in an exception
670 * handler before state has been saved. This is to catch the case 593 * handler before state has been saved. This is to catch the case
@@ -726,11 +649,11 @@ label:
726 addi r3,r1,STACK_FRAME_OVERHEAD 649 addi r3,r1,STACK_FRAME_OVERHEAD
727 EXC_XFER_TEMPLATE(DebugException, 0x2002, \ 650 EXC_XFER_TEMPLATE(DebugException, 0x2002, \
728 (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ 651 (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
729 NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) 652 crit_transfer_to_handler, ret_from_crit_exc)
730 653
731 /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ 654 /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
732Decrementer: 655Decrementer:
733 NORMAL_EXCEPTION_PROLOG 656 EXCEPTION_PROLOG
734 lis r0,TSR_PIS@h 657 lis r0,TSR_PIS@h
735 mtspr SPRN_TSR,r0 /* Clear the PIT exception */ 658 mtspr SPRN_TSR,r0 /* Clear the PIT exception */
736 addi r3,r1,STACK_FRAME_OVERHEAD 659 addi r3,r1,STACK_FRAME_OVERHEAD
@@ -738,9 +661,9 @@ Decrementer:
738 661
739 /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ 662 /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
740FITException: 663FITException:
741 NORMAL_EXCEPTION_PROLOG 664 EXCEPTION_PROLOG
742 addi r3,r1,STACK_FRAME_OVERHEAD; 665 addi r3,r1,STACK_FRAME_OVERHEAD;
743 EXC_XFER_EE(0x1010, unknown_exception) 666 EXC_XFER_STD(0x1010, unknown_exception)
744 667
745 /* Watchdog Timer (WDT) Exception. (from 0x1020) */ 668 /* Watchdog Timer (WDT) Exception. (from 0x1020) */
746WDTException: 669WDTException:
@@ -748,15 +671,14 @@ WDTException:
748 addi r3,r1,STACK_FRAME_OVERHEAD; 671 addi r3,r1,STACK_FRAME_OVERHEAD;
749 EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, 672 EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
750 (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), 673 (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
751 NOCOPY, crit_transfer_to_handler, 674 crit_transfer_to_handler, ret_from_crit_exc)
752 ret_from_crit_exc)
753 675
754/* 676/*
755 * The other Data TLB exceptions bail out to this point 677 * The other Data TLB exceptions bail out to this point
756 * if they can't resolve the lightweight TLB fault. 678 * if they can't resolve the lightweight TLB fault.
757 */ 679 */
758DataAccess: 680DataAccess:
759 NORMAL_EXCEPTION_PROLOG 681 EXCEPTION_PROLOG
760 mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ 682 mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
761 stw r5,_ESR(r11) 683 stw r5,_ESR(r11)
762 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ 684 mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
@@ -848,6 +770,9 @@ start_here:
848/* 770/*
849 * Decide what sort of machine this is and initialize the MMU. 771 * Decide what sort of machine this is and initialize the MMU.
850 */ 772 */
773#ifdef CONFIG_KASAN
774 bl kasan_early_init
775#endif
851 li r3,0 776 li r3,0
852 mr r4,r31 777 mr r4,r31
853 bl machine_init 778 bl machine_init
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 37117ab11584..f15fba58c744 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -203,6 +203,9 @@ _ENTRY(_start);
203/* 203/*
204 * Decide what sort of machine this is and initialize the MMU. 204 * Decide what sort of machine this is and initialize the MMU.
205 */ 205 */
206#ifdef CONFIG_KASAN
207 bl kasan_early_init
208#endif
206 li r3,0 209 li r3,0
207 mr r4,r31 210 mr r4,r31
208 bl machine_init 211 bl machine_init
@@ -278,16 +281,15 @@ interrupt_base:
278 FP_UNAVAILABLE_EXCEPTION 281 FP_UNAVAILABLE_EXCEPTION
279#else 282#else
280 EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ 283 EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
281 FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) 284 FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
282#endif 285#endif
283 /* System Call Interrupt */ 286 /* System Call Interrupt */
284 START_EXCEPTION(SystemCall) 287 START_EXCEPTION(SystemCall)
285 NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) 288 SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL
286 EXC_XFER_EE_LITE(0x0c00, DoSyscall)
287 289
288 /* Auxiliary Processor Unavailable Interrupt */ 290 /* Auxiliary Processor Unavailable Interrupt */
289 EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ 291 EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
290 AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) 292 AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
291 293
292 /* Decrementer Interrupt */ 294 /* Decrementer Interrupt */
293 DECREMENTER_EXCEPTION 295 DECREMENTER_EXCEPTION
@@ -295,7 +297,7 @@ interrupt_base:
295 /* Fixed Internal Timer Interrupt */ 297 /* Fixed Internal Timer Interrupt */
296 /* TODO: Add FIT support */ 298 /* TODO: Add FIT support */
297 EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ 299 EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
298 unknown_exception, EXC_XFER_EE) 300 unknown_exception, EXC_XFER_STD)
299 301
300 /* Watchdog Timer Interrupt */ 302 /* Watchdog Timer Interrupt */
301 /* TODO: Add watchdog support */ 303 /* TODO: Add watchdog support */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3fad8d499767..5321a11c2835 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -968,7 +968,9 @@ start_here_multiplatform:
968 968
969 /* Restore parameters passed from prom_init/kexec */ 969 /* Restore parameters passed from prom_init/kexec */
970 mr r3,r31 970 mr r3,r31
971 bl early_setup /* also sets r13 and SPRG_PACA */ 971 LOAD_REG_ADDR(r12, DOTSYM(early_setup))
972 mtctr r12
973 bctrl /* also sets r13 and SPRG_PACA */
972 974
973 LOAD_REG_ADDR(r3, start_here_common) 975 LOAD_REG_ADDR(r3, start_here_common)
974 ld r4,PACAKMSR(r13) 976 ld r4,PACAKMSR(r13)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 03c73b4c6435..885be7f3d29a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -33,6 +33,8 @@
33#include <asm/export.h> 33#include <asm/export.h>
34#include <asm/code-patching-asm.h> 34#include <asm/code-patching-asm.h>
35 35
36#include "head_32.h"
37
36#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 38#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
37/* By simply checking Address >= 0x80000000, we know if its a kernel address */ 39/* By simply checking Address >= 0x80000000, we know if its a kernel address */
38#define SIMPLE_KERNEL_ADDRESS 1 40#define SIMPLE_KERNEL_ADDRESS 1
@@ -123,102 +125,6 @@ instruction_counter:
123 .space 4 125 .space 4
124#endif 126#endif
125 127
126/*
127 * Exception entry code. This code runs with address translation
128 * turned off, i.e. using physical addresses.
129 * We assume sprg3 has the physical address of the current
130 * task's thread_struct.
131 */
132#define EXCEPTION_PROLOG \
133 mtspr SPRN_SPRG_SCRATCH0, r10; \
134 mtspr SPRN_SPRG_SCRATCH1, r11; \
135 mfcr r10; \
136 EXCEPTION_PROLOG_1; \
137 EXCEPTION_PROLOG_2
138
139#define EXCEPTION_PROLOG_1 \
140 mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
141 andi. r11,r11,MSR_PR; \
142 tophys(r11,r1); /* use tophys(r1) if kernel */ \
143 beq 1f; \
144 mfspr r11,SPRN_SPRG_THREAD; \
145 lwz r11,TASK_STACK-THREAD(r11); \
146 addi r11,r11,THREAD_SIZE; \
147 tophys(r11,r11); \
1481: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
149
150
151#define EXCEPTION_PROLOG_2 \
152 stw r10,_CCR(r11); /* save registers */ \
153 stw r12,GPR12(r11); \
154 stw r9,GPR9(r11); \
155 mfspr r10,SPRN_SPRG_SCRATCH0; \
156 stw r10,GPR10(r11); \
157 mfspr r12,SPRN_SPRG_SCRATCH1; \
158 stw r12,GPR11(r11); \
159 mflr r10; \
160 stw r10,_LINK(r11); \
161 mfspr r12,SPRN_SRR0; \
162 mfspr r9,SPRN_SRR1; \
163 stw r1,GPR1(r11); \
164 stw r1,0(r11); \
165 tovirt(r1,r11); /* set new kernel sp */ \
166 li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
167 mtmsr r10; \
168 stw r0,GPR0(r11); \
169 lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
170 addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
171 stw r10, 8(r11); \
172 SAVE_4GPRS(3, r11); \
173 SAVE_2GPRS(7, r11)
174
175/*
176 * Note: code which follows this uses cr0.eq (set if from kernel),
177 * r11, r12 (SRR0), and r9 (SRR1).
178 *
179 * Note2: once we have set r1 we are in a position to take exceptions
180 * again, and we could thus set MSR:RI at that point.
181 */
182
183/*
184 * Exception vectors.
185 */
186#define EXCEPTION(n, label, hdlr, xfer) \
187 . = n; \
188label: \
189 EXCEPTION_PROLOG; \
190 addi r3,r1,STACK_FRAME_OVERHEAD; \
191 xfer(n, hdlr)
192
193#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
194 li r10,trap; \
195 stw r10,_TRAP(r11); \
196 li r10,MSR_KERNEL; \
197 copyee(r10, r9); \
198 bl tfer; \
199i##n: \
200 .long hdlr; \
201 .long ret
202
203#define COPY_EE(d, s) rlwimi d,s,0,16,16
204#define NOCOPY(d, s)
205
206#define EXC_XFER_STD(n, hdlr) \
207 EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
208 ret_from_except_full)
209
210#define EXC_XFER_LITE(n, hdlr) \
211 EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
212 ret_from_except)
213
214#define EXC_XFER_EE(n, hdlr) \
215 EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
216 ret_from_except_full)
217
218#define EXC_XFER_EE_LITE(n, hdlr) \
219 EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
220 ret_from_except)
221
222/* System reset */ 128/* System reset */
223 EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) 129 EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
224 130
@@ -261,7 +167,7 @@ Alignment:
261 mfspr r5,SPRN_DSISR 167 mfspr r5,SPRN_DSISR
262 stw r5,_DSISR(r11) 168 stw r5,_DSISR(r11)
263 addi r3,r1,STACK_FRAME_OVERHEAD 169 addi r3,r1,STACK_FRAME_OVERHEAD
264 EXC_XFER_EE(0x600, alignment_exception) 170 EXC_XFER_STD(0x600, alignment_exception)
265 171
266/* Program check exception */ 172/* Program check exception */
267 EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) 173 EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -273,19 +179,18 @@ Alignment:
273/* Decrementer */ 179/* Decrementer */
274 EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) 180 EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
275 181
276 EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) 182 EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
277 EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) 183 EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
278 184
279/* System call */ 185/* System call */
280 . = 0xc00 186 . = 0xc00
281SystemCall: 187SystemCall:
282 EXCEPTION_PROLOG 188 SYSCALL_ENTRY 0xc00
283 EXC_XFER_EE_LITE(0xc00, DoSyscall)
284 189
285/* Single step - not used on 601 */ 190/* Single step - not used on 601 */
286 EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) 191 EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
287 EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) 192 EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
288 EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) 193 EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD)
289 194
290/* On the MPC8xx, this is a software emulation interrupt. It occurs 195/* On the MPC8xx, this is a software emulation interrupt. It occurs
291 * for all unimplemented and illegal instructions. 196 * for all unimplemented and illegal instructions.
@@ -615,13 +520,13 @@ DARFixed:/* Return from dcbx instruction bug workaround */
615 /* 0x300 is DataAccess exception, needed by bad_page_fault() */ 520 /* 0x300 is DataAccess exception, needed by bad_page_fault() */
616 EXC_XFER_LITE(0x300, handle_page_fault) 521 EXC_XFER_LITE(0x300, handle_page_fault)
617 522
618 EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) 523 EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
619 EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) 524 EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
620 EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) 525 EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
621 EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) 526 EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
622 EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) 527 EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
623 EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) 528 EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
624 EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) 529 EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
625 530
626/* On the MPC8xx, these next four traps are used for development 531/* On the MPC8xx, these next four traps are used for development
627 * support of breakpoints and such. Someday I will get around to 532 * support of breakpoints and such. Someday I will get around to
@@ -643,7 +548,7 @@ DataBreakpoint:
643 mfspr r4,SPRN_BAR 548 mfspr r4,SPRN_BAR
644 stw r4,_DAR(r11) 549 stw r4,_DAR(r11)
645 mfspr r5,SPRN_DSISR 550 mfspr r5,SPRN_DSISR
646 EXC_XFER_EE(0x1c00, do_break) 551 EXC_XFER_STD(0x1c00, do_break)
64711: 55211:
648 mtcr r10 553 mtcr r10
649 mfspr r10, SPRN_SPRG_SCRATCH0 554 mfspr r10, SPRN_SPRG_SCRATCH0
@@ -663,10 +568,10 @@ InstructionBreakpoint:
663 mfspr r10, SPRN_SPRG_SCRATCH0 568 mfspr r10, SPRN_SPRG_SCRATCH0
664 rfi 569 rfi
665#else 570#else
666 EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) 571 EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
667#endif 572#endif
668 EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) 573 EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
669 EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) 574 EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
670 575
671 . = 0x2000 576 . = 0x2000
672 577
@@ -853,6 +758,9 @@ start_here:
853/* 758/*
854 * Decide what sort of machine this is and initialize the MMU. 759 * Decide what sort of machine this is and initialize the MMU.
855 */ 760 */
761#ifdef CONFIG_KASAN
762 bl kasan_early_init
763#endif
856 li r3,0 764 li r3,0
857 mr r4,r31 765 mr r4,r31
858 bl machine_init 766 bl machine_init
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 1b22a8dea399..bfeb469e8106 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -6,6 +6,8 @@
6#include <asm/kvm_asm.h> 6#include <asm/kvm_asm.h>
7#include <asm/kvm_booke_hv_asm.h> 7#include <asm/kvm_booke_hv_asm.h>
8 8
9#ifdef __ASSEMBLY__
10
9/* 11/*
10 * Macros used for common Book-e exception handling 12 * Macros used for common Book-e exception handling
11 */ 13 */
@@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION
81 SAVE_4GPRS(3, r11); \ 83 SAVE_4GPRS(3, r11); \
82 SAVE_2GPRS(7, r11) 84 SAVE_2GPRS(7, r11)
83 85
86.macro SYSCALL_ENTRY trapno intno
87 mfspr r10, SPRN_SPRG_THREAD
88#ifdef CONFIG_KVM_BOOKE_HV
89BEGIN_FTR_SECTION
90 mtspr SPRN_SPRG_WSCRATCH0, r10
91 stw r11, THREAD_NORMSAVE(0)(r10)
92 stw r13, THREAD_NORMSAVE(2)(r10)
93 mfcr r13 /* save CR in r13 for now */
94 mfspr r11, SPRN_SRR1
95 mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
96 bf 3, 1975f
97 b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
981975:
99 mr r12, r13
100 lwz r13, THREAD_NORMSAVE(2)(r10)
101FTR_SECTION_ELSE
102#endif
103 mfcr r12
104#ifdef CONFIG_KVM_BOOKE_HV
105ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
106#endif
107 BOOKE_CLEAR_BTB(r11)
108 lwz r11, TASK_STACK - THREAD(r10)
109 rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
110 ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
111 stw r12, _CCR(r11) /* save various registers */
112 mflr r12
113 stw r12,_LINK(r11)
114 mfspr r12,SPRN_SRR0
115 stw r1, GPR1(r11)
116 mfspr r9,SPRN_SRR1
117 stw r1, 0(r11)
118 mr r1, r11
119 stw r12,_NIP(r11)
120 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
121 lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
122 stw r2,GPR2(r11)
123 addi r12, r12, STACK_FRAME_REGS_MARKER@l
124 stw r9,_MSR(r11)
125 li r2, \trapno + 1
126 stw r12, 8(r11)
127 stw r2,_TRAP(r11)
128 SAVE_GPR(0, r11)
129 SAVE_4GPRS(3, r11)
130 SAVE_2GPRS(7, r11)
131
132 addi r11,r1,STACK_FRAME_OVERHEAD
133 addi r2,r10,-THREAD
134 stw r11,PT_REGS(r10)
135 /* Check to see if the dbcr0 register is set up to debug. Use the
136 internal debug mode bit to do this. */
137 lwz r12,THREAD_DBCR0(r10)
138 andis. r12,r12,DBCR0_IDM@h
139 ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
140 beq+ 3f
141 /* From user and task is ptraced - load up global dbcr0 */
142 li r12,-1 /* clear all pending debug events */
143 mtspr SPRN_DBSR,r12
144 lis r11,global_dbcr0@ha
145 tophys(r11,r11)
146 addi r11,r11,global_dbcr0@l
147#ifdef CONFIG_SMP
148 lwz r9,TASK_CPU(r2)
149 slwi r9,r9,3
150 add r11,r11,r9
151#endif
152 lwz r12,0(r11)
153 mtspr SPRN_DBCR0,r12
154 lwz r12,4(r11)
155 addi r12,r12,-1
156 stw r12,4(r11)
157
1583:
159 tovirt(r2, r2) /* set r2 to current */
160 lis r11, transfer_to_syscall@h
161 ori r11, r11, transfer_to_syscall@l
162#ifdef CONFIG_TRACE_IRQFLAGS
163 /*
164 * If MSR is changing, we need to keep interrupts disabled at this point;
165 * otherwise we might risk taking an interrupt before we tell lockdep
166 * they are enabled.
167 */
168 lis r10, MSR_KERNEL@h
169 ori r10, r10, MSR_KERNEL@l
170 rlwimi r10, r9, 0, MSR_EE
171#else
172 lis r10, (MSR_KERNEL | MSR_EE)@h
173 ori r10, r10, (MSR_KERNEL | MSR_EE)@l
174#endif
175 mtspr SPRN_SRR1,r10
176 mtspr SPRN_SRR0,r11
177 SYNC
178 RFI /* jump to handler, enable MMU */
179.endm
180
84/* To handle the additional exception priority levels on 40x and Book-E 181/* To handle the additional exception priority levels on 40x and Book-E
85 * processors we allocate a stack per additional priority level. 182 * processors we allocate a stack per additional priority level.
86 * 183 *
@@ -217,8 +314,7 @@ label:
217 CRITICAL_EXCEPTION_PROLOG(intno); \ 314 CRITICAL_EXCEPTION_PROLOG(intno); \
218 addi r3,r1,STACK_FRAME_OVERHEAD; \ 315 addi r3,r1,STACK_FRAME_OVERHEAD; \
219 EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ 316 EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
220 NOCOPY, crit_transfer_to_handler, \ 317 crit_transfer_to_handler, ret_from_crit_exc)
221 ret_from_crit_exc)
222 318
223#define MCHECK_EXCEPTION(n, label, hdlr) \ 319#define MCHECK_EXCEPTION(n, label, hdlr) \
224 START_EXCEPTION(label); \ 320 START_EXCEPTION(label); \
@@ -227,36 +323,23 @@ label:
227 stw r5,_ESR(r11); \ 323 stw r5,_ESR(r11); \
228 addi r3,r1,STACK_FRAME_OVERHEAD; \ 324 addi r3,r1,STACK_FRAME_OVERHEAD; \
229 EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ 325 EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
230 NOCOPY, mcheck_transfer_to_handler, \ 326 mcheck_transfer_to_handler, ret_from_mcheck_exc)
231 ret_from_mcheck_exc)
232 327
233#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ 328#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
234 li r10,trap; \ 329 li r10,trap; \
235 stw r10,_TRAP(r11); \ 330 stw r10,_TRAP(r11); \
236 lis r10,msr@h; \ 331 lis r10,msr@h; \
237 ori r10,r10,msr@l; \ 332 ori r10,r10,msr@l; \
238 copyee(r10, r9); \
239 bl tfer; \ 333 bl tfer; \
240 .long hdlr; \ 334 .long hdlr; \
241 .long ret 335 .long ret
242 336
243#define COPY_EE(d, s) rlwimi d,s,0,16,16
244#define NOCOPY(d, s)
245
246#define EXC_XFER_STD(n, hdlr) \ 337#define EXC_XFER_STD(n, hdlr) \
247 EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ 338 EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
248 ret_from_except_full) 339 ret_from_except_full)
249 340
250#define EXC_XFER_LITE(n, hdlr) \ 341#define EXC_XFER_LITE(n, hdlr) \
251 EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ 342 EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
252 ret_from_except)
253
254#define EXC_XFER_EE(n, hdlr) \
255 EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
256 ret_from_except_full)
257
258#define EXC_XFER_EE_LITE(n, hdlr) \
259 EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
260 ret_from_except) 343 ret_from_except)
261 344
262/* Check for a single step debug exception while in an exception 345/* Check for a single step debug exception while in an exception
@@ -323,7 +406,7 @@ label:
323 /* continue normal handling for a debug exception... */ \ 406 /* continue normal handling for a debug exception... */ \
3242: mfspr r4,SPRN_DBSR; \ 4072: mfspr r4,SPRN_DBSR; \
325 addi r3,r1,STACK_FRAME_OVERHEAD; \ 408 addi r3,r1,STACK_FRAME_OVERHEAD; \
326 EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) 409 EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
327 410
328#define DEBUG_CRIT_EXCEPTION \ 411#define DEBUG_CRIT_EXCEPTION \
329 START_EXCEPTION(DebugCrit); \ 412 START_EXCEPTION(DebugCrit); \
@@ -376,7 +459,7 @@ label:
376 /* continue normal handling for a critical exception... */ \ 459 /* continue normal handling for a critical exception... */ \
3772: mfspr r4,SPRN_DBSR; \ 4602: mfspr r4,SPRN_DBSR; \
378 addi r3,r1,STACK_FRAME_OVERHEAD; \ 461 addi r3,r1,STACK_FRAME_OVERHEAD; \
379 EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) 462 EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
380 463
381#define DATA_STORAGE_EXCEPTION \ 464#define DATA_STORAGE_EXCEPTION \
382 START_EXCEPTION(DataStorage) \ 465 START_EXCEPTION(DataStorage) \
@@ -401,7 +484,7 @@ label:
401 mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ 484 mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
402 stw r4,_DEAR(r11); \ 485 stw r4,_DEAR(r11); \
403 addi r3,r1,STACK_FRAME_OVERHEAD; \ 486 addi r3,r1,STACK_FRAME_OVERHEAD; \
404 EXC_XFER_EE(0x0600, alignment_exception) 487 EXC_XFER_STD(0x0600, alignment_exception)
405 488
406#define PROGRAM_EXCEPTION \ 489#define PROGRAM_EXCEPTION \
407 START_EXCEPTION(Program) \ 490 START_EXCEPTION(Program) \
@@ -426,9 +509,9 @@ label:
426 bl load_up_fpu; /* if from user, just load it up */ \ 509 bl load_up_fpu; /* if from user, just load it up */ \
427 b fast_exception_return; \ 510 b fast_exception_return; \
4281: addi r3,r1,STACK_FRAME_OVERHEAD; \ 5111: addi r3,r1,STACK_FRAME_OVERHEAD; \
429 EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) 512 EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
430 513
431#ifndef __ASSEMBLY__ 514#else /* __ASSEMBLY__ */
432struct exception_regs { 515struct exception_regs {
433 unsigned long mas0; 516 unsigned long mas0;
434 unsigned long mas1; 517 unsigned long mas1;
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 32332e24e421..6621f230cc37 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -268,6 +268,9 @@ set_ivor:
268/* 268/*
269 * Decide what sort of machine this is and initialize the MMU. 269 * Decide what sort of machine this is and initialize the MMU.
270 */ 270 */
271#ifdef CONFIG_KASAN
272 bl kasan_early_init
273#endif
271 mr r3,r30 274 mr r3,r30
272 mr r4,r31 275 mr r4,r31
273 bl machine_init 276 bl machine_init
@@ -380,7 +383,7 @@ interrupt_base:
380 EXC_XFER_LITE(0x0300, handle_page_fault) 383 EXC_XFER_LITE(0x0300, handle_page_fault)
3811: 3841:
382 addi r3,r1,STACK_FRAME_OVERHEAD 385 addi r3,r1,STACK_FRAME_OVERHEAD
383 EXC_XFER_EE_LITE(0x0300, CacheLockingException) 386 EXC_XFER_LITE(0x0300, CacheLockingException)
384 387
385 /* Instruction Storage Interrupt */ 388 /* Instruction Storage Interrupt */
386 INSTRUCTION_STORAGE_EXCEPTION 389 INSTRUCTION_STORAGE_EXCEPTION
@@ -401,21 +404,20 @@ interrupt_base:
401#ifdef CONFIG_E200 404#ifdef CONFIG_E200
402 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ 405 /* E200 treats 'normal' floating point instructions as FP Unavail exception */
403 EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ 406 EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
404 program_check_exception, EXC_XFER_EE) 407 program_check_exception, EXC_XFER_STD)
405#else 408#else
406 EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ 409 EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
407 unknown_exception, EXC_XFER_EE) 410 unknown_exception, EXC_XFER_STD)
408#endif 411#endif
409#endif 412#endif
410 413
411 /* System Call Interrupt */ 414 /* System Call Interrupt */
412 START_EXCEPTION(SystemCall) 415 START_EXCEPTION(SystemCall)
413 NORMAL_EXCEPTION_PROLOG(SYSCALL) 416 SYSCALL_ENTRY 0xc00 SYSCALL
414 EXC_XFER_EE_LITE(0x0c00, DoSyscall)
415 417
416 /* Auxiliary Processor Unavailable Interrupt */ 418 /* Auxiliary Processor Unavailable Interrupt */
417 EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ 419 EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
418 unknown_exception, EXC_XFER_EE) 420 unknown_exception, EXC_XFER_STD)
419 421
420 /* Decrementer Interrupt */ 422 /* Decrementer Interrupt */
421 DECREMENTER_EXCEPTION 423 DECREMENTER_EXCEPTION
@@ -423,7 +425,7 @@ interrupt_base:
423 /* Fixed Internal Timer Interrupt */ 425 /* Fixed Internal Timer Interrupt */
424 /* TODO: Add FIT support */ 426 /* TODO: Add FIT support */
425 EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ 427 EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
426 unknown_exception, EXC_XFER_EE) 428 unknown_exception, EXC_XFER_STD)
427 429
428 /* Watchdog Timer Interrupt */ 430 /* Watchdog Timer Interrupt */
429#ifdef CONFIG_BOOKE_WDT 431#ifdef CONFIG_BOOKE_WDT
@@ -633,25 +635,25 @@ END_BTB_FLUSH_SECTION
633 bl load_up_spe 635 bl load_up_spe
634 b fast_exception_return 636 b fast_exception_return
6351: addi r3,r1,STACK_FRAME_OVERHEAD 6371: addi r3,r1,STACK_FRAME_OVERHEAD
636 EXC_XFER_EE_LITE(0x2010, KernelSPE) 638 EXC_XFER_LITE(0x2010, KernelSPE)
637#elif defined(CONFIG_SPE_POSSIBLE) 639#elif defined(CONFIG_SPE_POSSIBLE)
638 EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ 640 EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
639 unknown_exception, EXC_XFER_EE) 641 unknown_exception, EXC_XFER_STD)
640#endif /* CONFIG_SPE_POSSIBLE */ 642#endif /* CONFIG_SPE_POSSIBLE */
641 643
642 /* SPE Floating Point Data */ 644 /* SPE Floating Point Data */
643#ifdef CONFIG_SPE 645#ifdef CONFIG_SPE
644 EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, 646 EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
645 SPEFloatingPointException, EXC_XFER_EE) 647 SPEFloatingPointException, EXC_XFER_STD)
646 648
647 /* SPE Floating Point Round */ 649 /* SPE Floating Point Round */
648 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ 650 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
649 SPEFloatingPointRoundException, EXC_XFER_EE) 651 SPEFloatingPointRoundException, EXC_XFER_STD)
650#elif defined(CONFIG_SPE_POSSIBLE) 652#elif defined(CONFIG_SPE_POSSIBLE)
651 EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, 653 EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
652 unknown_exception, EXC_XFER_EE) 654 unknown_exception, EXC_XFER_STD)
653 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ 655 EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
654 unknown_exception, EXC_XFER_EE) 656 unknown_exception, EXC_XFER_STD)
655#endif /* CONFIG_SPE_POSSIBLE */ 657#endif /* CONFIG_SPE_POSSIBLE */
656 658
657 659
@@ -674,10 +676,10 @@ END_BTB_FLUSH_SECTION
674 unknown_exception) 676 unknown_exception)
675 677
676 /* Hypercall */ 678 /* Hypercall */
677 EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE) 679 EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
678 680
679 /* Embedded Hypervisor Privilege */ 681 /* Embedded Hypervisor Privilege */
680 EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE) 682 EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
681 683
682interrupt_end: 684interrupt_end:
683 685
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index fec8a6773119..da307dd93ee3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,11 +29,15 @@
29#include <linux/kernel.h> 29#include <linux/kernel.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/smp.h> 31#include <linux/smp.h>
32#include <linux/debugfs.h>
33#include <linux/init.h>
32 34
33#include <asm/hw_breakpoint.h> 35#include <asm/hw_breakpoint.h>
34#include <asm/processor.h> 36#include <asm/processor.h>
35#include <asm/sstep.h> 37#include <asm/sstep.h>
36#include <asm/debug.h> 38#include <asm/debug.h>
39#include <asm/debugfs.h>
40#include <asm/hvcall.h>
37#include <linux/uaccess.h> 41#include <linux/uaccess.h>
38 42
39/* 43/*
@@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
174 if (!ppc_breakpoint_available()) 178 if (!ppc_breakpoint_available())
175 return -ENODEV; 179 return -ENODEV;
176 length_max = 8; /* DABR */ 180 length_max = 8; /* DABR */
177 if (cpu_has_feature(CPU_FTR_DAWR)) { 181 if (dawr_enabled()) {
178 length_max = 512 ; /* 64 doublewords */ 182 length_max = 512 ; /* 64 doublewords */
179 /* DAWR region can't cross 512 boundary */ 183 /* DAWR region can't cross 512 boundary */
180 if ((attr->bp_addr >> 9) != 184 if ((attr->bp_addr >> 9) !=
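The condition is truncated by the hunk, but the intent of the shift by 9 is that a DAWR match range, although up to 512 bytes long, must not cross a 512-byte-aligned boundary. Equivalent in spirit (hypothetical helper, not the kernel's code):

	/* a DAWR range of len bytes starting at addr must stay inside one
	 * 512-byte block
	 */
	static int dawr_range_ok(unsigned long addr, unsigned long len)
	{
		return (addr >> 9) == ((addr + len - 1) >> 9);
	}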
@@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
376{ 380{
377 /* TODO */ 381 /* TODO */
378} 382}
383
384bool dawr_force_enable;
385EXPORT_SYMBOL_GPL(dawr_force_enable);
386
387static ssize_t dawr_write_file_bool(struct file *file,
388 const char __user *user_buf,
389 size_t count, loff_t *ppos)
390{
391 struct arch_hw_breakpoint null_brk = {0, 0, 0};
392 size_t rc;
393
394 /* Send error to user if the hypervisor won't allow us to write DAWR */
395 if ((!dawr_force_enable) &&
396 (firmware_has_feature(FW_FEATURE_LPAR)) &&
397 (set_dawr(&null_brk) != H_SUCCESS))
398 return -1;
399
400 rc = debugfs_write_file_bool(file, user_buf, count, ppos);
401 if (rc)
402 return rc;
403
404 /* If we are clearing, make sure all CPUs have the DAWR cleared */
405 if (!dawr_force_enable)
406 smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);
407
408 return rc;
409}
410
411static const struct file_operations dawr_enable_fops = {
412 .read = debugfs_read_file_bool,
413 .write = dawr_write_file_bool,
414 .open = simple_open,
415 .llseek = default_llseek,
416};
417
418static int __init dawr_force_setup(void)
419{
420 dawr_force_enable = false;
421
422 if (cpu_has_feature(CPU_FTR_DAWR)) {
423 /* Don't setup sysfs file for user control on P8 */
424 dawr_force_enable = true;
425 return 0;
426 }
427
428 if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
429 /* Turn DAWR off by default, but allow admin to turn it on */
430 dawr_force_enable = false;
431 debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
432 powerpc_debugfs_root,
433 &dawr_force_enable,
434 &dawr_enable_fops);
435 }
436 return 0;
437}
438arch_initcall(dawr_force_setup);
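Net effect of this block: the DAWR stays available unconditionally on CPUs that advertise CPU_FTR_DAWR (POWER8), while on POWER9, where the erratum made it unsafe, it is off until an administrator writes to dawr_enable_dangerous (created under the powerpc debugfs root, so presumably /sys/kernel/debug/powerpc/dawr_enable_dangerous). The hw_breakpoint length check earlier in this file then keys off dawr_enabled() rather than the raw CPU feature; the helper is, as far as I can tell, just a wrapper over the exported bool:

	#include <stdbool.h>

	/* sketch of the gate this series adds (the real inline lives in a
	 * header, asm/hw_breakpoint.h if memory serves)
	 */
	extern bool dawr_force_enable;

	static inline bool dawr_enabled(void)
	{
		return dawr_force_enable;
	}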
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 7f5ac2e8581b..2dfbd5d5b932 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -1,956 +1,188 @@
 /*
- * This file contains idle entry/exit functions for POWER7,
- * POWER8 and POWER9 CPUs.
+ * Copyright 2018, IBM Corporation.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
+ *
+ * This file contains general idle entry/exit functions to save
+ * and restore stack and NVGPRs which allows C code to call idle
+ * states that lose GPRs, and it will return transparently with
+ * SRR1 wakeup reason return value.
+ *
+ * The platform / CPU caller must ensure SPRs and any other non-GPR
+ * state is saved and restored correctly, handle KVM, interrupts, etc.
  */
 
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ppc-opcode.h>
-#include <asm/hw_irq.h>
-#include <asm/kvm_book3s_asm.h>
-#include <asm/opal.h>
 #include <asm/cpuidle.h>
-#include <asm/exception-64s.h>
-#include <asm/book3s/64/mmu-hash.h>
-#include <asm/mmu.h>
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
-
-/*
- * Use unused space in the interrupt stack to save and restore
- * registers for winkle support.
- */
-#define _MMCR0	GPR0
-#define _SDR1	GPR3
-#define _PTCR	GPR3
-#define _RPR	GPR4
-#define _SPURR	GPR5
-#define _PURR	GPR6
-#define _TSCR	GPR7
-#define _DSCR	GPR8
-#define _AMOR	GPR9
-#define _WORT	GPR10
-#define _WORC	GPR11
-#define _LPCR	GPR12
-
-#define PSSCR_EC_ESL_MASK_SHIFTED	(PSSCR_EC | PSSCR_ESL) >> 16
 
-	.text
-
-/*
- * Used by threads before entering deep idle states. Saves SPRs
- * in interrupt stack frame
- */
-save_sprs_to_stack:
-	/*
-	 * Note all register i.e per-core, per-subcore or per-thread is saved
-	 * here since any thread in the core might wake up first
-	 */
-BEGIN_FTR_SECTION
-	/*
-	 * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
-	 * SDR1 here
-	 */
-	mfspr	r3,SPRN_PTCR
-	std	r3,_PTCR(r1)
-	mfspr	r3,SPRN_LPCR
-	std	r3,_LPCR(r1)
-FTR_SECTION_ELSE
-	mfspr	r3,SPRN_SDR1
-	std	r3,_SDR1(r1)
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
-	mfspr	r3,SPRN_RPR
-	std	r3,_RPR(r1)
-	mfspr	r3,SPRN_SPURR
-	std	r3,_SPURR(r1)
-	mfspr	r3,SPRN_PURR
-	std	r3,_PURR(r1)
-	mfspr	r3,SPRN_TSCR
-	std	r3,_TSCR(r1)
-	mfspr	r3,SPRN_DSCR
-	std	r3,_DSCR(r1)
-	mfspr	r3,SPRN_AMOR
-	std	r3,_AMOR(r1)
-	mfspr	r3,SPRN_WORT
-	std	r3,_WORT(r1)
-	mfspr	r3,SPRN_WORC
-	std	r3,_WORC(r1)
 /*
- * On POWER9, there are idle states such as stop4, invoked via cpuidle,
- * that lose hypervisor resources. In such cases, we need to save
- * additional SPRs before entering those idle states so that they can
- * be restored to their older values on wakeup from the idle state.
+ * Desired PSSCR in r3
  *
- * On POWER8, the only such deep idle state is winkle which is used
- * only in the context of CPU-Hotplug, where these additional SPRs are
- * reinitiazed to a sane value. Hence there is no need to save/restore
- * these SPRs.
+ * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
+ *
+ * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
+ * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
+ * and must blr, to return to caller with r3 set according to caller's expected
+ * return code (for Book3S/64 that is SRR1).
  */
-BEGIN_FTR_SECTION
-	blr
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
+_GLOBAL(isa300_idle_stop_noloss)
+	mtspr	SPRN_PSSCR,r3
+	PPC_STOP
+	li	r3,0
-power9_save_additional_sprs:
-	mfspr	r3, SPRN_PID
-	mfspr	r4, SPRN_LDBAR
-	std	r3, STOP_PID(r13)
-	std	r4, STOP_LDBAR(r13)
-
-	mfspr	r3, SPRN_FSCR
-	mfspr	r4, SPRN_HFSCR
-	std	r3, STOP_FSCR(r13)
-	std	r4, STOP_HFSCR(r13)
-
-	mfspr	r3, SPRN_MMCRA
-	mfspr	r4, SPRN_MMCR0
-	std	r3, STOP_MMCRA(r13)
-	std	r4, _MMCR0(r1)
-
-	mfspr	r3, SPRN_MMCR1
-	mfspr	r4, SPRN_MMCR2
-	std	r3, STOP_MMCR1(r13)
-	std	r4, STOP_MMCR2(r13)
-	blr
-
-power9_restore_additional_sprs:
-	ld	r3,_LPCR(r1)
-	ld	r4, STOP_PID(r13)
-	mtspr	SPRN_LPCR,r3
-	mtspr	SPRN_PID, r4
-
-	ld	r3, STOP_LDBAR(r13)
-	ld	r4, STOP_FSCR(r13)
-	mtspr	SPRN_LDBAR, r3
-	mtspr	SPRN_FSCR, r4
-
-	ld	r3, STOP_HFSCR(r13)
-	ld	r4, STOP_MMCRA(r13)
-	mtspr	SPRN_HFSCR, r3
-	mtspr	SPRN_MMCRA, r4
-
-	ld	r3, _MMCR0(r1)
-	ld	r4, STOP_MMCR1(r13)
-	mtspr	SPRN_MMCR0, r3
-	mtspr	SPRN_MMCR1, r4
-
-	ld	r3, STOP_MMCR2(r13)
-	ld	r4, PACA_SPRG_VDSO(r13)
-	mtspr	SPRN_MMCR2, r3
-	mtspr	SPRN_SPRG3, r4
 	blr
 
 /*
- * Used by threads when the lock bit of core_idle_state is set.
- * Threads will spin in HMT_LOW until the lock bit is cleared.
- * r14 - pointer to core_idle_state
- * r15 - used to load contents of core_idle_state
- * r9 - used as a temporary variable
+ * Desired PSSCR in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
  */
-
-core_idle_lock_held:
-	HMT_LOW
-3:	lwz	r15,0(r14)
-	andis.	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	bne	3b
-	HMT_MEDIUM
-	lwarx	r15,0,r14
-	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	bne-	core_idle_lock_held
-	blr
+_GLOBAL(isa300_idle_stop_mayloss)
+	mtspr	SPRN_PSSCR,r3
+	std	r1,PACAR1(r13)
+	mflr	r4
+	mfcr	r5
+	/* use stack red zone rather than a new frame for saving regs */
+	std	r2,-8*0(r1)
+	std	r14,-8*1(r1)
+	std	r15,-8*2(r1)
+	std	r16,-8*3(r1)
+	std	r17,-8*4(r1)
+	std	r18,-8*5(r1)
+	std	r19,-8*6(r1)
+	std	r20,-8*7(r1)
+	std	r21,-8*8(r1)
+	std	r22,-8*9(r1)
+	std	r23,-8*10(r1)
+	std	r24,-8*11(r1)
+	std	r25,-8*12(r1)
+	std	r26,-8*13(r1)
+	std	r27,-8*14(r1)
+	std	r28,-8*15(r1)
+	std	r29,-8*16(r1)
+	std	r30,-8*17(r1)
+	std	r31,-8*18(r1)
+	std	r4,-8*19(r1)
+	std	r5,-8*20(r1)
+	/* 168 bytes */
+	PPC_STOP
+	b	.	/* catch bugs */
 
 /*
- * Pass requested state in r3:
- * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- *	   - Requested PSSCR value in POWER9
+ * Desired return value in r3
+ *
+ * The idle wakeup SRESET interrupt can call this after calling
+ * to return to the idle sleep function caller with r3 as the return code.
  *
- * Address of idle handler to branch to in realmode in r4
+ * This must not be used if idle was entered via a _noloss function (use
+ * a simple blr instead).
  */
-pnv_powersave_common:
-	/* Use r3 to pass state nap/sleep/winkle */
-	/* NAP is a state loss, we create a regs frame on the
-	 * stack, fill it up with the state we care about and
-	 * stick a pointer to it in PACAR1. We really only
-	 * need to save PC, some CR bits and the NV GPRs,
-	 * but for now an interrupt frame will do.
-	 */
-	mtctr	r4
-
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,-INT_FRAME_SIZE(r1)
-	std	r0,_LINK(r1)
-	std	r0,_NIP(r1)
-
-	/* We haven't lost state ... yet */
-	li	r0,0
-	stb	r0,PACA_NAPSTATELOST(r13)
-
-	/* Continue saving state */
-	SAVE_GPR(2, r1)
-	SAVE_NVGPRS(r1)
-	mfcr	r5
-	std	r5,_CCR(r1)
-	std	r1,PACAR1(r13)
-
-BEGIN_FTR_SECTION
-	/*
-	 * POWER9 does not require real mode to stop, and presently does not
-	 * set hwthread_state for KVM (threads don't share MMU context), so
-	 * we can remain in virtual mode for this.
-	 */
-	bctr
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-	/*
-	 * POWER8
-	 * Go to real mode to do the nap, as required by the architecture.
-	 * Also, we need to be in real mode before setting hwthread_state,
-	 * because as soon as we do that, another thread can switch
-	 * the MMU context to the guest.
-	 */
-	LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
-	mtmsrd	r7,0
-	bctr
+_GLOBAL(idle_return_gpr_loss)
+	ld	r1,PACAR1(r13)
+	ld	r4,-8*19(r1)
+	ld	r5,-8*20(r1)
+	mtlr	r4
+	mtcr	r5
+	/*
+	 * KVM nap requires r2 to be saved, rather than just restoring it
+	 * from PACATOC. This could be avoided for that less common case
+	 * if KVM saved its r2.
+	 */
+	ld	r2,-8*0(r1)
+	ld	r14,-8*1(r1)
+	ld	r15,-8*2(r1)
+	ld	r16,-8*3(r1)
+	ld	r17,-8*4(r1)
+	ld	r18,-8*5(r1)
+	ld	r19,-8*6(r1)
+	ld	r20,-8*7(r1)
+	ld	r21,-8*8(r1)
+	ld	r22,-8*9(r1)
+	ld	r23,-8*10(r1)
+	ld	r24,-8*11(r1)
+	ld	r25,-8*12(r1)
+	ld	r26,-8*13(r1)
+	ld	r27,-8*14(r1)
+	ld	r28,-8*15(r1)
+	ld	r29,-8*16(r1)
+	ld	r30,-8*17(r1)
+	ld	r31,-8*18(r1)
+	blr
 
 /*
  * This is the sequence required to execute idle instructions, as
  * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ *
+ * The 0(r1) slot is used to save r2 in isa206, so use that here.
  */
 #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
 	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
-	std	r0,0(r1);					\
+	std	r2,0(r1);					\
 	ptesync;						\
-	ld	r0,0(r1);					\
-236:	cmpd	cr0,r0,r0;					\
+	ld	r2,0(r1);					\
+236:	cmpd	cr0,r2,r2;					\
 	bne	236b;						\
-	IDLE_INST;
-
+	IDLE_INST;						\
+	b	.	/* catch bugs */
-
-	.globl pnv_enter_arch207_idle_mode
-pnv_enter_arch207_idle_mode:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/* Tell KVM we're entering idle */
-	li	r4,KVM_HWTHREAD_IN_IDLE
-	/******************************************************/
-	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
-	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
-	/* MUST occur in real mode, i.e. with the MMU off,    */
-	/* and the MMU must stay off until we clear this flag */
-	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
-	/* pnv_powersave_wakeup in this file.                 */
-	/* The reason is that another thread can switch the   */
-	/* MMU to a guest context whenever this flag is set   */
-	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
-	/* that would potentially cause this thread to start  */
-	/* executing instructions from guest memory in        */
-	/* hypervisor mode, leading to a host crash or data   */
-	/* corruption, or worse.                              */
-	/******************************************************/
-	stb	r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
-	stb	r3,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	cr3,r3,PNV_THREAD_SLEEP
-	bge	cr3,2f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
-	/* No return */
-2:
-	/* Sleep or winkle */
-	lbz	r7,PACA_THREAD_MASK(r13)
-	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
-	li	r5,0
-	beq	cr3,3f
-	lis	r5,PNV_CORE_IDLE_WINKLE_COUNT@h
-3:
-lwarx_loop1:
-	lwarx	r15,0,r14
-
-	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	bnel-	core_idle_lock_held
-
-	add	r15,r15,r5			/* Add if winkle */
-	andc	r15,r15,r7			/* Clear thread bit */
-
-	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
-
-/*
- * If cr0 = 0, then current thread is the last thread of the core entering
- * sleep. Last thread needs to execute the hardware bug workaround code if
- * required by the platform.
- * Make the workaround call unconditionally here. The below branch call is
- * patched out when the idle states are discovered if the platform does not
- * require it.
- */
-.global pnv_fastsleep_workaround_at_entry
-pnv_fastsleep_workaround_at_entry:
-	beq	fastsleep_workaround_at_entry
-
-	stwcx.	r15,0,r14
-	bne-	lwarx_loop1
-	isync
-
-common_enter: /* common code for all the threads entering sleep or winkle */
-	bgt	cr3,enter_winkle
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-
-fastsleep_workaround_at_entry:
-	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	stwcx.	r15,0,r14
-	bne-	lwarx_loop1
-	isync
-
-	/* Fast sleep workaround */
-	li	r3,1
-	li	r4,1
-	bl	opal_config_cpu_idle_state
-
-	/* Unlock */
-	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	lwsync
-	stw	r15,0(r14)
-	b	common_enter
-
-enter_winkle:
-	bl	save_sprs_to_stack
-
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-
-/*
- * r3 - PSSCR value corresponding to the requested stop state.
- */
-power_enter_stop:
-/*
- * Check if we are executing the lite variant with ESL=EC=0
- */
-	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
-	clrldi	r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
-	bne	.Lhandle_esl_ec_set
-	PPC_STOP
-	li	r3,0  /* Since we didn't lose state, return 0 */
-	std	r3, PACA_REQ_PSSCR(r13)
-
-	/*
-	 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
-	 * it can determine if the wakeup reason is an HMI in
-	 * CHECK_HMI_INTERRUPT.
-	 *
-	 * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
-	 * reason, so there is no point setting r12 to SRR1.
-	 *
-	 * Further, we clear r12 here, so that we don't accidentally enter the
-	 * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
-	 */
-	li	r12, 0
-	b	pnv_wakeup_noloss
-
-.Lhandle_esl_ec_set:
-BEGIN_FTR_SECTION
-	/*
-	 * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
-	 * a state-loss idle. Saving and restoring MMCR0 over idle is a
-	 * workaround.
-	 */
-	mfspr	r4,SPRN_MMCR0
-	std	r4,_MMCR0(r1)
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
 
 /*
- * Check if the requested state is a deep idle state.
- */
-	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
-	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
-	cmpd	r3,r4
-	bge	.Lhandle_deep_stop
-	PPC_STOP	/* Does not return (system reset interrupt) */
-
-.Lhandle_deep_stop:
-/*
- * Entering deep idle state.
- * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
- * stack and enter stop
- */
-	lbz	r7,PACA_THREAD_MASK(r13)
-	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
-
-lwarx_loop_stop:
-	lwarx	r15,0,r14
-	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	bnel-	core_idle_lock_held
-	andc	r15,r15,r7			/* Clear thread bit */
-
-	stwcx.	r15,0,r14
-	bne-	lwarx_loop_stop
-	isync
-
-	bl	save_sprs_to_stack
-
-	PPC_STOP	/* Does not return (system reset interrupt) */
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
- */
+ * Desired instruction type in r3
-_GLOBAL(power7_idle_insn)
-	/* Now check if user or arch enabled NAP mode */
-	LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
-	b	pnv_powersave_common
-
-#define CHECK_HMI_INTERRUPT						\
-BEGIN_FTR_SECTION_NESTED(66);						\
-	rlwinm	r0,r12,45-31,0xf;  /* extract wake reason field (P8) */ \
-FTR_SECTION_ELSE_NESTED(66);						\
-	rlwinm	r0,r12,45-31,0xe;  /* P7 wake reason field is 3 bits */ \
-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
-	cmpwi	r0,0xa;			/* Hypervisor maintenance ? */	\
-	bne+	20f;							\
-	/* Invoke opal call to handle hmi */				\
-	ld	r2,PACATOC(r13);					\
-	ld	r1,PACAR1(r13);						\
-	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
-	li	r3,0;			/* NULL argument */		\
-	bl	hmi_exception_realmode;					\
-	nop;								\
-	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
-20:	nop;
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired PSSCR register value.
  *
- * Offline (CPU unplug) case also must notify KVM that the CPU is
- * idle.
- */
-_GLOBAL(power9_offline_stop)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/*
-	 * Tell KVM we're entering idle.
-	 * This does not have to be done in real mode because the P9 MMU
-	 * is independent per-thread. Some steppings share radix/hash mode
-	 * between threads, but in that case KVM has a barrier sync in real
-	 * mode before and after switching between radix and hash.
-	 */
-	li	r4,KVM_HWTHREAD_IN_IDLE
-	stb	r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
-	/* fall through */
-
-_GLOBAL(power9_idle_stop)
-	std	r3, PACA_REQ_PSSCR(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-BEGIN_FTR_SECTION
-	sync
-	lwz	r5, PACA_DONT_STOP(r13)
-	cmpwi	r5, 0
-	bne	1f
-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
-#endif
-	mtspr	SPRN_PSSCR,r3
-	LOAD_REG_ADDR(r4,power_enter_stop)
-	b	pnv_powersave_common
-	/* No return */
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-1:
-	/*
-	 * We get here when TM / thread reconfiguration bug workaround
-	 * code wants to get the CPU into SMT4 mode, and therefore
-	 * we are being asked not to stop.
-	 */
-	li	r3, 0
-	std	r3, PACA_REQ_PSSCR(r13)
-	blr		/* return 0 for wakeup cause / SRR1 value */
-#endif
-
-/*
- * Called from machine check handler for powersave wakeups.
- * Low level machine check processing has already been done. Now just
- * go through the wake up path to get everything in order.
- *
- * r3 - The original SRR1 value.
- * Original SRR[01] have been clobbered.
- * MSR_RI is clear.
- */
-.global pnv_powersave_wakeup_mce
-pnv_powersave_wakeup_mce:
-	/* Set cr3 for pnv_powersave_wakeup */
-	rlwinm	r11,r3,47-31,30,31
-	cmpwi	cr3,r11,2
-
-	/*
-	 * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
-	 * reason into r12, which allows reuse of the system reset wakeup
-	 * code without being mistaken for another type of wakeup.
-	 */
-	oris	r12,r3,SRR1_WAKEMCE_RESVD@h
-
-	b	pnv_powersave_wakeup
-
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
-/*
- * Called from reset vector for powersave wakeups.
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- * r12 - SRR1
- */
-.global pnv_powersave_wakeup
-pnv_powersave_wakeup:
-	ld	r2, PACATOC(r13)
-
-BEGIN_FTR_SECTION
-	bl	pnv_restore_hyp_resource_arch300
-FTR_SECTION_ELSE
-	bl	pnv_restore_hyp_resource_arch207
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
-
-	li	r0,PNV_THREAD_RUNNING
-	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
-
-	mr	r3,r12
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	lbz	r0,HSTATE_HWTHREAD_STATE(r13)
-	cmpwi	r0,KVM_HWTHREAD_IN_KERNEL
-	beq	0f
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-0:	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	b	kvm_start_guest
-1:
-#endif
-
-	/* Return SRR1 from power7_nap() */
-	blt	cr3,pnv_wakeup_noloss
-	b	pnv_wakeup_loss
-
-/*
- * Check whether we have woken up with hypervisor state loss.
- * If yes, restore hypervisor state and return back to link.
  *
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- */
-pnv_restore_hyp_resource_arch300:
-	/*
-	 * Workaround for POWER9, if we lost resources, the ERAT
-	 * might have been mixed up and needs flushing. We also need
-	 * to reload MMCR0 (see comment above). We also need to set
-	 * then clear bit 60 in MMCRA to ensure the PMU starts running.
-	 */
-	blt	cr3,1f
-BEGIN_FTR_SECTION
-	PPC_INVALIDATE_ERAT
-	ld	r1,PACAR1(r13)
-	ld	r4,_MMCR0(r1)
-	mtspr	SPRN_MMCR0,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
-	mfspr	r4,SPRN_MMCRA
-	ori	r4,r4,(1 << (63-60))
-	mtspr	SPRN_MMCRA,r4
-	xori	r4,r4,(1 << (63-60))
-	mtspr	SPRN_MMCRA,r4
-1:
-	/*
-	 * POWER ISA 3. Use PSSCR to determine if we
-	 * are waking up from deep idle state
-	 */
-	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
-	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
-
-	/*
-	 * 0-3 bits correspond to Power-Saving Level Status
-	 * which indicates the idle state we are waking up from
-	 */
-	mfspr	r5, SPRN_PSSCR
-	rldicl	r5,r5,4,60
-	li	r0, 0		/* clear requested_psscr to say we're awake */
-	std	r0, PACA_REQ_PSSCR(r13)
-	cmpd	cr4,r5,r4
-	bge	cr4,pnv_wakeup_tb_loss /* returns to caller */
-
-	blr	/* Waking up without hypervisor state loss. */
-
-/* Same calling convention as arch300 */
-pnv_restore_hyp_resource_arch207:
-	/*
-	 * POWER ISA 2.07 or less.
-	 * Check if we slept with sleep or winkle.
-	 */
-	lbz	r4,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	cr2,r4,PNV_THREAD_NAP
-	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
-
-	/*
-	 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
-	 * up from nap. At this stage CR3 shouldn't contains 'gt' since that
-	 * indicates we are waking with hypervisor state loss from nap.
-	 */
-	bgt	cr3,.
-
-	blr	/* Waking up without hypervisor state loss */
-
-/*
- * Called if waking up from idle state which can cause either partial or
- * complete hyp state loss.
- * In POWER8, called if waking up from fastsleep or winkle
- * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
- *
- * r13 - PACA
- * cr3 - gt if waking up with partial/complete hypervisor state loss
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
  *
- * If ISA300:
- * cr4 - gt or eq if waking up from complete hypervisor state loss.
- *
- * If ISA207:
- * r4 - PACA_THREAD_IDLE_STATE
+ * This must be called in real-mode (MSR_IDLE).
  */
-pnv_wakeup_tb_loss:
-	ld	r1,PACAR1(r13)
-	/*
-	 * Before entering any idle state, the NVGPRs are saved in the stack.
-	 * If there was a state loss, or PACA_NAPSTATELOST was set, then the
-	 * NVGPRs are restored. If we are here, it is likely that state is lost,
-	 * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
-	 * here are the same as the test to restore NVGPRS:
-	 * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
-	 * and SRR1 test for restoring NVGPRs.
-	 *
-	 * We are about to clobber NVGPRs now, so set NAPSTATELOST to
-	 * guarantee they will always be restored. This might be tightened
-	 * with careful reading of specs (particularly for ISA300) but this
-	 * is already a slow wakeup path and it's simpler to be safe.
-	 */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-
-	/*
-	 *
-	 * Save SRR1 and LR in NVGPRs as they might be clobbered in
-	 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
-	 * to determine the wakeup reason if we branch to kvm_start_guest. LR
-	 * is required to return back to reset vector after hypervisor state
-	 * restore is complete.
-	 */
-	mr	r19,r12
-	mr	r18,r4
-	mflr	r17
-BEGIN_FTR_SECTION
-	CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-
-	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
-	lbz	r7,PACA_THREAD_MASK(r13)
-
-	/*
-	 * Take the core lock to synchronize against other threads.
-	 *
-	 * Lock bit is set in one of the 2 cases-
-	 * a. In the sleep/winkle enter path, the last thread is executing
-	 * fastsleep workaround code.
-	 * b. In the wake up path, another thread is executing fastsleep
-	 * workaround undo code or resyncing timebase or restoring context
-	 * In either case loop until the lock bit is cleared.
-	 */
-1:
-	lwarx	r15,0,r14
-	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	bnel-	core_idle_lock_held
-	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	stwcx.	r15,0,r14
-	bne-	1b
-	isync
-
-	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
-	cmpwi	cr2,r9,0
-
-	/*
-	 * At this stage
-	 * cr2 - eq if first thread to wakeup in core
-	 * cr3-  gt if waking up with partial/complete hypervisor state loss
-	 * ISA300:
-	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
-	 */
-
-BEGIN_FTR_SECTION
-	/*
-	 * Were we in winkle?
-	 * If yes, check if all threads were in winkle, decrement our
-	 * winkle count, set all thread winkle bits if all were in winkle.
-	 * Check if our thread has a winkle bit set, and set cr4 accordingly
-	 * (to match ISA300, above). Pseudo-code for core idle state
-	 * transitions for ISA207 is as follows (everything happens atomically
-	 * due to store conditional and/or lock bit):
-	 *
-	 * nap_idle() { }
-	 * nap_wake() { }
-	 *
-	 * sleep_idle()
-	 * {
-	 *	core_idle_state &= ~thread_in_core
-	 * }
-	 *
-	 * sleep_wake()
-	 * {
-	 *	bool first_in_core, first_in_subcore;
-	 *
-	 *	first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
-	 *	first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
-	 *
-	 *	core_idle_state |= thread_in_core;
-	 * }
-	 *
-	 * winkle_idle()
-	 * {
-	 *	core_idle_state &= ~thread_in_core;
-	 *	core_idle_state += 1 << WINKLE_COUNT_SHIFT;
-	 * }
-	 *
-	 * winkle_wake()
-	 * {
-	 *	bool first_in_core, first_in_subcore, winkle_state_lost;
-	 *
-	 *	first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
-	 *	first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
-	 *
-	 *	core_idle_state |= thread_in_core;
-	 *
-	 *	if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
-	 *		core_idle_state |= THREAD_WINKLE_BITS;
-	 *	core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
-	 *
-	 *	winkle_state_lost = core_idle_state &
-	 *				(thread_in_core << WINKLE_THREAD_SHIFT);
-	 *	core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
-	 * }
-	 *
-	 */
-	cmpwi	r18,PNV_THREAD_WINKLE
-	bne	2f
-	andis.	r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
-	subis	r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
+_GLOBAL(isa206_idle_insn_mayloss)
+	std	r1,PACAR1(r13)
+	mflr	r4
+	mfcr	r5
+	/* use stack red zone rather than a new frame for saving regs */
+	std	r2,-8*0(r1)
+	std	r14,-8*1(r1)
+	std	r15,-8*2(r1)
+	std	r16,-8*3(r1)
+	std	r17,-8*4(r1)
+	std	r18,-8*5(r1)
+	std	r19,-8*6(r1)
+	std	r20,-8*7(r1)
+	std	r21,-8*8(r1)
+	std	r22,-8*9(r1)
+	std	r23,-8*10(r1)
+	std	r24,-8*11(r1)
+	std	r25,-8*12(r1)
+	std	r26,-8*13(r1)
+	std	r27,-8*14(r1)
+	std	r28,-8*15(r1)
+	std	r29,-8*16(r1)
+	std	r30,-8*17(r1)
+	std	r31,-8*18(r1)
+	std	r4,-8*19(r1)
+	std	r5,-8*20(r1)
+	cmpwi	r3,PNV_THREAD_NAP
+	bne	1f
+	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
+1:	cmpwi	r3,PNV_THREAD_SLEEP
+	bne	2f
+	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
+2:	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-	beq	2f
-	ori	r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
-2:
-	/* Shift thread bit to winkle mask, then test if this thread is set,
-	 * and remove it from the winkle bits */
-	slwi	r8,r7,8
-	and	r8,r8,r15
-	andc	r15,r15,r8
-	cmpwi	cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
-
-	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
-	and	r4,r4,r15
-	cmpwi	r4,0	/* Check if first in subcore */
-
-	or	r15,r15,r7	/* Set thread bit */
-	beq	first_thread_in_subcore
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
-	or	r15,r15,r7	/* Set thread bit */
-	beq	cr2,first_thread_in_core
-
-	/* Not first thread in core or subcore to wake up */
-	b	clear_lock
-
-first_thread_in_subcore:
-	/*
-	 * If waking up from sleep, subcore state is not lost. Hence
-	 * skip subcore state restore
-	 */
-	blt	cr4,subcore_state_restored
-
-	/* Restore per-subcore state */
-	ld	r4,_SDR1(r1)
-	mtspr	SPRN_SDR1,r4
-
-	ld	r4,_RPR(r1)
-	mtspr	SPRN_RPR,r4
-	ld	r4,_AMOR(r1)
-	mtspr	SPRN_AMOR,r4
-
-subcore_state_restored:
-	/*
-	 * Check if the thread is also the first thread in the core. If not,
-	 * skip to clear_lock.
-	 */
-	bne	cr2,clear_lock
-
-first_thread_in_core:
-
-	/*
-	 * First thread in the core waking up from any state which can cause
-	 * partial or complete hypervisor state loss. It needs to
-	 * call the fastsleep workaround code if the platform requires it.
-	 * Call it unconditionally here. The below branch instruction will
-	 * be patched out if the platform does not have fastsleep or does not
-	 * require the workaround. Patching will be performed during the
-	 * discovery of idle-states.
-	 */
-.global pnv_fastsleep_workaround_at_exit
-pnv_fastsleep_workaround_at_exit:
-	b	fastsleep_workaround_at_exit
-
-timebase_resync:
-	/*
-	 * Use cr3 which indicates that we are waking up with atleast partial
-	 * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
-	 */
-	ble	cr3,.Ltb_resynced
-	/* Time base re-sync */
-	bl	opal_resync_timebase;
-	/*
-	 * If waking up from sleep (POWER8), per core state
-	 * is not lost, skip to clear_lock.
-	 */
-.Ltb_resynced:
-	blt	cr4,clear_lock
-
-	/*
-	 * First thread in the core to wake up and its waking up with
-	 * complete hypervisor state loss. Restore per core hypervisor
-	 * state.
-	 */
-BEGIN_FTR_SECTION
-	ld	r4,_PTCR(r1)
-	mtspr	SPRN_PTCR,r4
-	ld	r4,_RPR(r1)
-	mtspr	SPRN_RPR,r4
-	ld	r4,_AMOR(r1)
-	mtspr	SPRN_AMOR,r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
-	ld	r4,_TSCR(r1)
-	mtspr	SPRN_TSCR,r4
-	ld	r4,_WORC(r1)
-	mtspr	SPRN_WORC,r4
-
-clear_lock:
-	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	lwsync
-	stw	r15,0(r14)
-
-common_exit:
-	/*
-	 * Common to all threads.
-	 *
-	 * If waking up from sleep, hypervisor state is not lost. Hence
-	 * skip hypervisor state restore.
-	 */
-	blt	cr4,hypervisor_state_restored
-
-	/* Waking up from winkle */
-
-BEGIN_MMU_FTR_SECTION
-	b	no_segments
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
-	/* Restore SLB from PACA */
-	ld	r8,PACA_SLBSHADOWPTR(r13)
-
-	.rept	SLB_NUM_BOLTED
-	li	r3, SLBSHADOW_SAVEAREA
-	LDX_BE	r5, r8, r3
-	addi	r3, r3, 8
-	LDX_BE	r6, r8, r3
-	andis.	r7,r5,SLB_ESID_V@h
-	beq	1f
-	slbmte	r6,r5
-1:	addi	r8,r8,16
-	.endr
-no_segments:
-
-	/* Restore per thread state */
-
-	ld	r4,_SPURR(r1)
-	mtspr	SPRN_SPURR,r4
-	ld	r4,_PURR(r1)
-	mtspr	SPRN_PURR,r4
-	ld	r4,_DSCR(r1)
-	mtspr	SPRN_DSCR,r4
-	ld	r4,_WORT(r1)
-	mtspr	SPRN_WORT,r4
-
-	/* Call cur_cpu_spec->cpu_restore() */
-	LOAD_REG_ADDR(r4, cur_cpu_spec)
-	ld	r4,0(r4)
-	ld	r12,CPU_SPEC_RESTORE(r4)
-#ifdef PPC64_ELF_ABI_v1
-	ld	r12,0(r12)
-#endif
-	mtctr	r12
-	bctrl
-
-/*
- * On POWER9, we can come here on wakeup from a cpuidle stop state.
- * Hence restore the additional SPRs to the saved value.
- *
- * On POWER8, we come here only on winkle. Since winkle is used
- * only in the case of CPU-Hotplug, we don't need to restore
- * the additional SPRs.
- */
-BEGIN_FTR_SECTION
-	bl	power9_restore_additional_sprs
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-hypervisor_state_restored:
-
-	mr	r12,r19
-	mtlr	r17
-	blr		/* return to pnv_powersave_wakeup */
-
-fastsleep_workaround_at_exit:
-	li	r3,1
-	li	r4,0
-	bl	opal_config_cpu_idle_state
-	b	timebase_resync
-
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-.global pnv_wakeup_loss
-pnv_wakeup_loss:
-	ld	r1,PACAR1(r13)
-BEGIN_FTR_SECTION
-	CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-	REST_NVGPRS(r1)
-	REST_GPR(2, r1)
-	ld	r4,PACAKMSR(r13)
-	ld	r5,_LINK(r1)
-	ld	r6,_CCR(r1)
-	addi	r1,r1,INT_FRAME_SIZE
-	mtlr	r5
-	mtcr	r6
-	mtmsrd	r4
-	blr
 
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-pnv_wakeup_noloss:
-	lbz	r0,PACA_NAPSTATELOST(r13)
-	cmpwi	r0,0
-	bne	pnv_wakeup_loss
-	ld	r1,PACAR1(r13)
-BEGIN_FTR_SECTION
-	CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-	ld	r4,PACAKMSR(r13)
-	ld	r5,_NIP(r1)
-	ld	r6,_CCR(r1)
-	addi	r1,r1,INT_FRAME_SIZE
-	mtlr	r5
-	mtcr	r6
-	mtmsrd	r4
-	blr
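To make the calling convention of the three new entry points concrete, here is a hedged C sketch of how a platform idle driver might use them. The extern prototypes match the asm functions above; the surrounding driver logic is illustrative only and is not taken from this series:

#include <stdbool.h>

extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);

static unsigned long enter_stop(unsigned long psscr_val, bool state_loss)
{
	unsigned long srr1;

	if (!state_loss)
		/* EC=ESL=0 lite stop: plain wakeup returns 0, nothing to restore */
		return isa300_idle_stop_noloss(psscr_val);

	/* caller must have saved SPRs, timebase, etc. before this point */
	srr1 = isa300_idle_stop_mayloss(psscr_val);
	/* NVGPRs came back via idle_return_gpr_loss; caller now restores SPRs */
	return srr1;
}

The design point the comments above make is that only the stack and NVGPRs are handled here; everything else (SPRs, MSR, timebase, KVM interaction) is pushed up to the C caller, which is what allows the bulk of the old assembly to move into C.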
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8a936723c791..ada901af4950 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -81,10 +81,7 @@
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 
-int __irq_offset_value;
-
 #ifdef CONFIG_PPC32
-EXPORT_SYMBOL(__irq_offset_value);
 atomic_t ppc_n_lost_interrupts;
 
 #ifdef CONFIG_TAU_INT
@@ -261,16 +258,9 @@ notrace void arch_local_irq_restore(unsigned long mask)
 	 */
 	irq_happened = get_irq_happened();
 	if (!irq_happened) {
-		/*
-		 * FIXME. Here we'd like to be able to do:
-		 *
-		 * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
-		 * WARN_ON(!(mfmsr() & MSR_EE));
-		 * #endif
-		 *
-		 * But currently it hits in a few paths, we should fix those and
-		 * enable the warning.
-		 */
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+		WARN_ON(!(mfmsr() & MSR_EE));
+#endif
 		return;
 	}
 
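The hunk above promotes a long-standing FIXME into a real assertion. As a toy model only (plain userspace C, not kernel code), the invariant it checks can be phrased like this: under lazy/soft interrupt masking, hardware MSR_EE is only cleared once an interrupt actually arrives while soft-masked and is recorded in irq_happened, so an empty irq_happened at restore time implies EE is still set.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static bool msr_ee = true;	/* models the hardware interrupt-enable bit */
static unsigned irq_happened;	/* interrupts seen while soft-masked */

static void irq_arrives_while_soft_masked(unsigned reason)
{
	irq_happened |= reason;
	msr_ee = false;		/* hardware stays masked until we replay */
}

static void arch_local_irq_restore_model(void)
{
	if (!irq_happened) {
		assert(msr_ee);	/* the invariant the kernel now WARNs on */
		return;
	}
	/* ... replay irq_happened, then hard-enable ... */
	irq_happened = 0;
	msr_ee = true;
}

int main(void)
{
	arch_local_irq_restore_model();		/* nothing pending: EE still on */
	irq_arrives_while_soft_masked(0x01);
	arch_local_irq_restore_model();		/* replays, then re-enables */
	puts("invariant held");
	return 0;
}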
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b5fec1f9751a..4581377cfc98 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
 	mce->srr1 = regs->msr;
 	mce->gpr3 = regs->gpr[3];
 	mce->in_use = 1;
+	mce->cpu = get_paca()->paca_index;
 
 	/* Mark it recovered if we have handled it and MSR(RI=1). */
 	if (handled && (regs->msr & MSR_RI))
@@ -121,6 +122,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
 
 	mce->initiator = mce_err->initiator;
 	mce->severity = mce_err->severity;
+	mce->sync_error = mce_err->sync_error;
+	mce->error_class = mce_err->error_class;
 
 	/*
 	 * Populate the mce error_type and type-specific error_type.
@@ -310,7 +313,11 @@ static void machine_check_process_queued_event(struct irq_work *work)
 void machine_check_print_event_info(struct machine_check_event *evt,
 				    bool user_mode, bool in_guest)
 {
-	const char *level, *sevstr, *subtype;
+	const char *level, *sevstr, *subtype, *err_type;
+	uint64_t ea = 0, pa = 0;
+	int n = 0;
+	char dar_str[50];
+	char pa_str[50];
 	static const char *mc_ue_types[] = {
 		"Indeterminate",
 		"Instruction fetch",
@@ -357,6 +364,13 @@ void machine_check_print_event_info(struct machine_check_event *evt,
 		"Store (timeout)",
 		"Page table walk Load/Store (timeout)",
 	};
+	static const char *mc_error_class[] = {
+		"Unknown",
+		"Hardware error",
+		"Probable Hardware error (some chance of software cause)",
+		"Software error",
+		"Probable Software error (some chance of hardware cause)",
+	};
 
 	/* Print things out */
 	if (evt->version != MCE_V1) {
@@ -371,9 +385,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
 		break;
 	case MCE_SEV_WARNING:
 		level = KERN_WARNING;
-		sevstr = "";
+		sevstr = "Warning";
 		break;
-	case MCE_SEV_ERROR_SYNC:
+	case MCE_SEV_SEVERE:
 		level = KERN_ERR;
 		sevstr = "Severe";
 		break;
@@ -384,101 +398,107 @@ void machine_check_print_event_info(struct machine_check_event *evt,
 		break;
 	}
 
-	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
-	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
-	       "Recovered" : "Not recovered");
-
-	if (in_guest) {
-		printk("%s Guest NIP: %016llx\n", level, evt->srr0);
-	} else if (user_mode) {
-		printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
-			evt->srr0, current->pid, current->comm);
-	} else {
-		printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
-		       (void *)evt->srr0);
-	}
-
-	printk("%s Initiator: %s\n", level,
-	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
 	switch (evt->error_type) {
 	case MCE_ERROR_TYPE_UE:
+		err_type = "UE";
 		subtype = evt->u.ue_error.ue_error_type <
 			ARRAY_SIZE(mc_ue_types) ?
 			mc_ue_types[evt->u.ue_error.ue_error_type]
 			: "Unknown";
-		printk("%s Error type: UE [%s]\n", level, subtype);
 		if (evt->u.ue_error.effective_address_provided)
-			printk("%s Effective address: %016llx\n",
-			       level, evt->u.ue_error.effective_address);
+			ea = evt->u.ue_error.effective_address;
 		if (evt->u.ue_error.physical_address_provided)
-			printk("%s Physical address: %016llx\n",
-			       level, evt->u.ue_error.physical_address);
+			pa = evt->u.ue_error.physical_address;
 		break;
 	case MCE_ERROR_TYPE_SLB:
+		err_type = "SLB";
 		subtype = evt->u.slb_error.slb_error_type <
 			ARRAY_SIZE(mc_slb_types) ?
 			mc_slb_types[evt->u.slb_error.slb_error_type]
 			: "Unknown";
-		printk("%s Error type: SLB [%s]\n", level, subtype);
 		if (evt->u.slb_error.effective_address_provided)
-			printk("%s Effective address: %016llx\n",
-			       level, evt->u.slb_error.effective_address);
+			ea = evt->u.slb_error.effective_address;
 		break;
 	case MCE_ERROR_TYPE_ERAT:
+		err_type = "ERAT";
 		subtype = evt->u.erat_error.erat_error_type <
 			ARRAY_SIZE(mc_erat_types) ?
 			mc_erat_types[evt->u.erat_error.erat_error_type]
 			: "Unknown";
-		printk("%s Error type: ERAT [%s]\n", level, subtype);
 		if (evt->u.erat_error.effective_address_provided)
-			printk("%s Effective address: %016llx\n",
-			       level, evt->u.erat_error.effective_address);
+			ea = evt->u.erat_error.effective_address;
 		break;
 	case MCE_ERROR_TYPE_TLB:
+		err_type = "TLB";
 		subtype = evt->u.tlb_error.tlb_error_type <
 			ARRAY_SIZE(mc_tlb_types) ?
 			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
 			: "Unknown";
-		printk("%s Error type: TLB [%s]\n", level, subtype);
 		if (evt->u.tlb_error.effective_address_provided)
-			printk("%s Effective address: %016llx\n",
-			       level, evt->u.tlb_error.effective_address);
+			ea = evt->u.tlb_error.effective_address;
 		break;
 	case MCE_ERROR_TYPE_USER:
+		err_type = "User";
 		subtype = evt->u.user_error.user_error_type <
 			ARRAY_SIZE(mc_user_types) ?
 			mc_user_types[evt->u.user_error.user_error_type]
 			: "Unknown";
-		printk("%s Error type: User [%s]\n", level, subtype);
 		if (evt->u.user_error.effective_address_provided)
-			printk("%s Effective address: %016llx\n",
-			       level, evt->u.user_error.effective_address);
+			ea = evt->u.user_error.effective_address;
 		break;
 	case MCE_ERROR_TYPE_RA:
+		err_type = "Real address";
 		subtype = evt->u.ra_error.ra_error_type <
 			ARRAY_SIZE(mc_ra_types) ?
 			mc_ra_types[evt->u.ra_error.ra_error_type]
 			: "Unknown";
-		printk("%s Error type: Real address [%s]\n", level, subtype);
 		if (evt->u.ra_error.effective_address_provided)
-			printk("%s Effective address: %016llx\n",
-			       level, evt->u.ra_error.effective_address);
+			ea = evt->u.ra_error.effective_address;
 		break;
 	case MCE_ERROR_TYPE_LINK:
+		err_type = "Link";
 		subtype = evt->u.link_error.link_error_type <
 			ARRAY_SIZE(mc_link_types) ?
 			mc_link_types[evt->u.link_error.link_error_type]
 			: "Unknown";
-		printk("%s Error type: Link [%s]\n", level, subtype);
 		if (evt->u.link_error.effective_address_provided)
-			printk("%s Effective address: %016llx\n",
-			       level, evt->u.link_error.effective_address);
+			ea = evt->u.link_error.effective_address;
 		break;
 	default:
 	case MCE_ERROR_TYPE_UNKNOWN:
-		printk("%s Error type: Unknown\n", level);
+		err_type = "Unknown";
+		subtype = "";
 		break;
 	}
+
+	dar_str[0] = pa_str[0] = '\0';
+	if (ea && evt->srr0 != ea) {
+		/* Load/Store address */
+		n = sprintf(dar_str, "DAR: %016llx ", ea);
+		if (pa)
+			sprintf(dar_str + n, "paddr: %016llx ", pa);
+	} else if (pa) {
+		sprintf(pa_str, " paddr: %016llx", pa);
+	}
+
+	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+		level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
+		err_type, subtype, dar_str,
+		evt->disposition == MCE_DISPOSITION_RECOVERED ?
+		"Recovered" : "Not recovered");
+
+	if (in_guest || user_mode) {
+		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+			level, evt->cpu, current->pid, current->comm,
+			in_guest ? "Guest " : "", evt->srr0, pa_str);
+	} else {
+		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+			level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+	}
+
+	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
+		mc_error_class[evt->error_class] : "Unknown";
+	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
 }
 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
 
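The restructuring above replaces many per-field printk() calls with one consolidated header line: the switch now only records err_type, subtype and the addresses, and formatting happens once at the end. A distilled, compilable sketch of that two-phase pattern (userspace C, with illustrative names and values only, not kernel code):

#include <stdio.h>
#include <stdint.h>

static void print_event(const char *err_type, const char *subtype,
			uint64_t srr0, uint64_t ea, int cpu, int recovered)
{
	char dar_str[50] = "";

	/* phase 1 recorded the fields; only format DAR if it adds information */
	if (ea && srr0 != ea)
		snprintf(dar_str, sizeof(dar_str), "DAR: %016llx ",
			 (unsigned long long)ea);

	/* phase 2: one consolidated line, mirroring the format string above */
	printf("MCE: CPU%d: machine check (Warning) Host %s %s %s[%s]\n",
	       cpu, err_type, subtype, dar_str,
	       recovered ? "Recovered" : "Not recovered");
}

int main(void)
{
	print_event("SLB", "Multihit", 0xc000000000abcdefULL,
		    0xc000000012345678ULL, 0, 1);
	return 0;
}

The per-platform tables in mce_power.c below feed this path: each entry now also carries an error_class and a sync_error flag, which is what the new trailing "MCE: CPU%d: <class>" line reports.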
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 6b800eec31f2..b5e876efe864 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -36,7 +36,7 @@
  * Convert an address related to an mm to a PFN. NOTE: we are in real
  * mode, we could potentially race with page table updates.
  */
-static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
 {
 	pte_t *ptep;
 	unsigned long flags;
@@ -131,213 +131,232 @@ struct mce_ierror_table {
 	bool nip_valid; /* nip is a valid indicator of faulting address */
 	unsigned int error_type;
 	unsigned int error_subtype;
+	unsigned int error_class;
 	unsigned int initiator;
 	unsigned int severity;
+	bool sync_error;
 };
 
 static const struct mce_ierror_table mce_p7_ierror_table[] = {
 { 0x00000000001c0000, 0x0000000000040000, true,
-  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000001c0000, 0x0000000000080000, true,
-  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000001c0000, 0x00000000000c0000, true,
-  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000001c0000, 0x0000000000100000, true,
   MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000001c0000, 0x0000000000140000, true,
-  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000001c0000, 0x0000000000180000, true,
-  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000001c0000, 0x00000000001c0000, true,
-  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
 
 static const struct mce_ierror_table mce_p8_ierror_table[] = {
 { 0x00000000081c0000, 0x0000000000040000, true,
-  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000000080000, true,
-  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x00000000000c0000, true,
-  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000081c0000, 0x0000000000100000, true,
-  MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000081c0000, 0x0000000000140000, true,
-  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000081c0000, 0x0000000000180000, true,
   MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x00000000001c0000, true,
-  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000008000000, true,
-  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000008040000, true,
   MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
 
 static const struct mce_ierror_table mce_p9_ierror_table[] = {
 { 0x00000000081c0000, 0x0000000000040000, true,
-  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000000080000, true,
-  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x00000000000c0000, true,
-  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000081c0000, 0x0000000000100000, true,
-  MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000081c0000, 0x0000000000140000, true,
-  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x00000000081c0000, 0x0000000000180000, true,
-  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x00000000001c0000, true,
-  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH_FOREIGN,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000008000000, true,
-  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000008040000, true,
   MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x00000000080c0000, true,
-  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000008100000, true,
-  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x00000000081c0000, 0x0000000008140000, false,
-  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_STORE,
-  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, }, /* ASYNC is fatal */
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, false }, /* ASYNC is fatal */
 { 0x00000000081c0000, 0x0000000008180000, false,
   MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
-  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, }, /* ASYNC is fatal */
-{ 0x00000000081c0000, 0x00000000081c0000, true,
+  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE,
   MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
 
 struct mce_derror_table {
 	unsigned long dsisr_value;
 	bool dar_valid; /* dar is a valid indicator of faulting address */
 	unsigned int error_type;
 	unsigned int error_subtype;
+	unsigned int error_class;
 	unsigned int initiator;
 	unsigned int severity;
+	bool sync_error;
 };
 
 static const struct mce_derror_table mce_p7_derror_table[] = {
 { 0x00008000, false,
-  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00004000, true,
   MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00000800, true,
-  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
 { 0x00000400, true,
-  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
 { 0x00000080, true,
-  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,	/* Before PARITY */
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
 { 0x00000100, true,
-  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00000040, true,
   MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+  MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
 
 static const struct mce_derror_table mce_p8_derror_table[] = {
 { 0x00008000, false,
-  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00004000, true,
   MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00002000, true,
-  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00001000, true,
   MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00000800, true,
-  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
 { 0x00000400, true,
-  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
 { 0x00000200, true,
   MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
 { 0x00000080, true,
   MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,	/* Before PARITY */
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
 { 0x00000100, true,
-  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
 
 static const struct mce_derror_table mce_p9_derror_table[] = {
 { 0x00008000, false,
-  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00004000, true,
   MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00002000, true,
-  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
-  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
+  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0x00001000, true,
   MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
312 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 327 MCE_ECLASS_HARDWARE,
328 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
313{ 0x00000800, true, 329{ 0x00000800, true,
314 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, 330 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
315 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 331 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
316{ 0x00000400, true, 332{ 0x00000400, true,
317 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, 333 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
318 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 334 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
319{ 0x00000200, false, 335{ 0x00000200, false,
320 MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, 336 MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
321 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 337 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
322{ 0x00000080, true, 338{ 0x00000080, true,
323 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ 339 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
324 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 340 MCE_ECLASS_SOFT_INDETERMINATE,
341 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
325{ 0x00000100, true, 342{ 0x00000100, true,
326 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, 343 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
327 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 344 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
328{ 0x00000040, true, 345{ 0x00000040, true,
329 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, 346 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
330 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 347 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
331{ 0x00000020, false, 348{ 0x00000020, false,
332 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, 349 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
333 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 350 MCE_ECLASS_HARDWARE,
351 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
334{ 0x00000010, false, 352{ 0x00000010, false,
335 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, 353 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
336 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 354 MCE_ECLASS_HARDWARE,
355 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
337{ 0x00000008, false, 356{ 0x00000008, false,
338 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, 357 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
339 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 358 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
340{ 0, false, 0, 0, 0, 0 } }; 359{ 0, false, 0, 0, 0, 0, 0 } };
341 360
342static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, 361static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
343 uint64_t *phys_addr) 362 uint64_t *phys_addr)
@@ -404,6 +423,7 @@ static int mce_handle_ierror(struct pt_regs *regs,
404 423
405 /* now fill in mce_error_info */ 424 /* now fill in mce_error_info */
406 mce_err->error_type = table[i].error_type; 425 mce_err->error_type = table[i].error_type;
426 mce_err->error_class = table[i].error_class;
407 switch (table[i].error_type) { 427 switch (table[i].error_type) {
408 case MCE_ERROR_TYPE_UE: 428 case MCE_ERROR_TYPE_UE:
409 mce_err->u.ue_error_type = table[i].error_subtype; 429 mce_err->u.ue_error_type = table[i].error_subtype;
@@ -427,11 +447,12 @@ static int mce_handle_ierror(struct pt_regs *regs,
427 mce_err->u.link_error_type = table[i].error_subtype; 447 mce_err->u.link_error_type = table[i].error_subtype;
428 break; 448 break;
429 } 449 }
450 mce_err->sync_error = table[i].sync_error;
430 mce_err->severity = table[i].severity; 451 mce_err->severity = table[i].severity;
431 mce_err->initiator = table[i].initiator; 452 mce_err->initiator = table[i].initiator;
432 if (table[i].nip_valid) { 453 if (table[i].nip_valid) {
433 *addr = regs->nip; 454 *addr = regs->nip;
434 if (mce_err->severity == MCE_SEV_ERROR_SYNC && 455 if (mce_err->sync_error &&
435 table[i].error_type == MCE_ERROR_TYPE_UE) { 456 table[i].error_type == MCE_ERROR_TYPE_UE) {
436 unsigned long pfn; 457 unsigned long pfn;
437 458
@@ -448,8 +469,10 @@ static int mce_handle_ierror(struct pt_regs *regs,
448 } 469 }
449 470
450 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; 471 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
451 mce_err->severity = MCE_SEV_ERROR_SYNC; 472 mce_err->error_class = MCE_ECLASS_UNKNOWN;
473 mce_err->severity = MCE_SEV_SEVERE;
452 mce_err->initiator = MCE_INITIATOR_CPU; 474 mce_err->initiator = MCE_INITIATOR_CPU;
475 mce_err->sync_error = true;
453 476
454 return 0; 477 return 0;
455} 478}
@@ -496,6 +519,7 @@ static int mce_handle_derror(struct pt_regs *regs,
496 519
497 /* now fill in mce_error_info */ 520 /* now fill in mce_error_info */
498 mce_err->error_type = table[i].error_type; 521 mce_err->error_type = table[i].error_type;
522 mce_err->error_class = table[i].error_class;
499 switch (table[i].error_type) { 523 switch (table[i].error_type) {
500 case MCE_ERROR_TYPE_UE: 524 case MCE_ERROR_TYPE_UE:
501 mce_err->u.ue_error_type = table[i].error_subtype; 525 mce_err->u.ue_error_type = table[i].error_subtype;
@@ -519,11 +543,12 @@ static int mce_handle_derror(struct pt_regs *regs,
519 mce_err->u.link_error_type = table[i].error_subtype; 543 mce_err->u.link_error_type = table[i].error_subtype;
520 break; 544 break;
521 } 545 }
546 mce_err->sync_error = table[i].sync_error;
522 mce_err->severity = table[i].severity; 547 mce_err->severity = table[i].severity;
523 mce_err->initiator = table[i].initiator; 548 mce_err->initiator = table[i].initiator;
524 if (table[i].dar_valid) 549 if (table[i].dar_valid)
525 *addr = regs->dar; 550 *addr = regs->dar;
526 else if (mce_err->severity == MCE_SEV_ERROR_SYNC && 551 else if (mce_err->sync_error &&
527 table[i].error_type == MCE_ERROR_TYPE_UE) { 552 table[i].error_type == MCE_ERROR_TYPE_UE) {
528 /* 553 /*
529 * We do a maximum of 4 nested MCE calls, see 554 * We do a maximum of 4 nested MCE calls, see
@@ -539,8 +564,10 @@ static int mce_handle_derror(struct pt_regs *regs,
539 return handled; 564 return handled;
540 565
541 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; 566 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
542 mce_err->severity = MCE_SEV_ERROR_SYNC; 567 mce_err->error_class = MCE_ECLASS_UNKNOWN;
568 mce_err->severity = MCE_SEV_SEVERE;
543 mce_err->initiator = MCE_INITIATOR_CPU; 569 mce_err->initiator = MCE_INITIATOR_CPU;
570 mce_err->sync_error = true;
544 571
545 return 0; 572 return 0;
546} 573}
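
The tables above are scanned in declaration order and the first matching DSISR/SRR1 bit wins, which is why the more specific SLB multihit entries sit before the parity entries. A minimal host-side sketch of this table-driven decode, using made-up bit values and a reduced entry struct rather than the kernel's mce_derror_table:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Reduced stand-in for the kernel's mce_derror_table entry. */
struct derror_entry {
	uint32_t dsisr_value;	/* bit that identifies this error */
	const char *name;
	bool sync_error;	/* synchronous with the failing access? */
};

/* Hypothetical bit assignments, ordered most-specific first. */
static const struct derror_entry table[] = {
	{ 0x00008000, "ue-load-store", true },
	{ 0x00000800, "erat-multihit", true },
	{ 0x00000080, "slb-multihit",  true },	/* before parity */
	{ 0x00000100, "slb-parity",    true },
	{ 0, NULL, false }			/* terminator */
};

static const struct derror_entry *decode(uint32_t dsisr)
{
	int i;

	/* First matching bit wins, exactly like mce_handle_derror(). */
	for (i = 0; table[i].dsisr_value; i++)
		if (dsisr & table[i].dsisr_value)
			return &table[i];
	return NULL;	/* caller falls back to MCE_ERROR_TYPE_UNKNOWN */
}

int main(void)
{
	/* 0x80 and 0x100 both set: multihit wins because it comes first. */
	const struct derror_entry *e = decode(0x00000180);

	printf("%s sync=%d\n", e ? e->name : "unknown", e ? e->sync_error : 1);
	return 0;
}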
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e7382abee868..9cc91d03ab62 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm)
267 267
268 get_paca()->mm_ctx_id = context->id; 268 get_paca()->mm_ctx_id = context->id;
269#ifdef CONFIG_PPC_MM_SLICES 269#ifdef CONFIG_PPC_MM_SLICES
270 VM_BUG_ON(!mm->context.slb_addr_limit); 270 VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
271 get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; 271 get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
272 memcpy(&get_paca()->mm_ctx_low_slices_psize, 272 memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
273 &context->low_slices_psize, sizeof(context->low_slices_psize)); 273 LOW_SLICE_ARRAY_SZ);
274 memcpy(&get_paca()->mm_ctx_high_slices_psize, 274 memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
275 &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); 275 TASK_SLICE_ARRAY_SZ(context));
276#else /* CONFIG_PPC_MM_SLICES */ 276#else /* CONFIG_PPC_MM_SLICES */
277 get_paca()->mm_ctx_user_psize = context->user_psize; 277 get_paca()->mm_ctx_user_psize = context->user_psize;
278 get_paca()->mm_ctx_sllp = context->sllp; 278 get_paca()->mm_ctx_sllp = context->sllp;
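
The copy_mm_to_paca() rewrite goes through mm_ctx_*() accessor helpers instead of reaching into mm->context directly, so the hash-MMU-specific layout can move without touching this caller. A schematic, freestanding example of the same accessor pattern; the struct and helper names here are invented for illustration:

#include <stdio.h>

/* Stand-in context; the real one is ppc-specific and config-dependent. */
struct ctx {
	unsigned long slb_addr_limit;
};

/* Accessor: callers no longer depend on the field layout. */
static unsigned long ctx_slb_addr_limit(const struct ctx *c)
{
	return c->slb_addr_limit;
}

int main(void)
{
	struct ctx c = { .slb_addr_limit = 0x1000000000000UL };

	/* A copy_mm_to_paca()-style consumer sees only the accessor. */
	printf("limit=%#lx\n", ctx_slb_addr_limit(&c));
	return 0;
}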
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dd9e0d5386ee..87da40129927 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -67,6 +67,7 @@
67#include <asm/cpu_has_feature.h> 67#include <asm/cpu_has_feature.h>
68#include <asm/asm-prototypes.h> 68#include <asm/asm-prototypes.h>
69#include <asm/stacktrace.h> 69#include <asm/stacktrace.h>
70#include <asm/hw_breakpoint.h>
70 71
71#include <linux/kprobes.h> 72#include <linux/kprobes.h>
72#include <linux/kdebug.h> 73#include <linux/kdebug.h>
@@ -133,7 +134,8 @@ static int __init enable_strict_msr_control(char *str)
133} 134}
134early_param("ppc_strict_facility_enable", enable_strict_msr_control); 135early_param("ppc_strict_facility_enable", enable_strict_msr_control);
135 136
136unsigned long msr_check_and_set(unsigned long bits) 137/* notrace because it's called by restore_math */
138unsigned long notrace msr_check_and_set(unsigned long bits)
137{ 139{
138 unsigned long oldmsr = mfmsr(); 140 unsigned long oldmsr = mfmsr();
139 unsigned long newmsr; 141 unsigned long newmsr;
@@ -152,7 +154,8 @@ unsigned long msr_check_and_set(unsigned long bits)
152} 154}
153EXPORT_SYMBOL_GPL(msr_check_and_set); 155EXPORT_SYMBOL_GPL(msr_check_and_set);
154 156
155void __msr_check_and_clear(unsigned long bits) 157/* notrace because it's called by restore_math */
158void notrace __msr_check_and_clear(unsigned long bits)
156{ 159{
157 unsigned long oldmsr = mfmsr(); 160 unsigned long oldmsr = mfmsr();
158 unsigned long newmsr; 161 unsigned long newmsr;
@@ -525,7 +528,17 @@ void giveup_all(struct task_struct *tsk)
525} 528}
526EXPORT_SYMBOL(giveup_all); 529EXPORT_SYMBOL(giveup_all);
527 530
528void restore_math(struct pt_regs *regs) 531/*
532 * The exception exit path calls restore_math() with interrupts hard disabled
533 * but the soft irq state not "reconciled". ftrace code that calls
534 * local_irq_save/restore causes warnings.
535 *
 536 * Rather than complicate the exit path, just don't trace restore_math. An
 537 * alternative would be to have the ftrace entry code check for this
 538 * un-reconciled condition, where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not
 539 * set, and temporarily fix it up for the duration of the ftrace call.
540 */
541void notrace restore_math(struct pt_regs *regs)
529{ 542{
530 unsigned long msr; 543 unsigned long msr;
531 544
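
The comment above sketches the alternative it rejects: ftrace entry code could itself detect the un-reconciled window and fix it up. A rough, standalone illustration of what that check would test, with plain booleans standing in for MSR[EE] and PACA_IRQ_HARD_DIS (none of this is from the patch):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the MSR[EE] bit and the PACA soft-irq bookkeeping. */
static bool msr_ee;		/* hardware interrupts enabled */
static bool paca_irq_hard_dis;	/* soft state says "hard disabled" */

/* Hypothetical check a traced entry path could perform. */
static bool irq_state_unreconciled(void)
{
	/* MSR[EE]=0 but the soft state has not caught up yet. */
	return !msr_ee && !paca_irq_hard_dis;
}

int main(void)
{
	msr_ee = false;			/* exception exit: hard disabled... */
	paca_irq_hard_dis = false;	/* ...but not yet reconciled */

	if (irq_state_unreconciled())
		printf("fixup needed before tracing; notrace avoids it\n");
	return 0;
}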
@@ -784,7 +797,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
784 return __set_dabr(dabr, dabrx); 797 return __set_dabr(dabr, dabrx);
785} 798}
786 799
787static inline int set_dawr(struct arch_hw_breakpoint *brk) 800int set_dawr(struct arch_hw_breakpoint *brk)
788{ 801{
789 unsigned long dawr, dawrx, mrd; 802 unsigned long dawr, dawrx, mrd;
790 803
@@ -816,7 +829,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
816{ 829{
817 memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk)); 830 memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
818 831
819 if (cpu_has_feature(CPU_FTR_DAWR)) 832 if (dawr_enabled())
820 // Power8 or later 833 // Power8 or later
821 set_dawr(brk); 834 set_dawr(brk);
822 else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) 835 else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -830,8 +843,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
830/* Check if we have DAWR or DABR hardware */ 843/* Check if we have DAWR or DABR hardware */
831bool ppc_breakpoint_available(void) 844bool ppc_breakpoint_available(void)
832{ 845{
833 if (cpu_has_feature(CPU_FTR_DAWR)) 846 if (dawr_enabled())
834 return true; /* POWER8 DAWR */ 847 return true; /* POWER8 DAWR or POWER9 forced DAWR */
835 if (cpu_has_feature(CPU_FTR_ARCH_207S)) 848 if (cpu_has_feature(CPU_FTR_ARCH_207S))
836 return false; /* POWER9 with DAWR disabled */ 849 return false; /* POWER9 with DAWR disabled */
837 /* DABR: Everything but POWER8 and POWER9 */ 850 /* DABR: Everything but POWER8 and POWER9 */
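
With dawr_enabled() replacing the raw CPU_FTR_DAWR test, breakpoint availability is a three-way decision: DAWR (POWER8, or POWER9 with the new opt-in), no breakpoint (POWER9 without it), or the older DABR. A standalone sketch of that decision with stand-in flags:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the CPU feature tests and the new DAWR switch. */
static bool dawr_on;	/* dawr_enabled(): P8, or P9 force-enabled */
static bool arch_207s;	/* CPU_FTR_ARCH_207S: POWER8/POWER9 */

static bool breakpoint_available(void)
{
	if (dawr_on)
		return true;	/* POWER8 DAWR or POWER9 forced DAWR */
	if (arch_207s)
		return false;	/* POWER9 with DAWR disabled */
	return true;		/* DABR: everything before POWER8 */
}

int main(void)
{
	dawr_on = false;
	arch_207s = true;	/* a POWER9 without the opt-in */
	printf("available=%d\n", breakpoint_available());
	return 0;
}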
@@ -1151,11 +1164,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
1151 thread_pkey_regs_restore(new_thread, old_thread); 1164 thread_pkey_regs_restore(new_thread, old_thread);
1152} 1165}
1153 1166
1154#ifdef CONFIG_PPC_BOOK3S_64
1155#define CP_SIZE 128
1156static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE)));
1157#endif
1158
1159struct task_struct *__switch_to(struct task_struct *prev, 1167struct task_struct *__switch_to(struct task_struct *prev,
1160 struct task_struct *new) 1168 struct task_struct *new)
1161{ 1169{
@@ -1729,7 +1737,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
1729 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ 1737 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
1730 1738
1731#ifdef CONFIG_PPC_BOOK3S_64 1739#ifdef CONFIG_PPC_BOOK3S_64
1732 preload_new_slb_context(start, sp); 1740 if (!radix_enabled())
1741 preload_new_slb_context(start, sp);
1733#endif 1742#endif
1734#endif 1743#endif
1735 1744
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index f33ff4163a51..523bb99d7676 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -154,10 +154,8 @@ static struct prom_t __prombss prom;
154 154
155static unsigned long __prombss prom_entry; 155static unsigned long __prombss prom_entry;
156 156
157#define PROM_SCRATCH_SIZE 256
158
159static char __prombss of_stdout_device[256]; 157static char __prombss of_stdout_device[256];
160static char __prombss prom_scratch[PROM_SCRATCH_SIZE]; 158static char __prombss prom_scratch[256];
161 159
162static unsigned long __prombss dt_header_start; 160static unsigned long __prombss dt_header_start;
163static unsigned long __prombss dt_struct_start, dt_struct_end; 161static unsigned long __prombss dt_struct_start, dt_struct_end;
@@ -224,6 +222,135 @@ static bool __prombss rtas_has_query_cpu_stopped;
224#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) 222#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR)
225#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) 223#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR)
226 224
225/* Copied from lib/string.c and lib/kstrtox.c */
226
227static int __init prom_strcmp(const char *cs, const char *ct)
228{
229 unsigned char c1, c2;
230
231 while (1) {
232 c1 = *cs++;
233 c2 = *ct++;
234 if (c1 != c2)
235 return c1 < c2 ? -1 : 1;
236 if (!c1)
237 break;
238 }
239 return 0;
240}
241
242static char __init *prom_strcpy(char *dest, const char *src)
243{
244 char *tmp = dest;
245
246 while ((*dest++ = *src++) != '\0')
247 /* nothing */;
248 return tmp;
249}
250
251static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
252{
253 unsigned char c1, c2;
254
255 while (count) {
256 c1 = *cs++;
257 c2 = *ct++;
258 if (c1 != c2)
259 return c1 < c2 ? -1 : 1;
260 if (!c1)
261 break;
262 count--;
263 }
264 return 0;
265}
266
267static size_t __init prom_strlen(const char *s)
268{
269 const char *sc;
270
271 for (sc = s; *sc != '\0'; ++sc)
272 /* nothing */;
273 return sc - s;
274}
275
276static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
277{
278 const unsigned char *su1, *su2;
279 int res = 0;
280
281 for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
282 if ((res = *su1 - *su2) != 0)
283 break;
284 return res;
285}
286
287static char __init *prom_strstr(const char *s1, const char *s2)
288{
289 size_t l1, l2;
290
291 l2 = prom_strlen(s2);
292 if (!l2)
293 return (char *)s1;
294 l1 = prom_strlen(s1);
295 while (l1 >= l2) {
296 l1--;
297 if (!prom_memcmp(s1, s2, l2))
298 return (char *)s1;
299 s1++;
300 }
301 return NULL;
302}
303
304static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
305{
306 size_t ret = prom_strlen(src);
307
308 if (size) {
309 size_t len = (ret >= size) ? size - 1 : ret;
310 memcpy(dest, src, len);
311 dest[len] = '\0';
312 }
313 return ret;
314}
315
316#ifdef CONFIG_PPC_PSERIES
317static int __init prom_strtobool(const char *s, bool *res)
318{
319 if (!s)
320 return -EINVAL;
321
322 switch (s[0]) {
323 case 'y':
324 case 'Y':
325 case '1':
326 *res = true;
327 return 0;
328 case 'n':
329 case 'N':
330 case '0':
331 *res = false;
332 return 0;
333 case 'o':
334 case 'O':
335 switch (s[1]) {
336 case 'n':
337 case 'N':
338 *res = true;
339 return 0;
340 case 'f':
341 case 'F':
342 *res = false;
343 return 0;
344 default:
345 break;
346 }
347 default:
348 break;
349 }
350
351 return -EINVAL;
352}
353#endif
227 354
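
These prom_* helpers duplicate the lib/string.c and lib/kstrtox.c logic so that prom_init never calls the instrumented originals. Their behaviour can be checked host-side; the sketch below re-implements the prom_strtobool() contract and mirrors the disable_radix parsing further down (the command line is made up):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Same contract as prom_strtobool(): y/Y/1, n/N/0, on/off. */
static int strtobool_demo(const char *s, bool *res)
{
	if (!s)
		return -1;
	switch (s[0]) {
	case 'y': case 'Y': case '1': *res = true;  return 0;
	case 'n': case 'N': case '0': *res = false; return 0;
	case 'o': case 'O':
		if (s[1] == 'n' || s[1] == 'N') { *res = true;  return 0; }
		if (s[1] == 'f' || s[1] == 'F') { *res = false; return 0; }
	}
	return -1;
}

int main(void)
{
	/* Mirrors early_cmdline_parse() scanning for "disable_radix=". */
	const char *cmdline = "root=/dev/sda disable_radix=on";
	const char *opt = strstr(cmdline, "disable_radix");
	bool val;

	/* "disable_radix" is 13 chars; opt[13] is the optional '='. */
	if (opt && opt[13] == '=' && !strtobool_demo(opt + 14, &val))
		printf("disable_radix=%d\n", val);
	return 0;
}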
228/* This is the one and *ONLY* place where we actually call open 355/* This is the one and *ONLY* place where we actually call open
229 * firmware. 356 * firmware.
@@ -555,7 +682,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
555 add_string(&p, tohex((u32)(unsigned long) value)); 682 add_string(&p, tohex((u32)(unsigned long) value));
556 add_string(&p, tohex(valuelen)); 683 add_string(&p, tohex(valuelen));
557 add_string(&p, tohex(ADDR(pname))); 684 add_string(&p, tohex(ADDR(pname)));
558 add_string(&p, tohex(strlen(pname))); 685 add_string(&p, tohex(prom_strlen(pname)));
559 add_string(&p, "property"); 686 add_string(&p, "property");
560 *p = 0; 687 *p = 0;
561 return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); 688 return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
@@ -631,33 +758,30 @@ static void __init early_cmdline_parse(void)
631 const char *opt; 758 const char *opt;
632 759
633 char *p; 760 char *p;
634 int l __maybe_unused = 0; 761 int l = 0;
635 762
636 prom_cmd_line[0] = 0; 763 prom_cmd_line[0] = 0;
637 p = prom_cmd_line; 764 p = prom_cmd_line;
638 if ((long)prom.chosen > 0) 765 if ((long)prom.chosen > 0)
639 l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); 766 l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
640#ifdef CONFIG_CMDLINE 767 if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
641 if (l <= 0 || p[0] == '\0') /* dbl check */ 768 prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
642 strlcpy(prom_cmd_line,
643 CONFIG_CMDLINE, sizeof(prom_cmd_line));
644#endif /* CONFIG_CMDLINE */
645 prom_printf("command line: %s\n", prom_cmd_line); 769 prom_printf("command line: %s\n", prom_cmd_line);
646 770
647#ifdef CONFIG_PPC64 771#ifdef CONFIG_PPC64
648 opt = strstr(prom_cmd_line, "iommu="); 772 opt = prom_strstr(prom_cmd_line, "iommu=");
649 if (opt) { 773 if (opt) {
650 prom_printf("iommu opt is: %s\n", opt); 774 prom_printf("iommu opt is: %s\n", opt);
651 opt += 6; 775 opt += 6;
652 while (*opt && *opt == ' ') 776 while (*opt && *opt == ' ')
653 opt++; 777 opt++;
654 if (!strncmp(opt, "off", 3)) 778 if (!prom_strncmp(opt, "off", 3))
655 prom_iommu_off = 1; 779 prom_iommu_off = 1;
656 else if (!strncmp(opt, "force", 5)) 780 else if (!prom_strncmp(opt, "force", 5))
657 prom_iommu_force_on = 1; 781 prom_iommu_force_on = 1;
658 } 782 }
659#endif 783#endif
660 opt = strstr(prom_cmd_line, "mem="); 784 opt = prom_strstr(prom_cmd_line, "mem=");
661 if (opt) { 785 if (opt) {
662 opt += 4; 786 opt += 4;
663 prom_memory_limit = prom_memparse(opt, (const char **)&opt); 787 prom_memory_limit = prom_memparse(opt, (const char **)&opt);
@@ -669,13 +793,13 @@ static void __init early_cmdline_parse(void)
669 793
670#ifdef CONFIG_PPC_PSERIES 794#ifdef CONFIG_PPC_PSERIES
671 prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); 795 prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
672 opt = strstr(prom_cmd_line, "disable_radix"); 796 opt = prom_strstr(prom_cmd_line, "disable_radix");
673 if (opt) { 797 if (opt) {
674 opt += 13; 798 opt += 13;
675 if (*opt && *opt == '=') { 799 if (*opt && *opt == '=') {
676 bool val; 800 bool val;
677 801
678 if (kstrtobool(++opt, &val)) 802 if (prom_strtobool(++opt, &val))
679 prom_radix_disable = false; 803 prom_radix_disable = false;
680 else 804 else
681 prom_radix_disable = val; 805 prom_radix_disable = val;
@@ -1028,7 +1152,7 @@ static int __init prom_count_smt_threads(void)
1028 type[0] = 0; 1152 type[0] = 0;
1029 prom_getprop(node, "device_type", type, sizeof(type)); 1153 prom_getprop(node, "device_type", type, sizeof(type));
1030 1154
1031 if (strcmp(type, "cpu")) 1155 if (prom_strcmp(type, "cpu"))
1032 continue; 1156 continue;
1033 /* 1157 /*
1034 * There is an entry for each smt thread, each entry being 1158 * There is an entry for each smt thread, each entry being
@@ -1138,8 +1262,14 @@ static void __init prom_check_platform_support(void)
1138 int prop_len = prom_getproplen(prom.chosen, 1262 int prop_len = prom_getproplen(prom.chosen,
1139 "ibm,arch-vec-5-platform-support"); 1263 "ibm,arch-vec-5-platform-support");
1140 1264
1141 /* First copy the architecture vec template */ 1265 /*
1142 ibm_architecture_vec = ibm_architecture_vec_template; 1266 * First copy the architecture vec template
1267 *
 1268 * Use memcpy() instead of *vec = *vec_template so that GCC replaces it
 1269 * with __memcpy() when KASAN is active.
1270 */
1271 memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
1272 sizeof(ibm_architecture_vec));
1143 1273
1144 if (prop_len > 1) { 1274 if (prop_len > 1) {
1145 int i; 1275 int i;
@@ -1475,7 +1605,7 @@ static void __init prom_init_mem(void)
1475 */ 1605 */
1476 prom_getprop(node, "name", type, sizeof(type)); 1606 prom_getprop(node, "name", type, sizeof(type));
1477 } 1607 }
1478 if (strcmp(type, "memory")) 1608 if (prom_strcmp(type, "memory"))
1479 continue; 1609 continue;
1480 1610
1481 plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf)); 1611 plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf));
@@ -1487,8 +1617,8 @@ static void __init prom_init_mem(void)
1487 endp = p + (plen / sizeof(cell_t)); 1617 endp = p + (plen / sizeof(cell_t));
1488 1618
1489#ifdef DEBUG_PROM 1619#ifdef DEBUG_PROM
1490 memset(path, 0, PROM_SCRATCH_SIZE); 1620 memset(path, 0, sizeof(prom_scratch));
1491 call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); 1621 call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
1492 prom_debug(" node %s :\n", path); 1622 prom_debug(" node %s :\n", path);
1493#endif /* DEBUG_PROM */ 1623#endif /* DEBUG_PROM */
1494 1624
@@ -1756,19 +1886,19 @@ static void __init prom_initialize_tce_table(void)
1756 prom_getprop(node, "device_type", type, sizeof(type)); 1886 prom_getprop(node, "device_type", type, sizeof(type));
1757 prom_getprop(node, "model", model, sizeof(model)); 1887 prom_getprop(node, "model", model, sizeof(model));
1758 1888
1759 if ((type[0] == 0) || (strstr(type, "pci") == NULL)) 1889 if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL))
1760 continue; 1890 continue;
1761 1891
1762 /* Keep the old logic intact to avoid regression. */ 1892 /* Keep the old logic intact to avoid regression. */
1763 if (compatible[0] != 0) { 1893 if (compatible[0] != 0) {
1764 if ((strstr(compatible, "python") == NULL) && 1894 if ((prom_strstr(compatible, "python") == NULL) &&
1765 (strstr(compatible, "Speedwagon") == NULL) && 1895 (prom_strstr(compatible, "Speedwagon") == NULL) &&
1766 (strstr(compatible, "Winnipeg") == NULL)) 1896 (prom_strstr(compatible, "Winnipeg") == NULL))
1767 continue; 1897 continue;
1768 } else if (model[0] != 0) { 1898 } else if (model[0] != 0) {
1769 if ((strstr(model, "ython") == NULL) && 1899 if ((prom_strstr(model, "ython") == NULL) &&
1770 (strstr(model, "peedwagon") == NULL) && 1900 (prom_strstr(model, "peedwagon") == NULL) &&
1771 (strstr(model, "innipeg") == NULL)) 1901 (prom_strstr(model, "innipeg") == NULL))
1772 continue; 1902 continue;
1773 } 1903 }
1774 1904
@@ -1796,10 +1926,10 @@ static void __init prom_initialize_tce_table(void)
1796 local_alloc_bottom = base; 1926 local_alloc_bottom = base;
1797 1927
1798 /* It seems OF doesn't null-terminate the path :-( */ 1928 /* It seems OF doesn't null-terminate the path :-( */
1799 memset(path, 0, PROM_SCRATCH_SIZE); 1929 memset(path, 0, sizeof(prom_scratch));
1800 /* Call OF to setup the TCE hardware */ 1930 /* Call OF to setup the TCE hardware */
1801 if (call_prom("package-to-path", 3, 1, node, 1931 if (call_prom("package-to-path", 3, 1, node,
1802 path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { 1932 path, sizeof(prom_scratch) - 1) == PROM_ERROR) {
1803 prom_printf("package-to-path failed\n"); 1933 prom_printf("package-to-path failed\n");
1804 } 1934 }
1805 1935
@@ -1917,12 +2047,12 @@ static void __init prom_hold_cpus(void)
1917 2047
1918 type[0] = 0; 2048 type[0] = 0;
1919 prom_getprop(node, "device_type", type, sizeof(type)); 2049 prom_getprop(node, "device_type", type, sizeof(type));
1920 if (strcmp(type, "cpu") != 0) 2050 if (prom_strcmp(type, "cpu") != 0)
1921 continue; 2051 continue;
1922 2052
1923 /* Skip non-configured cpus. */ 2053 /* Skip non-configured cpus. */
1924 if (prom_getprop(node, "status", type, sizeof(type)) > 0) 2054 if (prom_getprop(node, "status", type, sizeof(type)) > 0)
1925 if (strcmp(type, "okay") != 0) 2055 if (prom_strcmp(type, "okay") != 0)
1926 continue; 2056 continue;
1927 2057
1928 reg = cpu_to_be32(-1); /* make sparse happy */ 2058 reg = cpu_to_be32(-1); /* make sparse happy */
@@ -1998,9 +2128,9 @@ static void __init prom_find_mmu(void)
1998 return; 2128 return;
1999 version[sizeof(version) - 1] = 0; 2129 version[sizeof(version) - 1] = 0;
2000 /* XXX might need to add other versions here */ 2130 /* XXX might need to add other versions here */
2001 if (strcmp(version, "Open Firmware, 1.0.5") == 0) 2131 if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0)
2002 of_workarounds = OF_WA_CLAIM; 2132 of_workarounds = OF_WA_CLAIM;
2003 else if (strncmp(version, "FirmWorks,3.", 12) == 0) { 2133 else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) {
2004 of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL; 2134 of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
2005 call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim"); 2135 call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
2006 } else 2136 } else
@@ -2033,7 +2163,7 @@ static void __init prom_init_stdout(void)
2033 call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); 2163 call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
2034 prom_printf("OF stdout device is: %s\n", of_stdout_device); 2164 prom_printf("OF stdout device is: %s\n", of_stdout_device);
2035 prom_setprop(prom.chosen, "/chosen", "linux,stdout-path", 2165 prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
2036 path, strlen(path) + 1); 2166 path, prom_strlen(path) + 1);
2037 2167
2038 /* instance-to-package fails on PA-Semi */ 2168 /* instance-to-package fails on PA-Semi */
2039 stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); 2169 stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
@@ -2043,7 +2173,7 @@ static void __init prom_init_stdout(void)
2043 /* If it's a display, note it */ 2173 /* If it's a display, note it */
2044 memset(type, 0, sizeof(type)); 2174 memset(type, 0, sizeof(type));
2045 prom_getprop(stdout_node, "device_type", type, sizeof(type)); 2175 prom_getprop(stdout_node, "device_type", type, sizeof(type));
2046 if (strcmp(type, "display") == 0) 2176 if (prom_strcmp(type, "display") == 0)
2047 prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); 2177 prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
2048 } 2178 }
2049} 2179}
@@ -2064,19 +2194,19 @@ static int __init prom_find_machine_type(void)
2064 compat[len] = 0; 2194 compat[len] = 0;
2065 while (i < len) { 2195 while (i < len) {
2066 char *p = &compat[i]; 2196 char *p = &compat[i];
2067 int sl = strlen(p); 2197 int sl = prom_strlen(p);
2068 if (sl == 0) 2198 if (sl == 0)
2069 break; 2199 break;
2070 if (strstr(p, "Power Macintosh") || 2200 if (prom_strstr(p, "Power Macintosh") ||
2071 strstr(p, "MacRISC")) 2201 prom_strstr(p, "MacRISC"))
2072 return PLATFORM_POWERMAC; 2202 return PLATFORM_POWERMAC;
2073#ifdef CONFIG_PPC64 2203#ifdef CONFIG_PPC64
2074 /* We must make sure we don't detect the IBM Cell 2204 /* We must make sure we don't detect the IBM Cell
2075 * blades as pSeries due to some firmware issues, 2205 * blades as pSeries due to some firmware issues,
2076 * so we do it here. 2206 * so we do it here.
2077 */ 2207 */
2078 if (strstr(p, "IBM,CBEA") || 2208 if (prom_strstr(p, "IBM,CBEA") ||
2079 strstr(p, "IBM,CPBW-1.0")) 2209 prom_strstr(p, "IBM,CPBW-1.0"))
2080 return PLATFORM_GENERIC; 2210 return PLATFORM_GENERIC;
2081#endif /* CONFIG_PPC64 */ 2211#endif /* CONFIG_PPC64 */
2082 i += sl + 1; 2212 i += sl + 1;
@@ -2093,7 +2223,7 @@ static int __init prom_find_machine_type(void)
2093 compat, sizeof(compat)-1); 2223 compat, sizeof(compat)-1);
2094 if (len <= 0) 2224 if (len <= 0)
2095 return PLATFORM_GENERIC; 2225 return PLATFORM_GENERIC;
2096 if (strcmp(compat, "chrp")) 2226 if (prom_strcmp(compat, "chrp"))
2097 return PLATFORM_GENERIC; 2227 return PLATFORM_GENERIC;
2098 2228
2099 /* Default to pSeries. We need to know if we are running LPAR */ 2229 /* Default to pSeries. We need to know if we are running LPAR */
@@ -2155,19 +2285,19 @@ static void __init prom_check_displays(void)
2155 for (node = 0; prom_next_node(&node); ) { 2285 for (node = 0; prom_next_node(&node); ) {
2156 memset(type, 0, sizeof(type)); 2286 memset(type, 0, sizeof(type));
2157 prom_getprop(node, "device_type", type, sizeof(type)); 2287 prom_getprop(node, "device_type", type, sizeof(type));
2158 if (strcmp(type, "display") != 0) 2288 if (prom_strcmp(type, "display") != 0)
2159 continue; 2289 continue;
2160 2290
2161 /* It seems OF doesn't null-terminate the path :-( */ 2291 /* It seems OF doesn't null-terminate the path :-( */
2162 path = prom_scratch; 2292 path = prom_scratch;
2163 memset(path, 0, PROM_SCRATCH_SIZE); 2293 memset(path, 0, sizeof(prom_scratch));
2164 2294
2165 /* 2295 /*
2166 * leave some room at the end of the path for appending extra 2296 * leave some room at the end of the path for appending extra
2167 * arguments 2297 * arguments
2168 */ 2298 */
2169 if (call_prom("package-to-path", 3, 1, node, path, 2299 if (call_prom("package-to-path", 3, 1, node, path,
2170 PROM_SCRATCH_SIZE-10) == PROM_ERROR) 2300 sizeof(prom_scratch) - 10) == PROM_ERROR)
2171 continue; 2301 continue;
2172 prom_printf("found display : %s, opening... ", path); 2302 prom_printf("found display : %s, opening... ", path);
2173 2303
@@ -2259,9 +2389,9 @@ static unsigned long __init dt_find_string(char *str)
2259 s = os = (char *)dt_string_start; 2389 s = os = (char *)dt_string_start;
2260 s += 4; 2390 s += 4;
2261 while (s < (char *)dt_string_end) { 2391 while (s < (char *)dt_string_end) {
2262 if (strcmp(s, str) == 0) 2392 if (prom_strcmp(s, str) == 0)
2263 return s - os; 2393 return s - os;
2264 s += strlen(s) + 1; 2394 s += prom_strlen(s) + 1;
2265 } 2395 }
2266 return 0; 2396 return 0;
2267} 2397}
@@ -2294,7 +2424,7 @@ static void __init scan_dt_build_strings(phandle node,
2294 } 2424 }
2295 2425
2296 /* skip "name" */ 2426 /* skip "name" */
2297 if (strcmp(namep, "name") == 0) { 2427 if (prom_strcmp(namep, "name") == 0) {
2298 *mem_start = (unsigned long)namep; 2428 *mem_start = (unsigned long)namep;
2299 prev_name = "name"; 2429 prev_name = "name";
2300 continue; 2430 continue;
@@ -2306,7 +2436,7 @@ static void __init scan_dt_build_strings(phandle node,
2306 namep = sstart + soff; 2436 namep = sstart + soff;
2307 } else { 2437 } else {
2308 /* Trim off some if we can */ 2438 /* Trim off some if we can */
2309 *mem_start = (unsigned long)namep + strlen(namep) + 1; 2439 *mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
2310 dt_string_end = *mem_start; 2440 dt_string_end = *mem_start;
2311 } 2441 }
2312 prev_name = namep; 2442 prev_name = namep;
@@ -2363,8 +2493,8 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
2363 2493
2364 /* get it again for debugging */ 2494 /* get it again for debugging */
2365 path = prom_scratch; 2495 path = prom_scratch;
2366 memset(path, 0, PROM_SCRATCH_SIZE); 2496 memset(path, 0, sizeof(prom_scratch));
2367 call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); 2497 call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
2368 2498
2369 /* get and store all properties */ 2499 /* get and store all properties */
2370 prev_name = ""; 2500 prev_name = "";
@@ -2375,7 +2505,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
2375 break; 2505 break;
2376 2506
2377 /* skip "name" */ 2507 /* skip "name" */
2378 if (strcmp(pname, "name") == 0) { 2508 if (prom_strcmp(pname, "name") == 0) {
2379 prev_name = "name"; 2509 prev_name = "name";
2380 continue; 2510 continue;
2381 } 2511 }
@@ -2406,7 +2536,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
2406 call_prom("getprop", 4, 1, node, pname, valp, l); 2536 call_prom("getprop", 4, 1, node, pname, valp, l);
2407 *mem_start = _ALIGN(*mem_start, 4); 2537 *mem_start = _ALIGN(*mem_start, 4);
2408 2538
2409 if (!strcmp(pname, "phandle")) 2539 if (!prom_strcmp(pname, "phandle"))
2410 has_phandle = 1; 2540 has_phandle = 1;
2411 } 2541 }
2412 2542
@@ -2476,8 +2606,8 @@ static void __init flatten_device_tree(void)
2476 2606
2477 /* Add "phandle" in there, we'll need it */ 2607 /* Add "phandle" in there, we'll need it */
2478 namep = make_room(&mem_start, &mem_end, 16, 1); 2608 namep = make_room(&mem_start, &mem_end, 16, 1);
2479 strcpy(namep, "phandle"); 2609 prom_strcpy(namep, "phandle");
2480 mem_start = (unsigned long)namep + strlen(namep) + 1; 2610 mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
2481 2611
2482 /* Build string array */ 2612 /* Build string array */
2483 prom_printf("Building dt strings...\n"); 2613 prom_printf("Building dt strings...\n");
@@ -2799,7 +2929,7 @@ static void __init fixup_device_tree_efika(void)
2799 rv = prom_getprop(node, "model", prop, sizeof(prop)); 2929 rv = prom_getprop(node, "model", prop, sizeof(prop));
2800 if (rv == PROM_ERROR) 2930 if (rv == PROM_ERROR)
2801 return; 2931 return;
2802 if (strcmp(prop, "EFIKA5K2")) 2932 if (prom_strcmp(prop, "EFIKA5K2"))
2803 return; 2933 return;
2804 2934
2805 prom_printf("Applying EFIKA device tree fixups\n"); 2935 prom_printf("Applying EFIKA device tree fixups\n");
@@ -2807,13 +2937,13 @@ static void __init fixup_device_tree_efika(void)
2807 /* Claiming to be 'chrp' is death */ 2937 /* Claiming to be 'chrp' is death */
2808 node = call_prom("finddevice", 1, 1, ADDR("/")); 2938 node = call_prom("finddevice", 1, 1, ADDR("/"));
2809 rv = prom_getprop(node, "device_type", prop, sizeof(prop)); 2939 rv = prom_getprop(node, "device_type", prop, sizeof(prop));
2810 if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0)) 2940 if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0))
2811 prom_setprop(node, "/", "device_type", "efika", sizeof("efika")); 2941 prom_setprop(node, "/", "device_type", "efika", sizeof("efika"));
2812 2942
2813 /* CODEGEN,description is exposed in /proc/cpuinfo so 2943 /* CODEGEN,description is exposed in /proc/cpuinfo so
2814 fix that too */ 2944 fix that too */
2815 rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop)); 2945 rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop));
2816 if (rv != PROM_ERROR && (strstr(prop, "CHRP"))) 2946 if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP")))
2817 prom_setprop(node, "/", "CODEGEN,description", 2947 prom_setprop(node, "/", "CODEGEN,description",
2818 "Efika 5200B PowerPC System", 2948 "Efika 5200B PowerPC System",
2819 sizeof("Efika 5200B PowerPC System")); 2949 sizeof("Efika 5200B PowerPC System"));
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 667df97d2595..4cac45cb5de5 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -16,10 +16,18 @@
16# If you really need to reference something from prom_init.o add 16# If you really need to reference something from prom_init.o add
17# it to the list below: 17# it to the list below:
18 18
19grep "^CONFIG_KASAN=y$" .config >/dev/null
20if [ $? -eq 0 ]
21then
22 MEM_FUNCS="__memcpy __memset"
23else
24 MEM_FUNCS="memcpy memset"
25fi
26
19WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush 27WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
20_end enter_prom memcpy memset reloc_offset __secondary_hold 28_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
21__secondary_hold_acknowledge __secondary_hold_spinloop __start 29__secondary_hold_acknowledge __secondary_hold_spinloop __start
22strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224 30logo_linux_clut224
23reloc_got2 kernstart_addr memstart_addr linux_banner _stext 31reloc_got2 kernstart_addr memstart_addr linux_banner _stext
24__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." 32__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
25 33
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index d9ac7d94656e..684b0b315c32 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -43,6 +43,7 @@
43#include <asm/tm.h> 43#include <asm/tm.h>
44#include <asm/asm-prototypes.h> 44#include <asm/asm-prototypes.h>
45#include <asm/debug.h> 45#include <asm/debug.h>
46#include <asm/hw_breakpoint.h>
46 47
47#define CREATE_TRACE_POINTS 48#define CREATE_TRACE_POINTS
48#include <trace/events/syscalls.h> 49#include <trace/events/syscalls.h>
@@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request,
3088 dbginfo.sizeof_condition = 0; 3089 dbginfo.sizeof_condition = 0;
3089#ifdef CONFIG_HAVE_HW_BREAKPOINT 3090#ifdef CONFIG_HAVE_HW_BREAKPOINT
3090 dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; 3091 dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
3091 if (cpu_has_feature(CPU_FTR_DAWR)) 3092 if (dawr_enabled())
3092 dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; 3093 dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
3093#else 3094#else
3094 dbginfo.features = 0; 3095 dbginfo.features = 0;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 70568ccbd9fd..e1c9cf079503 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -104,6 +104,14 @@ static __init int barrier_nospec_debugfs_init(void)
104 return 0; 104 return 0;
105} 105}
106device_initcall(barrier_nospec_debugfs_init); 106device_initcall(barrier_nospec_debugfs_init);
107
108static __init int security_feature_debugfs_init(void)
109{
110 debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
111 (u64 *)&powerpc_security_features);
112 return 0;
113}
114device_initcall(security_feature_debugfs_init);
107#endif /* CONFIG_DEBUG_FS */ 115#endif /* CONFIG_DEBUG_FS */
108 116
109#ifdef CONFIG_PPC_FSL_BOOK3E 117#ifdef CONFIG_PPC_FSL_BOOK3E
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2e5dfb6e0823..aad9f5df6ab6 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -67,6 +67,7 @@
67#include <asm/livepatch.h> 67#include <asm/livepatch.h>
68#include <asm/mmu_context.h> 68#include <asm/mmu_context.h>
69#include <asm/cpu_has_feature.h> 69#include <asm/cpu_has_feature.h>
70#include <asm/kasan.h>
70 71
71#include "setup.h" 72#include "setup.h"
72 73
@@ -133,13 +134,11 @@ int crashing_cpu = -1;
133/* also used by kexec */ 134/* also used by kexec */
134void machine_shutdown(void) 135void machine_shutdown(void)
135{ 136{
136#ifdef CONFIG_FA_DUMP
137 /* 137 /*
138 * if fadump is active, cleanup the fadump registration before we 138 * if fadump is active, cleanup the fadump registration before we
139 * shutdown. 139 * shutdown.
140 */ 140 */
141 fadump_cleanup(); 141 fadump_cleanup();
142#endif
143 142
144 if (ppc_md.machine_shutdown) 143 if (ppc_md.machine_shutdown)
145 ppc_md.machine_shutdown(); 144 ppc_md.machine_shutdown();
@@ -200,14 +199,15 @@ static void show_cpuinfo_summary(struct seq_file *m)
200{ 199{
201 struct device_node *root; 200 struct device_node *root;
202 const char *model = NULL; 201 const char *model = NULL;
203#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
204 unsigned long bogosum = 0; 202 unsigned long bogosum = 0;
205 int i; 203 int i;
206 for_each_online_cpu(i) 204
207 bogosum += loops_per_jiffy; 205 if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) {
208 seq_printf(m, "total bogomips\t: %lu.%02lu\n", 206 for_each_online_cpu(i)
209 bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); 207 bogosum += loops_per_jiffy;
210#endif /* CONFIG_SMP && CONFIG_PPC32 */ 208 seq_printf(m, "total bogomips\t: %lu.%02lu\n",
209 bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100);
210 }
211 seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); 211 seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
212 if (ppc_md.name) 212 if (ppc_md.name)
213 seq_printf(m, "platform\t: %s\n", ppc_md.name); 213 seq_printf(m, "platform\t: %s\n", ppc_md.name);
@@ -221,11 +221,10 @@ static void show_cpuinfo_summary(struct seq_file *m)
221 if (ppc_md.show_cpuinfo != NULL) 221 if (ppc_md.show_cpuinfo != NULL)
222 ppc_md.show_cpuinfo(m); 222 ppc_md.show_cpuinfo(m);
223 223
224#ifdef CONFIG_PPC32
225 /* Display the amount of memory */ 224 /* Display the amount of memory */
226 seq_printf(m, "Memory\t\t: %d MB\n", 225 if (IS_ENABLED(CONFIG_PPC32))
227 (unsigned int)(total_memory / (1024 * 1024))); 226 seq_printf(m, "Memory\t\t: %d MB\n",
228#endif 227 (unsigned int)(total_memory / (1024 * 1024)));
229} 228}
230 229
231static int show_cpuinfo(struct seq_file *m, void *v) 230static int show_cpuinfo(struct seq_file *m, void *v)
@@ -252,26 +251,24 @@ static int show_cpuinfo(struct seq_file *m, void *v)
252 else 251 else
253 seq_printf(m, "unknown (%08x)", pvr); 252 seq_printf(m, "unknown (%08x)", pvr);
254 253
255#ifdef CONFIG_ALTIVEC
256 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 254 if (cpu_has_feature(CPU_FTR_ALTIVEC))
257 seq_printf(m, ", altivec supported"); 255 seq_printf(m, ", altivec supported");
258#endif /* CONFIG_ALTIVEC */
259 256
260 seq_printf(m, "\n"); 257 seq_printf(m, "\n");
261 258
262#ifdef CONFIG_TAU 259#ifdef CONFIG_TAU
263 if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) { 260 if (cpu_has_feature(CPU_FTR_TAU)) {
264#ifdef CONFIG_TAU_AVERAGE 261 if (IS_ENABLED(CONFIG_TAU_AVERAGE)) {
265 /* more straightforward, but potentially misleading */ 262 /* more straightforward, but potentially misleading */
266 seq_printf(m, "temperature \t: %u C (uncalibrated)\n", 263 seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
267 cpu_temp(cpu_id)); 264 cpu_temp(cpu_id));
268#else 265 } else {
269 /* show the actual temp sensor range */ 266 /* show the actual temp sensor range */
270 u32 temp; 267 u32 temp;
271 temp = cpu_temp_both(cpu_id); 268 temp = cpu_temp_both(cpu_id);
272 seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", 269 seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
273 temp & 0xff, temp >> 16); 270 temp & 0xff, temp >> 16);
274#endif 271 }
275 } 272 }
276#endif /* CONFIG_TAU */ 273#endif /* CONFIG_TAU */
277 274
@@ -335,11 +332,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
335 seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", 332 seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
336 maj, min, PVR_VER(pvr), PVR_REV(pvr)); 333 maj, min, PVR_VER(pvr), PVR_REV(pvr));
337 334
338#ifdef CONFIG_PPC32 335 if (IS_ENABLED(CONFIG_PPC32))
339 seq_printf(m, "bogomips\t: %lu.%02lu\n", 336 seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
340 loops_per_jiffy / (500000/HZ), 337 (loops_per_jiffy / (5000 / HZ)) % 100);
341 (loops_per_jiffy / (5000/HZ)) % 100); 338
342#endif
343 seq_printf(m, "\n"); 339 seq_printf(m, "\n");
344 340
345 /* If this is the last cpu, print the summary */ 341 /* If this is the last cpu, print the summary */
@@ -401,8 +397,8 @@ void __init check_for_initrd(void)
401 397
402#ifdef CONFIG_SMP 398#ifdef CONFIG_SMP
403 399
404int threads_per_core, threads_per_subcore, threads_shift; 400int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
405cpumask_t threads_core_mask; 401cpumask_t threads_core_mask __read_mostly;
406EXPORT_SYMBOL_GPL(threads_per_core); 402EXPORT_SYMBOL_GPL(threads_per_core);
407EXPORT_SYMBOL_GPL(threads_per_subcore); 403EXPORT_SYMBOL_GPL(threads_per_subcore);
408EXPORT_SYMBOL_GPL(threads_shift); 404EXPORT_SYMBOL_GPL(threads_shift);
@@ -740,23 +736,19 @@ void __init setup_panic(void)
740 * BUG() in that case. 736 * BUG() in that case.
741 */ 737 */
742 738
743#ifdef CONFIG_NOT_COHERENT_CACHE 739#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE))
744#define KERNEL_COHERENCY 0
745#else
746#define KERNEL_COHERENCY 1
747#endif
748 740
749static int __init check_cache_coherency(void) 741static int __init check_cache_coherency(void)
750{ 742{
751 struct device_node *np; 743 struct device_node *np;
752 const void *prop; 744 const void *prop;
753 int devtree_coherency; 745 bool devtree_coherency;
754 746
755 np = of_find_node_by_path("/"); 747 np = of_find_node_by_path("/");
756 prop = of_get_property(np, "coherency-off", NULL); 748 prop = of_get_property(np, "coherency-off", NULL);
757 of_node_put(np); 749 of_node_put(np);
758 750
759 devtree_coherency = prop ? 0 : 1; 751 devtree_coherency = prop ? false : true;
760 752
761 if (devtree_coherency != KERNEL_COHERENCY) { 753 if (devtree_coherency != KERNEL_COHERENCY) {
762 printk(KERN_ERR 754 printk(KERN_ERR
@@ -799,12 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
799static __init void print_system_info(void) 791static __init void print_system_info(void)
800{ 792{
801 pr_info("-----------------------------------------------------\n"); 793 pr_info("-----------------------------------------------------\n");
802#ifdef CONFIG_PPC_BOOK3S_64
803 pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
804#endif
805#ifdef CONFIG_PPC_BOOK3S_32
806 pr_info("Hash_size = 0x%lx\n", Hash_size);
807#endif
808 pr_info("phys_mem_size = 0x%llx\n", 794 pr_info("phys_mem_size = 0x%llx\n",
809 (unsigned long long)memblock_phys_mem_size()); 795 (unsigned long long)memblock_phys_mem_size());
810 796
@@ -826,18 +812,7 @@ static __init void print_system_info(void)
826 pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); 812 pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
827#endif 813#endif
828 814
829#ifdef CONFIG_PPC_BOOK3S_64 815 print_system_hash_info();
830 if (htab_address)
831 pr_info("htab_address = 0x%p\n", htab_address);
832 if (htab_hash_mask)
833 pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
834#endif
835#ifdef CONFIG_PPC_BOOK3S_32
836 if (Hash)
837 pr_info("Hash = 0x%p\n", Hash);
838 if (Hash_mask)
839 pr_info("Hash_mask = 0x%lx\n", Hash_mask);
840#endif
841 816
842 if (PHYSICAL_START > 0) 817 if (PHYSICAL_START > 0)
843 pr_info("physical_start = 0x%llx\n", 818 pr_info("physical_start = 0x%llx\n",
@@ -868,6 +843,8 @@ static void smp_setup_pacas(void)
868 */ 843 */
869void __init setup_arch(char **cmdline_p) 844void __init setup_arch(char **cmdline_p)
870{ 845{
846 kasan_init();
847
871 *cmdline_p = boot_command_line; 848 *cmdline_p = boot_command_line;
872 849
873 /* Set a half-reasonable default so udelay does something sensible */ 850 /* Set a half-reasonable default so udelay does something sensible */
@@ -947,20 +924,7 @@ void __init setup_arch(char **cmdline_p)
947 init_mm.end_data = (unsigned long) _edata; 924 init_mm.end_data = (unsigned long) _edata;
948 init_mm.brk = klimit; 925 init_mm.brk = klimit;
949 926
950#ifdef CONFIG_PPC_MM_SLICES
951#ifdef CONFIG_PPC64
952 if (!radix_enabled())
953 init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
954#elif defined(CONFIG_PPC_8xx)
955 init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
956#else
957#error "context.addr_limit not initialized."
958#endif
959#endif
960
961#ifdef CONFIG_SPAPR_TCE_IOMMU
962 mm_iommu_init(&init_mm); 927 mm_iommu_init(&init_mm);
963#endif
964 irqstack_early_init(); 928 irqstack_early_init();
965 exc_lvl_early_init(); 929 exc_lvl_early_init();
966 emergency_stack_init(); 930 emergency_stack_init();
@@ -969,9 +933,9 @@ void __init setup_arch(char **cmdline_p)
969 933
970 early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); 934 early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
971 935
972#ifdef CONFIG_DUMMY_CONSOLE 936 if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
973 conswitchp = &dummy_con; 937 conswitchp = &dummy_con;
974#endif 938
975 if (ppc_md.setup_arch) 939 if (ppc_md.setup_arch)
976 ppc_md.setup_arch(); 940 ppc_md.setup_arch();
977 941
@@ -983,10 +947,8 @@ void __init setup_arch(char **cmdline_p)
983 /* Initialize the MMU context management stuff. */ 947 /* Initialize the MMU context management stuff. */
984 mmu_context_init(); 948 mmu_context_init();
985 949
986#ifdef CONFIG_PPC64
987 /* Interrupt code needs to be 64K-aligned. */ 950 /* Interrupt code needs to be 64K-aligned. */
988 if ((unsigned long)_stext & 0xffff) 951 if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff)
989 panic("Kernelbase not 64K-aligned (0x%lx)!\n", 952 panic("Kernelbase not 64K-aligned (0x%lx)!\n",
990 (unsigned long)_stext); 953 (unsigned long)_stext);
991#endif
992} 954}
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 4a65e08a6042..3fb9f64f88fd 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -64,34 +64,6 @@ EXPORT_SYMBOL(DMA_MODE_READ);
64EXPORT_SYMBOL(DMA_MODE_WRITE); 64EXPORT_SYMBOL(DMA_MODE_WRITE);
65 65
66/* 66/*
67 * We're called here very early in the boot.
68 *
69 * Note that the kernel may be running at an address which is different
70 * from the address that it was linked at, so we must use RELOC/PTRRELOC
71 * to access static data (including strings). -- paulus
72 */
73notrace unsigned long __init early_init(unsigned long dt_ptr)
74{
75 unsigned long offset = reloc_offset();
76
77 /* First zero the BSS -- use memset_io, some platforms don't have
78 * caches on yet */
79 memset_io((void __iomem *)PTRRELOC(&__bss_start), 0,
80 __bss_stop - __bss_start);
81
82 /*
83 * Identify the CPU type and fix up code sections
84 * that depend on which cpu we have.
85 */
86 identify_cpu(offset, mfspr(SPRN_PVR));
87
88 apply_feature_fixups();
89
90 return KERNELBASE + offset;
91}
92
93
94/*
95 * This is run before start_kernel(), the kernel has been relocated 67 * This is run before start_kernel(), the kernel has been relocated
96 * and we are running with enough of the MMU enabled to have our 68 * and we are running with enough of the MMU enabled to have our
97 * proper kernel virtual addresses 69 * proper kernel virtual addresses
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4f49e1a3594c..a400854a5036 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -68,6 +68,7 @@
68#include <asm/cputhreads.h> 68#include <asm/cputhreads.h>
69#include <asm/hw_irq.h> 69#include <asm/hw_irq.h>
70#include <asm/feature-fixups.h> 70#include <asm/feature-fixups.h>
71#include <asm/kup.h>
71 72
72#include "setup.h" 73#include "setup.h"
73 74
@@ -331,6 +332,12 @@ void __init early_setup(unsigned long dt_ptr)
331 */ 332 */
332 configure_exceptions(); 333 configure_exceptions();
333 334
335 /*
336 * Configure Kernel Userspace Protection. This needs to happen before
337 * feature fixups for platforms that implement this using features.
338 */
339 setup_kup();
340
334 /* Apply all the dynamic patching */ 341 /* Apply all the dynamic patching */
335 apply_feature_fixups(); 342 apply_feature_fixups();
336 setup_feature_keys(); 343 setup_feature_keys();
@@ -383,6 +390,9 @@ void early_setup_secondary(void)
383 /* Initialize the hash table or TLB handling */ 390 /* Initialize the hash table or TLB handling */
384 early_init_mmu_secondary(); 391 early_init_mmu_secondary();
385 392
393 /* Perform any KUP setup that is per-cpu */
394 setup_kup();
395
386 /* 396 /*
387 * At this point, we can let interrupts switch to virtual mode 397 * At this point, we can let interrupts switch to virtual mode
388 * (the MMU has been setup), so adjust the MSR in the PACA to 398 * (the MMU has been setup), so adjust the MSR in the PACA to
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 6794466f6420..06c299ef6132 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
565 preempt_disable(); 565 preempt_disable();
566 566
567 /* pull in MSR TS bits from user context */ 567 /* pull in MSR TS bits from user context */
568 regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); 568 regs->msr |= msr & MSR_TS_MASK;
569 569
570 /* 570 /*
571 * Ensure that TM is enabled in regs->msr before we leave the signal 571 * Ensure that TM is enabled in regs->msr before we leave the signal
@@ -745,6 +745,31 @@ SYSCALL_DEFINE0(rt_sigreturn)
745 if (MSR_TM_SUSPENDED(mfmsr())) 745 if (MSR_TM_SUSPENDED(mfmsr()))
746 tm_reclaim_current(0); 746 tm_reclaim_current(0);
747 747
748 /*
 749 * Also clear the MSR[TS] bits here, so that if there is an exception
 750 * in the code below (such as a page fault in copy_ckvsx_to_user()),
 751 * it does not recheckpoint this task if there was a context switch
 752 * inside the exception.
 753 *
 754 * A major page fault can indirectly call schedule(). A reschedule in
 755 * the middle of an exception can have a side effect (changing the
 756 * CPU MSR[TS] state), since schedule() is called with the CPU MSR[TS]
 757 * disabled and returns with MSR[TS]=Suspended (switch_to() calls
 758 * tm_recheckpoint() for the 'new' process). In this case, the same
 759 * process continues to run on the CPU, but the CPU's MSR[TS] state
 760 * has changed.
 761 *
 762 * This can cause a TM Bad Thing, since the MSR on the stack will
 763 * have MSR[TS]=0, and this is what will be used by RFID.
 764 *
 765 * Clearing the MSR[TS] state here avoids a recheckpoint if there is
 766 * any process reschedule in kernel space. The MSR[TS] state does
 767 * not need to be saved either, since it will be replaced with the
 768 * MSR[TS] that came from the user context later, in
 769 * restore_tm_sigcontexts().
770 */
771 regs->msr &= ~MSR_TS_MASK;
772
748 if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) 773 if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
749 goto badframe; 774 goto badframe;
750 if (MSR_TM_ACTIVE(msr)) { 775 if (MSR_TM_ACTIVE(msr)) {
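The ordering matters here: between the reclaim and the eventual recheckpoint, any page fault taken while copying the user context can schedule(), and switch_to() would recheckpoint using whatever MSR[TS] is in regs. Clearing TS first makes that window safe. Condensed, the flow this hunk establishes looks roughly like the sketch below (hypothetical function, real macro names):

	/*
	 * Condensed sketch of the rt_sigreturn() TM ordering above; not
	 * the kernel function itself.
	 */
	static long rt_sigreturn_tm_sketch(struct pt_regs *regs, unsigned long user_msr)
	{
		if (MSR_TM_SUSPENDED(mfmsr()))
			tm_reclaim_current(0);		/* leave the transaction */

		regs->msr &= ~MSR_TS_MASK;		/* a reschedule here must not recheckpoint */

		/* ... __get_user()/copy_*_user() calls that may fault and schedule() ... */

		regs->msr |= user_msr & MSR_TS_MASK;	/* TS comes back from the user frame */
		return 0;
	}
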
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bc0503ef9c9c..325d60633dfa 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -43,7 +43,6 @@
 #include <linux/timex.h>
 #include <linux/kernel_stat.h>
 #include <linux/time.h>
-#include <linux/clockchips.h>
 #include <linux/init.h>
 #include <linux/profile.h>
 #include <linux/cpu.h>
@@ -151,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
+bool tb_invalid;
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Factor for converting from cputime_t (timebase ticks) to
@@ -460,6 +461,13 @@ void __delay(unsigned long loops)
 			diff += 1000000000;
 			spin_cpu_relax();
 		} while (diff < loops);
+	} else if (tb_invalid) {
+		/*
+		 * TB is in error state and isn't ticking anymore.
+		 * HMI handler was unable to recover from TB error.
+		 * Return immediately, so that kernel won't get stuck here.
+		 */
+		spin_cpu_relax();
 	} else {
 		start = get_tbl();
 		while (get_tbl() - start < loops)
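With the timebase stopped, the existing `while (get_tbl() - start < loops)` loop could never terminate, which is why the new branch bails out early instead. In isolation the logic is (simplified sketch; the shared-processor path is omitted):

	/*
	 * Sketch of the __delay() fallback added above: if the HMI handler
	 * marked the timebase dead, don't spin on a frozen counter.
	 */
	void delay_sketch(unsigned long loops)
	{
		unsigned long start;

		if (tb_invalid) {
			spin_cpu_relax();	/* TB isn't ticking; return instead of hanging */
			return;
		}

		start = get_tbl();
		while (get_tbl() - start < loops)
			;
	}
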
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1fd45a8650e1..665f294725cb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs)
 	int code = FPE_FLTUNK;
 	int err;
 
+	/* We restore the interrupt state now */
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+
 	flush_spe_to_thread(current);
 
 	spefscr = current->thread.spefscr;
@@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
 	extern int speround_handler(struct pt_regs *regs);
 	int err;
 
+	/* We restore the interrupt state now */
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+
 	preempt_disable();
 	if (regs->msr & MSR_SPE)
 		giveup_spe(current);
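Both SPE handlers can end up sleeping, so interrupts are conditionally re-enabled first, mirroring the interrupted context rather than enabling unconditionally. The guard, shown in isolation:

	/*
	 * The guard added to both SPE exception handlers: only re-enable
	 * interrupts if the interrupted context had them enabled.
	 */
	static inline void restore_irqs_sketch(struct pt_regs *regs)
	{
		if (!arch_irq_disabled_regs(regs))
			local_irq_enable();
	}
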
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index ce199f6e4256..06f54d947057 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -26,9 +26,8 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-		$(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+	-Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
 asflags-y := -D__VDSO32__ -s
 
 obj-y += vdso32_wrapper.o
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 28e7d112aa2f..32ebb3522ea1 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -12,9 +12,8 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
-		$(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+	-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
 asflags-y := -D__VDSO64__ -s
 
 obj-y += vdso64_wrapper.o
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 21165da0052d..8eb867dbad5f 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -21,6 +21,7 @@ _GLOBAL(load_vr_state)
 	REST_32VRS(0,r4,r3)
 	blr
 EXPORT_SYMBOL(load_vr_state)
+_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
 
 /*
  * Store VMX state into memory, including VSCR.
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3c6ab22a0c4e..af3c15a1d41e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
 
 static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
 
-static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
 static DEFINE_PER_CPU(u64, wd_timer_tb);
 
 /* SMP checker bits */
@@ -293,21 +293,21 @@ out:
 	nmi_exit();
 }
 
-static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
-{
-	t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
-	if (wd_timer_period_ms > 1000)
-		t->expires = __round_jiffies_up(t->expires, cpu);
-	add_timer_on(t, cpu);
-}
-
-static void wd_timer_fn(struct timer_list *t)
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
 	int cpu = smp_processor_id();
 
+	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+		return HRTIMER_NORESTART;
+
+	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+		return HRTIMER_NORESTART;
+
 	watchdog_timer_interrupt(cpu);
 
-	wd_timer_reset(cpu, t);
+	hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
+
+	return HRTIMER_RESTART;
 }
 
 void arch_touch_nmi_watchdog(void)
@@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
-static void start_watchdog_timer_on(unsigned int cpu)
-{
-	struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
-	per_cpu(wd_timer_tb, cpu) = get_tb();
-
-	timer_setup(t, wd_timer_fn, TIMER_PINNED);
-	wd_timer_reset(cpu, t);
-}
-
-static void stop_watchdog_timer_on(unsigned int cpu)
-{
-	struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
-	del_timer_sync(t);
-}
-
-static int start_wd_on_cpu(unsigned int cpu)
+static void start_watchdog(void *arg)
 {
+	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+	int cpu = smp_processor_id();
 	unsigned long flags;
 
 	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
 		WARN_ON(1);
-		return 0;
+		return;
 	}
 
 	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-		return 0;
+		return;
 
 	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
-		return 0;
+		return;
 
 	wd_smp_lock(&flags);
 	cpumask_set_cpu(cpu, &wd_cpus_enabled);
@@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu)
 	}
 	wd_smp_unlock(&flags);
 
-	start_watchdog_timer_on(cpu);
+	*this_cpu_ptr(&wd_timer_tb) = get_tb();
 
-	return 0;
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer->function = watchdog_timer_fn;
+	hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
+		      HRTIMER_MODE_REL_PINNED);
 }
 
-static int stop_wd_on_cpu(unsigned int cpu)
+static int start_watchdog_on_cpu(unsigned int cpu)
 {
+	return smp_call_function_single(cpu, start_watchdog, NULL, true);
+}
+
+static void stop_watchdog(void *arg)
+{
+	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+	int cpu = smp_processor_id();
 	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
-		return 0; /* Can happen in CPU unplug case */
+		return; /* Can happen in CPU unplug case */
 
-	stop_watchdog_timer_on(cpu);
+	hrtimer_cancel(hrtimer);
 
 	wd_smp_lock(&flags);
 	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
 	wd_smp_unlock(&flags);
 
 	wd_smp_clear_cpu_pending(cpu, get_tb());
+}
 
-	return 0;
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+	return smp_call_function_single(cpu, stop_watchdog, NULL, true);
 }
 
 static void watchdog_calc_timeouts(void)
@@ -402,7 +400,7 @@ void watchdog_nmi_stop(void)
 	int cpu;
 
 	for_each_cpu(cpu, &wd_cpus_enabled)
-		stop_wd_on_cpu(cpu);
+		stop_watchdog_on_cpu(cpu);
 }
 
 void watchdog_nmi_start(void)
@@ -411,7 +409,7 @@ void watchdog_nmi_start(void)
 
 	watchdog_calc_timeouts();
 	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
-		start_wd_on_cpu(cpu);
+		start_watchdog_on_cpu(cpu);
 }
 
 /*
@@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void)
 
 	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
 					"powerpc/watchdog:online",
-					start_wd_on_cpu, stop_wd_on_cpu);
+					start_watchdog_on_cpu,
+					stop_watchdog_on_cpu);
 	if (err < 0) {
 		pr_warn("could not be initialized");
 		return err;
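The conversion replaces a pinned timer_list with a per-CPU hrtimer that must be initialised and armed on the CPU it monitors, hence the smp_call_function_single() trampolines: the cpuhp callbacks may run on a different CPU. The reusable pattern, reduced to its essentials (illustrative names; a 1s period is assumed):

	/* Minimal sketch of the per-CPU, self-rearming, pinned hrtimer pattern. */
	static DEFINE_PER_CPU(struct hrtimer, hb_timer_sketch);

	static enum hrtimer_restart hb_fn_sketch(struct hrtimer *t)
	{
		/* ... per-CPU heartbeat work ... */
		hrtimer_forward_now(t, ms_to_ktime(1000));
		return HRTIMER_RESTART;
	}

	static void hb_start_sketch(void *arg)	/* runs on the target CPU */
	{
		struct hrtimer *t = this_cpu_ptr(&hb_timer_sketch);

		hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		t->function = hb_fn_sketch;
		hrtimer_start(t, ms_to_ktime(1000), HRTIMER_MODE_REL_PINNED);
	}

	static int hb_online_sketch(unsigned int cpu)	/* cpuhp callback */
	{
		return smp_call_function_single(cpu, hb_start_sketch, NULL, true);
	}
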
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b2b29d4f9842..7bdcd4d7a9f0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -74,6 +74,7 @@
 #include <asm/opal.h>
 #include <asm/xics.h>
 #include <asm/xive.h>
+#include <asm/hw_breakpoint.h>
 
 #include "book3s.h"
 
@@ -3374,7 +3375,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_PURR, vcpu->arch.purr);
 	mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
-	if (cpu_has_feature(CPU_FTR_DAWR)) {
+	if (dawr_enabled()) {
 		mtspr(SPRN_DAWR, vcpu->arch.dawr);
 		mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
 	}
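dawr_enabled() turns the old compile-time feature section into a runtime decision, so the opt-in knob can enable DAWR handling even on a machine with the erratum. A plausible reading of the helper (the real definition lives in asm/hw_breakpoint.h; treat this as a sketch):

	/*
	 * Sketch: dawr_enabled() as a plain read of the opt-in flag that
	 * the real-mode assembly below checks via LOAD_REG_ADDR(..., dawr_force_enable).
	 */
	extern bool dawr_force_enable;

	static inline bool dawr_enabled_sketch(void)
	{
		return dawr_force_enable;
	}
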
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3b9662a4207e..085509148d95 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -822,7 +822,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
 	raddr = per_cpu_ptr(addr, cpu);
 	l = (unsigned long)raddr;
 
-	if (REGION_ID(l) == VMALLOC_REGION_ID) {
+	if (get_region_id(l) == VMALLOC_REGION_ID) {
 		l = vmalloc_to_phys(raddr);
 		raddr = (unsigned int *)l;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3a5e719ef032..dd014308f065 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -35,6 +35,7 @@
 #include <asm/thread_info.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
 
 /* Sign-extend HDEC if not on POWER9 */
 #define EXTEND_HDEC(reg) \
@@ -45,6 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
+#define NAPPING_UNSPLIT	3
 
 /* Stack frame offsets for kvmppc_hv_entry */
 #define SFS			208
@@ -290,17 +292,19 @@ kvm_novcpu_exit:
 	b	kvmhv_switch_to_host
 
 /*
- * We come in here when wakened from nap mode.
- * Relocation is off and most register values are lost.
- * r13 points to the PACA.
+ * We come in here when wakened from Linux offline idle code.
+ * Relocation is off
  * r3 contains the SRR1 wakeup value, SRR1 is trashed.
  */
-	.globl	kvm_start_guest
-kvm_start_guest:
-	/* Set runlatch bit the minute you wake up from nap */
-	mfspr	r0, SPRN_CTRLF
-	ori	r0, r0, 1
-	mtspr	SPRN_CTRLT, r0
+_GLOBAL(idle_kvm_start_guest)
+	ld	r4,PACAEMERGSP(r13)
+	mfcr	r5
+	mflr	r0
+	std	r1,0(r4)
+	std	r5,8(r4)
+	std	r0,16(r4)
+	subi	r1,r4,STACK_FRAME_OVERHEAD
+	SAVE_NVGPRS(r1)
 
 	/*
 	 * Could avoid this and pass it through in r3. For now,
@@ -308,27 +312,23 @@ kvm_start_guest:
 	 */
 	mtspr	SPRN_SRR1,r3
 
-	ld	r2,PACATOC(r13)
-
 	li	r0,0
 	stb	r0,PACA_FTRACE_ENABLED(r13)
 
 	li	r0,KVM_HWTHREAD_IN_KVM
 	stb	r0,HSTATE_HWTHREAD_STATE(r13)
 
-	/* NV GPR values from power7_idle() will no longer be valid */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-
-	/* were we napping due to cede? */
+	/* kvm cede / napping does not come through here */
 	lbz	r0,HSTATE_NAPPING(r13)
-	cmpwi	r0,NAPPING_CEDE
-	beq	kvm_end_cede
-	cmpwi	r0,NAPPING_NOVCPU
-	beq	kvm_novcpu_wakeup
+	twnei	r0,0
 
-	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,STACK_FRAME_OVERHEAD
+	b	1f
+
+kvm_unsplit_wakeup:
+	li	r0, 0
+	stb	r0, HSTATE_NAPPING(r13)
+
+1:
 
 	/*
 	 * We weren't napping due to cede, so this must be a secondary
@@ -437,19 +437,25 @@ kvm_no_guest:
 	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
 	cmpwi	r3, 0
 	bne	54f
-/*
- * We jump to pnv_wakeup_loss, which will return to the caller
- * of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
- * requires SRR1 in r12.
- */
+
+	/*
+	 * Jump to idle_return_gpr_loss, which returns to the
+	 * idle_kvm_start_guest caller.
+	 */
 	li	r3, LPCR_PECE0
 	mfspr	r4, SPRN_LPCR
 	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
 	mtspr	SPRN_LPCR, r4
-	li	r3, 0
-	mfspr	r12,SPRN_SRR1
-	b	pnv_wakeup_loss
+	/* set up r3 for return */
+	mfspr	r3,SPRN_SRR1
+	REST_NVGPRS(r1)
+	addi	r1, r1, STACK_FRAME_OVERHEAD
+	ld	r0, 16(r1)
+	ld	r5, 8(r1)
+	ld	r1, 0(r1)
+	mtlr	r0
+	mtcr	r5
+	blr
 
 53:	HMT_LOW
 	ld	r5, HSTATE_KVM_VCORE(r13)
@@ -534,6 +540,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	lbz	r0, KVM_SPLIT_DO_NAP(r3)
 	cmpwi	r0, 0
 	beq	57f
+	li	r3, NAPPING_UNSPLIT
+	stb	r3, HSTATE_NAPPING(r13)
 	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
 	mfspr	r5, SPRN_LPCR
 	rlwimi	r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
@@ -822,18 +830,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_IAMR, r5
 	mtspr	SPRN_PSPB, r6
 	mtspr	SPRN_FSCR, r7
-	ld	r5, VCPU_DAWR(r4)
-	ld	r6, VCPU_DAWRX(r4)
-	ld	r7, VCPU_CIABR(r4)
-	ld	r8, VCPU_TAR(r4)
 	/*
 	 * Handle broken DAWR case by not writing it. This means we
 	 * can still store the DAWR register for migration.
 	 */
-BEGIN_FTR_SECTION
+	LOAD_REG_ADDR(r5, dawr_force_enable)
+	lbz	r5, 0(r5)
+	cmpdi	r5, 0
+	beq	1f
+	ld	r5, VCPU_DAWR(r4)
+	ld	r6, VCPU_DAWRX(r4)
 	mtspr	SPRN_DAWR, r5
 	mtspr	SPRN_DAWRX, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
+1:
+	ld	r7, VCPU_CIABR(r4)
+	ld	r8, VCPU_TAR(r4)
 	mtspr	SPRN_CIABR, r7
 	mtspr	SPRN_TAR, r8
 	ld	r5, VCPU_IC(r4)
@@ -2513,11 +2524,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	blr
 
 2:
-BEGIN_FTR_SECTION
-	/* POWER9 with disabled DAWR */
+	LOAD_REG_ADDR(r11, dawr_force_enable)
+	lbz	r11, 0(r11)
+	cmpdi	r11, 0
 	li	r3, H_HARDWARE
-	blr
-END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
+	beqlr
 	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
 	rlwimi	r5, r4, 2, DAWRX_WT
@@ -2654,6 +2665,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
 
+	/* Go back to host stack */
+	ld	r1, HSTATE_HOST_R1(r13)
+
 	/*
 	 * Take a nap until a decrementer or external or doobell interrupt
 	 * occurs, with PECE1 and PECE0 set in LPCR.
@@ -2682,26 +2696,42 @@ BEGIN_FTR_SECTION
 	 * requested level = 0 (just stop dispatching)
 	 */
 	lis	r3, (PSSCR_EC | PSSCR_ESL)@h
-	mtspr	SPRN_PSSCR, r3
 	/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
 	li	r4, LPCR_PECE_HVEE@higher
 	sldi	r4, r4, 32
 	or	r5, r5, r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+FTR_SECTION_ELSE
+	li	r3, PNV_THREAD_NAP
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPCR,r5
 	isync
-	li	r0, 0
-	std	r0, HSTATE_SCRATCH0(r13)
-	ptesync
-	ld	r0, HSTATE_SCRATCH0(r13)
-1:	cmpd	r0, r0
-	bne	1b
+
 BEGIN_FTR_SECTION
-	nap
+	bl	isa300_idle_stop_mayloss
 FTR_SECTION_ELSE
-	PPC_STOP
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
-	b	.
+	bl	isa206_idle_insn_mayloss
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+	mfspr	r0, SPRN_CTRLF
+	ori	r0, r0, 1
+	mtspr	SPRN_CTRLT, r0
+
+	mtspr	SPRN_SRR1, r3
+
+	li	r0, 0
+	stb	r0, PACA_FTRACE_ENABLED(r13)
+
+	li	r0, KVM_HWTHREAD_IN_KVM
+	stb	r0, HSTATE_HWTHREAD_STATE(r13)
+
+	lbz	r0, HSTATE_NAPPING(r13)
+	cmpwi	r0, NAPPING_CEDE
+	beq	kvm_end_cede
+	cmpwi	r0, NAPPING_NOVCPU
+	beq	kvm_novcpu_wakeup
+	cmpwi	r0, NAPPING_UNSPLIT
+	beq	kvm_unsplit_wakeup
+	twi	31,0,0 /* Nap state must not be zero */
 
 33:	mr	r4, r3
 	li	r3, 0
@@ -2709,12 +2739,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	b	34f
 
 kvm_end_cede:
+	/* Woken by external or decrementer interrupt */
+
 	/* get vcpu pointer */
 	ld	r4, HSTATE_KVM_VCPU(r13)
 
-	/* Woken by external or decrementer interrupt */
-	ld	r1, HSTATE_HOST_R1(r13)
-
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r4, VCPU_TB_RMINTR
 	bl	kvmhv_accumulate_time
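The main behavioural change in this file is the calling convention: kvm_start_guest used to be a bare wakeup target that could trash non-volatile state and return through pnv_wakeup_loss, whereas idle_kvm_start_guest is an ordinary function the rewritten C idle code calls, saving LR/CR and the non-volatile GPRs on the emergency stack and returning the SRR1 wakeup value. At the C level the contract is roughly (sketch; the caller shape and helper name are assumptions):

	/*
	 * Sketch of the new idle/KVM handoff as seen from C. The asm routine
	 * behaves like a normal function: callee-saved state is preserved
	 * and the (possibly updated) SRR1 wakeup reason is returned.
	 */
	unsigned long idle_kvm_start_guest(unsigned long srr1);

	static unsigned long offline_wakeup_sketch(unsigned long srr1)
	{
		if (kvm_hwthread_requested())	/* assumed helper */
			srr1 = idle_kvm_start_guest(srr1);
		return srr1;
	}
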
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 79396e184bca..c55f9c27bf79 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -8,9 +8,22 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
 
-obj-y += string.o alloc.o code-patching.o feature-fixups.o
+KASAN_SANITIZE_code-patching.o := n
+KASAN_SANITIZE_feature-fixups.o := n
 
-obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
+ifdef CONFIG_KASAN
+CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-y += alloc.o code-patching.o feature-fixups.o
+
+ifndef CONFIG_KASAN
+obj-y += string.o memcmp_$(BITS).o
+obj-$(CONFIG_PPC32) += strlen_32.o
+endif
+
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
@@ -34,7 +47,7 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
 				  test_emulate_step_exec_instr.o
 
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o \
-			   string_$(BITS).o memcmp_$(BITS).o
+			   string_$(BITS).o
 
 obj-y			+= sstep.o ldstfp.o quad.o
 obj64-y			+= quad.o
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index 890d4ddd91d6..bb9307ce2440 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -29,6 +29,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
 	unsigned int csum;
 
 	might_sleep();
+	allow_read_from_user(src, len);
 
 	*err_ptr = 0;
 
@@ -60,6 +61,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
 	}
 
 out:
+	prevent_read_from_user(src, len);
 	return (__force __wsum)csum;
 }
 EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -70,6 +72,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
 	unsigned int csum;
 
 	might_sleep();
+	allow_write_to_user(dst, len);
 
 	*err_ptr = 0;
 
@@ -97,6 +100,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
 	}
 
 out:
+	prevent_write_to_user(dst, len);
 	return (__force __wsum)csum;
 }
 EXPORT_SYMBOL(csum_and_copy_to_user);
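With KUAP enabled, kernel accesses to user memory fault unless they are explicitly bracketed, so routines that touch user buffers outside copy_to/from_user() must open a window themselves and close it on every exit path. The shape of the bracket (sketch; do_csum_copy() is a stand-in for the real generic checksum routine):

	/* The KUAP bracket added above, shown standalone. */
	static __wsum csum_from_user_sketch(const void __user *src, void *dst, int len)
	{
		__wsum csum;

		allow_read_from_user(src, len);		/* open the user-access window */
		csum = do_csum_copy(src, dst, len);	/* assumed helper; may fault */
		prevent_read_from_user(src, len);	/* close it again, on every path */

		return csum;
	}
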
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 506413a2c25e..90c9d4a1e36f 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -15,7 +15,6 @@
 #include <linux/cpuhotplug.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -26,9 +25,9 @@
 static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
 			       unsigned int *patch_addr)
 {
-	int err;
+	int err = 0;
 
-	__put_user_size(instr, patch_addr, 4, err);
+	__put_user_asm(instr, patch_addr, err, "stw");
 	if (err)
 		return err;
 
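Pinning the store to a single `stw` via __put_user_asm() keeps the patching path down to one faultable instruction with an exception-table entry, plausibly so that this kernel-address store stays clear of the extra handling __put_user_size() implies. Note that err must now be pre-initialised, since __put_user_asm() only writes it on a fault. The idiom in isolation:

	/* Sketch: a single faultable store with error capture. */
	static int patch_store_sketch(unsigned int instr, unsigned int *patch_addr)
	{
		int err = 0;

		__put_user_asm(instr, patch_addr, err, "stw");
		return err;	/* 0 on success, -EFAULT on fault */
	}
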
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index ba66846fe973..d5642481fb98 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -14,6 +14,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
 #include <asm/code-patching-asm.h>
+#include <asm/kasan.h>
 
 #define COPY_16_BYTES		\
 	lwz	r7,4(r4);	\
@@ -68,6 +69,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
+#ifndef CONFIG_KASAN
 _GLOBAL(memset16)
 	rlwinm.	r0 ,r5, 31, 1, 31
 	addi	r6, r3, -4
@@ -81,6 +83,7 @@ _GLOBAL(memset16)
 	sth	r4, 4(r6)
 	blr
 EXPORT_SYMBOL(memset16)
+#endif
 
 /*
  * Use dcbz on the complete cache lines in the destination
@@ -91,7 +94,7 @@ EXPORT_SYMBOL(memset16)
  * We therefore skip the optimised bloc that uses dcbz. This jump is
  * replaced by a nop once cache is active. This is done in machine_init()
  */
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
 	cmplwi	0,r5,4
 	blt	7f
 
@@ -151,6 +154,7 @@ _GLOBAL(memset)
 	bdnz	9b
 	blr
 EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
 
 /*
  * This version uses dcbz on the complete cache lines in the
@@ -163,12 +167,12 @@ EXPORT_SYMBOL(memset)
  * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
  * replaced by a nop once cache is active. This is done in machine_init()
  */
-_GLOBAL(memmove)
+_GLOBAL_KASAN(memmove)
 	cmplw	0,r3,r4
 	bgt	backwards_memcpy
 	/* fall through */
 
-_GLOBAL(memcpy)
+_GLOBAL_KASAN(memcpy)
 1:	b	generic_memcpy
 	patch_site	1b, patch__memcpy_nocache
 
@@ -244,6 +248,8 @@ _GLOBAL(memcpy)
 65:	blr
 EXPORT_SYMBOL(memcpy)
 EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memcpy)
+EXPORT_SYMBOL_KASAN(memmove)
 
 generic_memcpy:
 	srwi.	r7,r5,3
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 3c3be02f33b7..7f6bd031c306 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -12,7 +12,9 @@
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
+#include <asm/kasan.h>
 
+#ifndef CONFIG_KASAN
 _GLOBAL(__memset16)
 	rlwimi	r4,r4,16,0,15
 	/* fall through */
@@ -29,8 +31,9 @@ _GLOBAL(__memset64)
 EXPORT_SYMBOL(__memset16)
 EXPORT_SYMBOL(__memset32)
 EXPORT_SYMBOL(__memset64)
+#endif
 
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
 	neg	r0,r3
 	rlwimi	r4,r4,8,16,23
 	andi.	r0,r0,7			/* # bytes to be 8-byte aligned */
36 andi. r0,r0,7 /* # bytes to be 8-byte aligned */ 39 andi. r0,r0,7 /* # bytes to be 8-byte aligned */
@@ -96,8 +99,9 @@ _GLOBAL(memset)
96 stb r4,0(r6) 99 stb r4,0(r6)
97 blr 100 blr
98EXPORT_SYMBOL(memset) 101EXPORT_SYMBOL(memset)
102EXPORT_SYMBOL_KASAN(memset)
99 103
100_GLOBAL_TOC(memmove) 104_GLOBAL_TOC_KASAN(memmove)
101 cmplw 0,r3,r4 105 cmplw 0,r3,r4
102 bgt backwards_memcpy 106 bgt backwards_memcpy
103 b memcpy 107 b memcpy
@@ -139,3 +143,4 @@ _GLOBAL(backwards_memcpy)
 	mtctr	r7
 	b	1b
 EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memmove)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 273ea67e60a1..25c3772c1dfb 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -11,6 +11,7 @@
 #include <asm/export.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
+#include <asm/kasan.h>
 
 #ifndef SELFTEST_CASE
 /* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
@@ -18,7 +19,7 @@
 #endif
 
 	.align	7
-_GLOBAL_TOC(memcpy)
+_GLOBAL_TOC_KASAN(memcpy)
 BEGIN_FTR_SECTION
 #ifdef __LITTLE_ENDIAN__
 	cmpdi	cr7,r5,0
@@ -230,3 +231,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 	blr
 #endif
 EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL_KASAN(memcpy)
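The _GLOBAL_KASAN()/EXPORT_SYMBOL_KASAN() pairs in these string routines exist because, under KASAN, the instrumented C mem*() wrappers must own the canonical names while still being able to call the optimised assembly. A plausible reduction of what asm/kasan.h provides (sketch, not the actual header):

	/*
	 * Sketch of the renaming scheme assumed by _GLOBAL_KASAN(memcpy):
	 * with KASAN, the asm routine becomes __memcpy and KASAN's C wrapper,
	 * which checks the shadow memory, keeps the memcpy name.
	 */
	#ifdef CONFIG_KASAN
	#define _GLOBAL_KASAN(fn)	_GLOBAL(__##fn)
	#define EXPORT_SYMBOL_KASAN(fn)	EXPORT_SYMBOL(__##fn)
	#else
	#define _GLOBAL_KASAN(fn)	_GLOBAL(fn)
	#define EXPORT_SYMBOL_KASAN(fn)	EXPORT_SYMBOL(fn)
	#endif
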
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3c1bd9fa23cd..0f499db315d6 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,53 +5,18 @@
 
 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
-CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-
 obj-y := fault.o mem.o pgtable.o mmap.o \
 	init_$(BITS).o pgtable_$(BITS).o \
+	pgtable-frag.o \
 	init-common.o mmu_context.o drmem.o
-obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
-	tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
-hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
-obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \
-	$(hash64-y) mmu_context_book3s64.o \
-	pgtable-book3s64.o pgtable-frag.o
-obj-$(CONFIG_PPC32) += pgtable-frag.o
-obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
-obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
-obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o
-ifdef CONFIG_PPC_BOOK3S_64
-obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
-obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
-endif
-obj-$(CONFIG_40x) += 40x_mmu.o
-obj-$(CONFIG_44x) += 44x_mmu.o
-obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o
-obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o
+obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
+obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
+obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
-obj-$(CONFIG_PPC_SPLPAR) += vphn.o
 obj-$(CONFIG_PPC_MM_SLICES) += slice.o
-obj-y += hugetlbpage.o
-ifdef CONFIG_HUGETLB_PAGE
-obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o
-obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
-obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
-endif
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
-obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM) += highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
-obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
 obj-$(CONFIG_PPC_PTDUMP) += ptdump/
-obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
-
-# Disable kcov instrumentation on sensitive code
-# This is necessary for booting with kcov enabled on book3e machines
-KCOV_INSTRUMENT_tlb_nohash.o := n
-KCOV_INSTRUMENT_fsl_booke_mmu.o := n
-
-# Instrumenting the SLB fault path can lead to duplicate SLB entries
-KCOV_INSTRUMENT_slb.o := n
+obj-$(CONFIG_KASAN) += kasan/
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
new file mode 100644
index 000000000000..1732eaa740a9
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE_mmu.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-y += mmu.o hash_low.o mmu_context.o tlb.o
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/book3s32/hash_low.S
index a6c491f18a04..e27792d0b744 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -309,13 +309,13 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
 
 _GLOBAL(create_hpte)
 	/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
-	rlwinm	r8,r5,32-10,31,31	/* _PAGE_RW -> PP lsb */
-	rlwinm	r0,r5,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
+	rlwinm	r8,r5,32-9,30,30	/* _PAGE_RW -> PP msb */
+	rlwinm	r0,r5,32-6,30,30	/* _PAGE_DIRTY -> PP msb */
 	and	r8,r8,r0		/* writable if _RW & _DIRTY */
 	rlwimi	r5,r5,32-1,30,30	/* _PAGE_USER -> PP msb */
 	rlwimi	r5,r5,32-2,31,31	/* _PAGE_USER -> PP lsb */
 	ori	r8,r8,0xe04		/* clear out reserved bits */
-	andc	r8,r5,r8		/* PP = user? (rw&dirty? 2: 3): 0 */
+	andc	r8,r5,r8		/* PP = user? (rw&dirty? 1: 3): 0 */
 BEGIN_FTR_SECTION
 	rlwinm	r8,r8,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/book3s32/mmu.c
index 5d9c3ff728c9..fc073cb2c517 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -34,11 +34,12 @@
 #include <asm/code-patching.h>
 #include <asm/sections.h>
 
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
 
-struct hash_pte *Hash, *Hash_end;
-unsigned long Hash_size, Hash_mask;
+struct hash_pte *Hash;
+static unsigned long Hash_size, Hash_mask;
 unsigned long _SDR1;
+static unsigned int hash_mb, hash_mb2;
 
 struct ppc_bat BATS[8][2];	/* 8 pairs of IBAT, DBAT */
 
@@ -318,7 +319,6 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	 */
 void __init MMU_init_hw(void)
 {
-	unsigned int hmask, mb, mb2;
 	unsigned int n_hpteg, lg_n_hpteg;
 
 	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
@@ -355,26 +355,34 @@ void __init MMU_init_hw(void)
 		 __func__, Hash_size, Hash_size);
 	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
 
-	Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
+	pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+		(unsigned long long)(total_memory >> 20), Hash_size >> 10);
 
-	printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
-	       (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
 
+	Hash_mask = n_hpteg - 1;
+	hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+	if (lg_n_hpteg > 16)
+		hash_mb2 = 16 - LG_HPTEG_SIZE;
+}
+
+void __init MMU_init_hw_patch(void)
+{
+	unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+
+	if (ppc_md.progress)
+		ppc_md.progress("hash:patch", 0x345);
+	if (ppc_md.progress)
+		ppc_md.progress("hash:done", 0x205);
+
+	/* WARNING: Make sure nothing can trigger a KASAN check past this point */
 
 	/*
 	 * Patch up the instructions in hashtable.S:create_hpte
 	 */
-	if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
-	Hash_mask = n_hpteg - 1;
-	hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
-	mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
-	if (lg_n_hpteg > 16)
-		mb2 = 16 - LG_HPTEG_SIZE;
-
 	modify_instruction_site(&patch__hash_page_A0, 0xffff,
 				((unsigned int)Hash - PAGE_OFFSET) >> 16);
-	modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6);
-	modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6);
+	modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
+	modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
 	modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
 	modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
 
@@ -383,11 +391,9 @@ void __init MMU_init_hw(void)
 	 */
 	modify_instruction_site(&patch__flush_hash_A0, 0xffff,
 				((unsigned int)Hash - PAGE_OFFSET) >> 16);
-	modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6);
-	modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6);
+	modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
+	modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
 	modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
-
-	if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
 }
 
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
@@ -404,3 +410,33 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	else /* Anything else has 256M mapped */
 		memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
 }
+
+void __init print_system_hash_info(void)
+{
+	pr_info("Hash_size = 0x%lx\n", Hash_size);
+	if (Hash_mask)
+		pr_info("Hash_mask = 0x%lx\n", Hash_mask);
+}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+	pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+	if (cpu_has_feature(CPU_FTR_601))
+		pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
+
+	if (disabled)
+		pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+	pr_info("Activating Kernel Userspace Access Protection\n");
+
+	if (disabled)
+		pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
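setup_kuep()/setup_kuap() take a `disabled` flag but can only warn on 32-bit book3s, where the protection is wired into the hash/segment code whenever it is compiled in. The flag itself would plausibly come from early parameters shared across the platforms (sketch; parameter names are an assumption):

	/* Sketch of where the 'disabled' arguments could come from. */
	static bool disable_kuep;
	static bool disable_kuap;

	static int __init parse_nokuep(char *p)
	{
		disable_kuep = true;	/* user asked to keep execution prevention off */
		return 0;
	}
	early_param("nosmep", parse_nokuep);	/* assumed parameter name */

	static int __init parse_nokuap(char *p)
	{
		disable_kuap = true;	/* user asked to keep access protection off */
		return 0;
	}
	early_param("nosmap", parse_nokuap);	/* assumed parameter name */
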
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 921c1e33e941..921c1e33e941 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/book3s32/tlb.c
index cf8472cf3d59..8d56f0417f87 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -32,7 +32,7 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
 
 /*
  * Called when unmapping pages to flush entries from the TLB/hash table.
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
new file mode 100644
index 000000000000..974b4fc19f4f
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y := $(NO_MINIMAL_TOC)
+
+CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
+
+obj-y += hash_pgtable.o hash_utils.o slb.o \
+	 mmu_context.o pgtable.o hash_tlb.o
+obj-$(CONFIG_PPC_NATIVE) += hash_native.o
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
+obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
+obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
+obj-$(CONFIG_PPC_SPLPAR) += vphn.o
+obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
+endif
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
+obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o
+obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
+
+# Instrumenting the SLB fault path can lead to duplicate SLB entries
+KCOV_INSTRUMENT_slb.o := n
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 6fa6765a10eb..22e787123cdf 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -1,6 +1,6 @@
 /*
  * Copyright IBM Corporation, 2015
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 3afa253d7f52..7084ce2951e6 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -1,6 +1,6 @@
 /*
  * Copyright IBM Corporation, 2015
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugepage.c
index dfbc3b32f09b..440823797de7 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_hugepage.c
@@ -1,6 +1,6 @@
 /*
  * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2.1 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
index b0d9209d9a86..eefa89c6117b 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
@@ -15,6 +15,9 @@
 #include <asm/cacheflush.h>
 #include <asm/machdep.h>
 
+unsigned int hpage_shift;
+EXPORT_SYMBOL(hpage_shift);
+
 extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 				  unsigned long pa, unsigned long rlags,
 				  unsigned long vflags, int psize, int ssize);
@@ -34,7 +37,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	/* Search the Linux page table for a match with va */
 	vpn = hpt_vpn(ea, vsid, ssize);
 
-	/* At this point, we have a pte (old_pte) which can be used to build
+	/*
+	 * At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
 	 *
 	 * 1. There is a valid (present) pte with no associated HPTE (this is
@@ -55,8 +59,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	if (unlikely(!check_pte_access(access, old_pte)))
 		return 1;
 
-	/* Try to lock the PTE, add ACCESSED and DIRTY if it was
-	 * a write access */
+	/*
+	 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+	 * a write access
+	 */
 	new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
 	if (access & _PAGE_WRITE)
 		new_pte |= _PAGE_DIRTY;
@@ -74,8 +80,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	rpte = __real_pte(__pte(old_pte), ptep, offset);
 
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
-		/* No CPU has hugepages but lacks no execute, so we
-		 * don't need to worry about that case */
+		/*
+		 * No CPU has hugepages but lacks no execute, so we
+		 * don't need to worry about that case
+		 */
 		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
 
 	/* Check if pte already has an hpte (case 2) */
@@ -145,3 +153,16 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr
 					 old_pte, pte);
 	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
 }
+
+void hugetlbpage_init_default(void)
+{
+	/* Set default large page size. Currently, we pick 16M or 1M
+	 * depending on what is available
+	 */
+	if (mmu_psize_defs[MMU_PAGE_16M].shift)
+		hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift;
+	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+		hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift;
+	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
+		hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift;
+}
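hpage_shift becomes the single boot-time record of the default huge page size, chosen by preference order from what the MMU actually supports; everything else can be derived from it. Roughly (sketch of derived macros, assuming they follow the usual pattern):

	/* Sketch: how a boot-selected hpage_shift drives the generic macros. */
	#define HPAGE_SHIFT		hpage_shift
	#define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
	#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
	#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
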
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/book3s64/hash_native.c
index aaa28fd918fe..aaa28fd918fe 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index c08d49046a96..1fd025dba4a3 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -19,7 +19,7 @@
 #include <asm/mmu.h>
 #include <asm/tlb.h>
 
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/thp.h>
@@ -112,9 +112,16 @@ int __meminit hash__vmemmap_create_mapping(unsigned long start,
 					  unsigned long page_size,
 					  unsigned long phys)
 {
-	int rc = htab_bolt_mapping(start, start + page_size, phys,
-				   pgprot_val(PAGE_KERNEL),
-				   mmu_vmemmap_psize, mmu_kernel_ssize);
+	int rc;
+
+	if ((start + page_size) >= H_VMEMMAP_END) {
+		pr_warn("Outside the supported range\n");
+		return -1;
+	}
+
+	rc = htab_bolt_mapping(start, start + page_size, phys,
+			       pgprot_val(PAGE_KERNEL),
+			       mmu_vmemmap_psize, mmu_kernel_ssize);
 	if (rc < 0) {
 		int rc2 = htab_remove_mapping(start, start + page_size,
 					      mmu_vmemmap_psize,
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index 87d71dd25441..d4f0101447b1 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -55,7 +55,8 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 
 	i = batch->index;
 
-	/* Get page size (maybe move back to caller).
+	/*
+	 * Get page size (maybe move back to caller).
 	 *
 	 * NOTE: when using special 64K mappings in 4K environment like
 	 * for SPEs, we obtain the page size from the slice, which thus
@@ -77,10 +78,12 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 #endif
 	} else {
 		psize = pte_pagesize_index(mm, addr, pte);
-		/* Mask the address for the standard page size. If we
+		/*
+		 * Mask the address for the standard page size. If we
 		 * have a 64k page kernel, but the hardware does not
 		 * support 64k pages, this might be different from the
-		 * hardware page size encoded in the slice table. */
+		 * hardware page size encoded in the slice table.
+		 */
 		addr &= PAGE_MASK;
 		offset = PTRS_PER_PTE;
 	}
@@ -161,7 +164,8 @@ void hash__tlb_flush(struct mmu_gather *tlb)
 {
 	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
 
-	/* If there's a TLB batch pending, then we must flush it because the
+	/*
+	 * If there's a TLB batch pending, then we must flush it because the
 	 * pages are going to be freed and we really don't want to have a CPU
 	 * access a freed page because it has a stale TLB
 	 */
@@ -201,7 +205,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 
 	BUG_ON(!mm->pgd);
 
-	/* Note: Normally, we should only ever use a batch within a
+	/*
+	 * Note: Normally, we should only ever use a batch within a
 	 * PTE locked section. This violates the rule, but will work
 	 * since we don't actually modify the PTEs, we just flush the
 	 * hash while leaving the PTEs intact (including their reference
@@ -238,7 +243,8 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
 	unsigned long flags;
 
 	addr = _ALIGN_DOWN(addr, PMD_SIZE);
-	/* Note: Normally, we should only ever use a batch within a
+	/*
+	 * Note: Normally, we should only ever use a batch within a
 	 * PTE locked section. This violates the rule, but will work
 	 * since we don't actually modify the PTEs, we just flush the
 	 * hash while leaving the PTEs intact (including their reference
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 0a4f939a8161..919a861a8ec0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -37,6 +37,7 @@
 #include <linux/context_tracking.h>
 #include <linux/libfdt.h>
 #include <linux/pkeys.h>
+#include <linux/hugetlb.h>
 
 #include <asm/debugfs.h>
 #include <asm/processor.h>
@@ -65,6 +66,8 @@
 #include <asm/pte-walk.h>
 #include <asm/asm-prototypes.h>
 
+#include <mm/mmu_decl.h>
+
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
 #else
@@ -128,7 +131,8 @@ static DEFINE_SPINLOCK(linear_map_hash_lock);
 struct mmu_hash_ops mmu_hash_ops;
 EXPORT_SYMBOL(mmu_hash_ops);
 
-/* There are definitions of page sizes arrays to be used when none
+/*
+ * These are definitions of page sizes arrays to be used when none
  * is provided by the firmware.
  */
 
@@ -145,7 +149,8 @@ static struct mmu_psize_def mmu_psize_defaults[] = {
 	},
 };
 
-/* POWER4, GPUL, POWER5
+/*
+ * POWER4, GPUL, POWER5
  *
  * Support for 16Mb large pages
  */
@@ -479,7 +484,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-/* Scan for 16G memory blocks that have been set aside for huge pages
+/*
+ * Scan for 16G memory blocks that have been set aside for huge pages
  * and reserve those blocks for 16G huge pages.
  */
 static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
@@ -496,8 +502,10 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
496 if (type == NULL || strcmp(type, "memory") != 0) 502 if (type == NULL || strcmp(type, "memory") != 0)
497 return 0; 503 return 0;
498 504
499 /* This property is the log base 2 of the number of virtual pages that 505 /*
500 * will represent this memory block. */ 506 * This property is the log base 2 of the number of virtual pages that
507 * will represent this memory block.
508 */
501 page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL); 509 page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
502 if (page_count_prop == NULL) 510 if (page_count_prop == NULL)
503 return 0; 511 return 0;
@@ -673,7 +681,8 @@ static void __init htab_init_page_sizes(void)
673#endif /* CONFIG_PPC_64K_PAGES */ 681#endif /* CONFIG_PPC_64K_PAGES */
674 682
675#ifdef CONFIG_SPARSEMEM_VMEMMAP 683#ifdef CONFIG_SPARSEMEM_VMEMMAP
676 /* We try to use 16M pages for vmemmap if that is supported 684 /*
685 * We try to use 16M pages for vmemmap if that is supported
677 * and we have at least 1G of RAM at boot 686 * and we have at least 1G of RAM at boot
678 */ 687 */
679 if (mmu_psize_defs[MMU_PAGE_16M].shift && 688 if (mmu_psize_defs[MMU_PAGE_16M].shift &&
@@ -742,7 +751,8 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size)
742 751
743static unsigned long __init htab_get_table_size(void) 752static unsigned long __init htab_get_table_size(void)
744{ 753{
745 /* If hash size isn't already provided by the platform, we try to 754 /*
755 * If hash size isn't already provided by the platform, we try to
746 * retrieve it from the device-tree. If it's not there either, we 756 * retrieve it from the device-tree. If it's not there either, we
747 * calculate it now based on the total RAM size 757 * calculate it now based on the total RAM size
748 */ 758 */
@@ -755,12 +765,12 @@ static unsigned long __init htab_get_table_size(void)
755} 765}
756 766
757#ifdef CONFIG_MEMORY_HOTPLUG 767#ifdef CONFIG_MEMORY_HOTPLUG
758void resize_hpt_for_hotplug(unsigned long new_mem_size) 768int resize_hpt_for_hotplug(unsigned long new_mem_size)
759{ 769{
760 unsigned target_hpt_shift; 770 unsigned target_hpt_shift;
761 771
762 if (!mmu_hash_ops.resize_hpt) 772 if (!mmu_hash_ops.resize_hpt)
763 return; 773 return 0;
764 774
765 target_hpt_shift = htab_shift_for_mem_size(new_mem_size); 775 target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
766 776
@@ -772,23 +782,25 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
772 * reduce unless the target shift is at least 2 below the 782 * reduce unless the target shift is at least 2 below the
773 * current shift 783 * current shift
774 */ 784 */
775 if ((target_hpt_shift > ppc64_pft_size) 785 if (target_hpt_shift > ppc64_pft_size ||
776 || (target_hpt_shift < (ppc64_pft_size - 1))) { 786 target_hpt_shift < ppc64_pft_size - 1)
777 int rc; 787 return mmu_hash_ops.resize_hpt(target_hpt_shift);
778 788
779 rc = mmu_hash_ops.resize_hpt(target_hpt_shift); 789 return 0;
780 if (rc && (rc != -ENODEV))
781 printk(KERN_WARNING
782 "Unable to resize hash page table to target order %d: %d\n",
783 target_hpt_shift, rc);
784 }
785} 790}
786 791
787int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) 792int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
788{ 793{
789 int rc = htab_bolt_mapping(start, end, __pa(start), 794 int rc;
790 pgprot_val(PAGE_KERNEL), mmu_linear_psize, 795
791 mmu_kernel_ssize); 796 if (end >= H_VMALLOC_START) {
797 pr_warn("Outside the supported range\n");
798 return -1;
799 }
800
801 rc = htab_bolt_mapping(start, end, __pa(start),
802 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
803 mmu_kernel_ssize);
792 804
793 if (rc < 0) { 805 if (rc < 0) {
794 int rc2 = htab_remove_mapping(start, end, mmu_linear_psize, 806 int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
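The two hunks above make hash hotplug failures visible: resize_hpt_for_hotplug() now returns int instead of void, and hash__create_section_mapping() refuses ranges that would run into the vmalloc window. A minimal caller sketch (the arch_add_memory() wiring is an assumption, not shown in this hunk):

	/* Hypothetical caller, illustrating why the return type changed:
	 * with an int result the hot-add can be aborted cleanly instead
	 * of continuing with an undersized hash page table.
	 */
	int rc = resize_hpt_for_hotplug(memblock_phys_mem_size());

	if (rc < 0 && rc != -ENODEV)
		return rc;	/* refuse the hot-add */

	return hash__create_section_mapping(start, end, nid);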
@@ -929,6 +941,11 @@ static void __init htab_initialize(void)
929 DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", 941 DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
930 base, size, prot); 942 base, size, prot);
931 943
944 if ((base + size) >= H_VMALLOC_START) {
945 pr_warn("Outside the supported range\n");
946 continue;
947 }
948
932 BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), 949 BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
933 prot, mmu_linear_psize, mmu_kernel_ssize)); 950 prot, mmu_linear_psize, mmu_kernel_ssize));
934 } 951 }
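This bounds check is the same guard that recurs throughout the series: now that hash and radix share one virtual-address layout, the linear map may not extend into the vmalloc region. The pattern, condensed (per-MMU constants as used in the surrounding hunks):

	/* Memory at or above the vmalloc start cannot be covered by the
	 * linear mapping, so boot-time regions are skipped and hotplug
	 * requests are rejected.
	 */
	if ((base + size) >= H_VMALLOC_START) {	/* RADIX_VMALLOC_START on radix */
		pr_warn("Outside the supported range\n");
		continue;	/* hotplug paths return -1 instead */
	}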
@@ -968,6 +985,7 @@ void __init hash__early_init_devtree(void)
968 htab_scan_page_sizes(); 985 htab_scan_page_sizes();
969} 986}
970 987
988struct hash_mm_context init_hash_mm_context;
971void __init hash__early_init_mmu(void) 989void __init hash__early_init_mmu(void)
972{ 990{
973#ifndef CONFIG_PPC_64K_PAGES 991#ifndef CONFIG_PPC_64K_PAGES
@@ -1013,11 +1031,11 @@ void __init hash__early_init_mmu(void)
1013 __pgd_val_bits = HASH_PGD_VAL_BITS; 1031 __pgd_val_bits = HASH_PGD_VAL_BITS;
1014 1032
1015 __kernel_virt_start = H_KERN_VIRT_START; 1033 __kernel_virt_start = H_KERN_VIRT_START;
1016 __kernel_virt_size = H_KERN_VIRT_SIZE;
1017 __vmalloc_start = H_VMALLOC_START; 1034 __vmalloc_start = H_VMALLOC_START;
1018 __vmalloc_end = H_VMALLOC_END; 1035 __vmalloc_end = H_VMALLOC_END;
1019 __kernel_io_start = H_KERN_IO_START; 1036 __kernel_io_start = H_KERN_IO_START;
1020 vmemmap = (struct page *)H_VMEMMAP_BASE; 1037 __kernel_io_end = H_KERN_IO_END;
1038 vmemmap = (struct page *)H_VMEMMAP_START;
1021 ioremap_bot = IOREMAP_BASE; 1039 ioremap_bot = IOREMAP_BASE;
1022 1040
1023#ifdef CONFIG_PCI 1041#ifdef CONFIG_PCI
@@ -1035,12 +1053,16 @@ void __init hash__early_init_mmu(void)
1035 if (!mmu_hash_ops.hpte_insert) 1053 if (!mmu_hash_ops.hpte_insert)
1036 panic("hash__early_init_mmu: No MMU hash ops defined!\n"); 1054 panic("hash__early_init_mmu: No MMU hash ops defined!\n");
1037 1055
1038 /* Initialize the MMU Hash table and create the linear mapping 1056 /*
1057 * Initialize the MMU Hash table and create the linear mapping
1039 * of memory. Has to be done before SLB initialization as this is 1058 * of memory. Has to be done before SLB initialization as this is
1040 * currently where the page size encoding is obtained. 1059 * currently where the page size encoding is obtained.
1041 */ 1060 */
1042 htab_initialize(); 1061 htab_initialize();
1043 1062
1063 init_mm.context.hash_context = &init_hash_mm_context;
1064 mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
1065
1044 pr_info("Initializing hash mmu with SLB\n"); 1066 pr_info("Initializing hash mmu with SLB\n");
1045 /* Initialize SLB management */ 1067 /* Initialize SLB management */
1046 slb_initialize(); 1068 slb_initialize();
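init_mm is wired to a statically allocated struct hash_mm_context here, while user contexts allocate theirs in hash__init_new_context() (see the mmu_context.c hunks below). The struct definition is not part of this diff; a sketch of the shape implied by the accessors this series uses, with field names as assumptions:

	/* Assumed layout, inferred from mm_ctx_user_psize(),
	 * mm_ctx_slb_addr_limit() and mm_ctx_subpage_prot(); the real
	 * definition lives in a header outside this diff.
	 */
	struct hash_mm_context {
		u16 user_psize;			/* base page size index */
		unsigned long slb_addr_limit;	/* SLB_ADDR_LIMIT_DEFAULT at boot */
	#ifdef CONFIG_PPC_SUBPAGE_PROT
		struct subpage_prot_table *spt;	/* allocated on first use */
	#endif
	};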
@@ -1147,10 +1169,13 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
1147 */ 1169 */
1148static int subpage_protection(struct mm_struct *mm, unsigned long ea) 1170static int subpage_protection(struct mm_struct *mm, unsigned long ea)
1149{ 1171{
1150 struct subpage_prot_table *spt = &mm->context.spt; 1172 struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
1151 u32 spp = 0; 1173 u32 spp = 0;
1152 u32 **sbpm, *sbpp; 1174 u32 **sbpm, *sbpp;
1153 1175
1176 if (!spt)
1177 return 0;
1178
1154 if (ea >= spt->maxaddr) 1179 if (ea >= spt->maxaddr)
1155 return 0; 1180 return 0;
1156 if (ea < 0x100000000UL) { 1181 if (ea < 0x100000000UL) {
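The NULL test works because the subpage-protection table is no longer embedded in mm_context_t but hangs off hash_context, and only the subpage_prot() syscall allocates it. A sketch of the accessor this hunk switches to (assumed, matching how it is used here and assigned in the subpage_prot.c hunk below):

	static inline struct subpage_prot_table *
	mm_ctx_subpage_prot(mm_context_t *ctx)
	{
		return ctx->hash_context->spt;	/* NULL until the syscall runs */
	}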
@@ -1214,7 +1239,8 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
1214 } 1239 }
1215} 1240}
1216 1241
1217/* Result code is: 1242/*
1243 * Result code is:
1218 * 0 - handled 1244 * 0 - handled
1219 * 1 - normal page fault 1245 * 1 - normal page fault
1220 * -1 - critical hash insertion error 1246 * -1 - critical hash insertion error
@@ -1238,7 +1264,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
1238 trace_hash_fault(ea, access, trap); 1264 trace_hash_fault(ea, access, trap);
1239 1265
1240 /* Get region & vsid */ 1266 /* Get region & vsid */
1241 switch (REGION_ID(ea)) { 1267 switch (get_region_id(ea)) {
1242 case USER_REGION_ID: 1268 case USER_REGION_ID:
1243 user_region = 1; 1269 user_region = 1;
1244 if (! mm) { 1270 if (! mm) {
@@ -1252,15 +1278,19 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
1252 break; 1278 break;
1253 case VMALLOC_REGION_ID: 1279 case VMALLOC_REGION_ID:
1254 vsid = get_kernel_vsid(ea, mmu_kernel_ssize); 1280 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
1255 if (ea < VMALLOC_END) 1281 psize = mmu_vmalloc_psize;
1256 psize = mmu_vmalloc_psize; 1282 ssize = mmu_kernel_ssize;
1257 else 1283 break;
1258 psize = mmu_io_psize; 1284
1285 case IO_REGION_ID:
1286 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
1287 psize = mmu_io_psize;
1259 ssize = mmu_kernel_ssize; 1288 ssize = mmu_kernel_ssize;
1260 break; 1289 break;
1261 default: 1290 default:
1262 /* Not a valid range 1291 /*
1263 * Send the problem up to do_page_fault 1292 * Not a valid range
1293 * Send the problem up to do_page_fault()
1264 */ 1294 */
1265 rc = 1; 1295 rc = 1;
1266 goto bail; 1296 goto bail;
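Splitting the IO area out of the vmalloc region removes the old `ea < VMALLOC_END` disambiguation: every kernel effective address now classifies to exactly one region ID. One plausible shape for get_region_id(), not taken from this diff and with the ordering of the windows assumed, just to make the dispatch above concrete:

	static inline int get_region_id_sketch(unsigned long ea)
	{
		if (ea < PAGE_OFFSET)
			return USER_REGION_ID;
		if (ea < H_VMALLOC_START)
			return LINEAR_MAP_REGION_ID;
		if (ea < H_KERN_IO_START)
			return VMALLOC_REGION_ID;
		if (ea < H_VMEMMAP_START)
			return IO_REGION_ID;
		return VMEMMAP_REGION_ID;
	}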
@@ -1285,7 +1315,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
1285 flags |= HPTE_LOCAL_UPDATE; 1315 flags |= HPTE_LOCAL_UPDATE;
1286 1316
1287#ifndef CONFIG_PPC_64K_PAGES 1317#ifndef CONFIG_PPC_64K_PAGES
1288 /* If we use 4K pages and our psize is not 4K, then we might 1318 /*
1319 * If we use 4K pages and our psize is not 4K, then we might
1289 * be hitting a special driver mapping, and need to align the 1320 * be hitting a special driver mapping, and need to align the
1290 * address before we fetch the PTE. 1321 * address before we fetch the PTE.
1291 * 1322 *
@@ -1307,7 +1338,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
1307 /* Add _PAGE_PRESENT to the required access perm */ 1338 /* Add _PAGE_PRESENT to the required access perm */
1308 access |= _PAGE_PRESENT; 1339 access |= _PAGE_PRESENT;
1309 1340
1310 /* Pre-check access permissions (will be re-checked atomically 1341 /*
1342 * Pre-check access permissions (will be re-checked atomically
1311 * in __hash_page_XX but this pre-check is a fast path) 1343 * in __hash_page_XX but this pre-check is a fast path)
1312 */ 1344 */
1313 if (!check_pte_access(access, pte_val(*ptep))) { 1345 if (!check_pte_access(access, pte_val(*ptep))) {
@@ -1354,7 +1386,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
1354 psize = MMU_PAGE_4K; 1386 psize = MMU_PAGE_4K;
1355 } 1387 }
1356 1388
1357 /* If this PTE is non-cacheable and we have restrictions on 1389 /*
1390 * If this PTE is non-cacheable and we have restrictions on
1358 * using non cacheable large pages, then we switch to 4k 1391 * using non cacheable large pages, then we switch to 4k
1359 */ 1392 */
1360 if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) { 1393 if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
@@ -1395,7 +1428,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
1395 flags, ssize, spp); 1428 flags, ssize, spp);
1396 } 1429 }
1397 1430
1398 /* Dump some info in case of hash insertion failure, they should 1431 /*
1432 * Dump some info in case of hash insertion failure, they should
1399 * never happen so it is really useful to know if/when they do 1433 * never happen so it is really useful to know if/when they do
1400 */ 1434 */
1401 if (rc == -1) 1435 if (rc == -1)
@@ -1421,7 +1455,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
1421 unsigned long flags = 0; 1455 unsigned long flags = 0;
1422 struct mm_struct *mm = current->mm; 1456 struct mm_struct *mm = current->mm;
1423 1457
1424 if (REGION_ID(ea) == VMALLOC_REGION_ID) 1458 if ((get_region_id(ea) == VMALLOC_REGION_ID) ||
1459 (get_region_id(ea) == IO_REGION_ID))
1425 mm = &init_mm; 1460 mm = &init_mm;
1426 1461
1427 if (dsisr & DSISR_NOHPTE) 1462 if (dsisr & DSISR_NOHPTE)
@@ -1437,8 +1472,9 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
1437 unsigned long access = _PAGE_PRESENT | _PAGE_READ; 1472 unsigned long access = _PAGE_PRESENT | _PAGE_READ;
1438 unsigned long flags = 0; 1473 unsigned long flags = 0;
1439 struct mm_struct *mm = current->mm; 1474 struct mm_struct *mm = current->mm;
1475 unsigned int region_id = get_region_id(ea);
1440 1476
1441 if (REGION_ID(ea) == VMALLOC_REGION_ID) 1477 if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
1442 mm = &init_mm; 1478 mm = &init_mm;
1443 1479
1444 if (dsisr & DSISR_NOHPTE) 1480 if (dsisr & DSISR_NOHPTE)
@@ -1455,7 +1491,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
1455 * 2) user space access kernel space. 1491 * 2) user space access kernel space.
1456 */ 1492 */
1457 access |= _PAGE_PRIVILEGED; 1493 access |= _PAGE_PRIVILEGED;
1458 if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) 1494 if ((msr & MSR_PR) || (region_id == USER_REGION_ID))
1459 access &= ~_PAGE_PRIVILEGED; 1495 access &= ~_PAGE_PRIVILEGED;
1460 1496
1461 if (trap == 0x400) 1497 if (trap == 0x400)
@@ -1470,7 +1506,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
1470 int psize = get_slice_psize(mm, ea); 1506 int psize = get_slice_psize(mm, ea);
1471 1507
1472 /* We only prefault standard pages for now */ 1508 /* We only prefault standard pages for now */
1473 if (unlikely(psize != mm->context.user_psize)) 1509 if (unlikely(psize != mm_ctx_user_psize(&mm->context)))
1474 return false; 1510 return false;
1475 1511
1476 /* 1512 /*
@@ -1499,7 +1535,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1499 int rc, ssize, update_flags = 0; 1535 int rc, ssize, update_flags = 0;
1500 unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0); 1536 unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
1501 1537
1502 BUG_ON(REGION_ID(ea) != USER_REGION_ID); 1538 BUG_ON(get_region_id(ea) != USER_REGION_ID);
1503 1539
1504 if (!should_hash_preload(mm, ea)) 1540 if (!should_hash_preload(mm, ea))
1505 return; 1541 return;
@@ -1549,7 +1585,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1549 1585
1550 /* Hash it in */ 1586 /* Hash it in */
1551#ifdef CONFIG_PPC_64K_PAGES 1587#ifdef CONFIG_PPC_64K_PAGES
1552 if (mm->context.user_psize == MMU_PAGE_64K) 1588 if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
1553 rc = __hash_page_64K(ea, access, vsid, ptep, trap, 1589 rc = __hash_page_64K(ea, access, vsid, ptep, trap,
1554 update_flags, ssize); 1590 update_flags, ssize);
1555 else 1591 else
@@ -1562,8 +1598,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1562 */ 1598 */
1563 if (rc == -1) 1599 if (rc == -1)
1564 hash_failure_debug(ea, access, vsid, trap, ssize, 1600 hash_failure_debug(ea, access, vsid, trap, ssize,
1565 mm->context.user_psize, 1601 mm_ctx_user_psize(&mm->context),
1566 mm->context.user_psize, 1602 mm_ctx_user_psize(&mm->context),
1567 pte_val(*ptep)); 1603 pte_val(*ptep));
1568out_exit: 1604out_exit:
1569 local_irq_restore(flags); 1605 local_irq_restore(flags);
@@ -1634,7 +1670,8 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
1634 return gslot; 1670 return gslot;
1635} 1671}
1636 1672
1637/* WARNING: This is called from hash_low_64.S, if you change this prototype, 1673/*
1674 * WARNING: This is called from hash_low_64.S, if you change this prototype,
1638 * do not forget to update the assembly call site! 1675 * do not forget to update the assembly call site!
1639 */ 1676 */
1640void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, 1677void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
@@ -1855,7 +1892,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
1855void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, 1892void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
1856 phys_addr_t first_memblock_size) 1893 phys_addr_t first_memblock_size)
1857{ 1894{
1858 /* We don't currently support the first MEMBLOCK not mapping 0 1895 /*
1896 * We don't currently support the first MEMBLOCK not mapping 0
1859 * physical on those processors 1897 * physical on those processors
1860 */ 1898 */
1861 BUG_ON(first_memblock_base != 0); 1899 BUG_ON(first_memblock_base != 0);
@@ -1909,3 +1947,14 @@ static int __init hash64_debugfs(void)
1909} 1947}
1910machine_device_initcall(pseries, hash64_debugfs); 1948machine_device_initcall(pseries, hash64_debugfs);
1911#endif /* CONFIG_DEBUG_FS */ 1949#endif /* CONFIG_DEBUG_FS */
1950
1951void __init print_system_hash_info(void)
1952{
1953 pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
1954
1955 if (htab_hash_mask)
1956 pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
1957 pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START);
1958 pr_info("kernel IO start = 0x%lx\n", KERN_IO_START);
1959 pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
1960}
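Based purely on the format strings above, the new boot-time summary prints a block like the following (all values are illustrative placeholders, not captured output):

	ppc64_pft_size = 0x1e
	htab_hash_mask = 0x7ffff
	kernel vmalloc start = 0xc008000000000000
	kernel IO start = 0xc00a000000000000
	kernel vmemmap start = 0xc00c000000000000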
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 8330f135294f..8330f135294f 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/book3s64/mmu_context.c
index f720c5cc0b5e..cb2b08635508 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -63,6 +63,13 @@ static int hash__init_new_context(struct mm_struct *mm)
63 if (index < 0) 63 if (index < 0)
64 return index; 64 return index;
65 65
66 mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
67 GFP_KERNEL);
68 if (!mm->context.hash_context) {
69 ida_free(&mmu_context_ida, index);
70 return -ENOMEM;
71 }
72
66 /* 73 /*
67 * The old code would re-promote on fork, we don't do that when using 74 * The old code would re-promote on fork, we don't do that when using
68 * slices as it could cause problem promoting slices that have been 75 * slices as it could cause problem promoting slices that have been
@@ -77,10 +84,26 @@ static int hash__init_new_context(struct mm_struct *mm)
77 * We should not be calling init_new_context() on init_mm. Hence a 84 * We should not be calling init_new_context() on init_mm. Hence a
78 * check against 0 is OK. 85 * check against 0 is OK.
79 */ 86 */
80 if (mm->context.id == 0) 87 if (mm->context.id == 0) {
88 memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
81 slice_init_new_context_exec(mm); 89 slice_init_new_context_exec(mm);
90 } else {
91 /* This is fork. Copy hash_context details from current->mm */
92 memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
93#ifdef CONFIG_PPC_SUBPAGE_PROT
94 /* inherit subpage prot details if we have one. */
95 if (current->mm->context.hash_context->spt) {
96 mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
97 GFP_KERNEL);
98 if (!mm->context.hash_context->spt) {
99 ida_free(&mmu_context_ida, index);
100 kfree(mm->context.hash_context);
101 return -ENOMEM;
102 }
103 }
104#endif
82 105
83 subpage_prot_init_new_context(mm); 106 }
84 107
85 pkey_mm_init(mm); 108 pkey_mm_init(mm);
86 return index; 109 return index;
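hash__init_new_context() now owns up to three resources (the context ID, the hash_context, and on fork possibly a copied subpage-prot table), and each failure path must release everything taken so far. Distilled from the hunk above:

	/* Allocation order and the matching cleanup on failure:
	 *   ida_alloc_range(context id)  -- undone by ida_free()
	 *   kmalloc(hash_context)        -- undone by kfree()
	 *   kmalloc(spt) on fork         -- newest allocation, checked last
	 * destroy_contexts() below gains the kfree(ctx->hash_context) that
	 * pairs with the kmalloc() here.
	 */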
@@ -118,6 +141,7 @@ static int radix__init_new_context(struct mm_struct *mm)
118 asm volatile("ptesync;isync" : : : "memory"); 141 asm volatile("ptesync;isync" : : : "memory");
119 142
120 mm->context.npu_context = NULL; 143 mm->context.npu_context = NULL;
144 mm->context.hash_context = NULL;
121 145
122 return index; 146 return index;
123} 147}
@@ -162,6 +186,7 @@ static void destroy_contexts(mm_context_t *ctx)
162 if (context_id) 186 if (context_id)
163 ida_free(&mmu_context_ida, context_id); 187 ida_free(&mmu_context_ida, context_id);
164 } 188 }
189 kfree(ctx->hash_context);
165} 190}
166 191
167static void pmd_frag_destroy(void *pmd_frag) 192static void pmd_frag_destroy(void *pmd_frag)
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/book3s64/pgtable.c
index a4341aba0af4..16bda049187a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -17,7 +17,7 @@
17#include <asm/trace.h> 17#include <asm/trace.h>
18#include <asm/powernv.h> 18#include <asm/powernv.h>
19 19
20#include "mmu_decl.h" 20#include <mm/mmu_decl.h>
21#include <trace/events/thp.h> 21#include <trace/events/thp.h>
22 22
23unsigned long __pmd_frag_nr; 23unsigned long __pmd_frag_nr;
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 587807763737..ae7fca40e5b3 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -7,6 +7,7 @@
7 7
8#include <asm/mman.h> 8#include <asm/mman.h>
9#include <asm/mmu_context.h> 9#include <asm/mmu_context.h>
10#include <asm/mmu.h>
10#include <asm/setup.h> 11#include <asm/setup.h>
11#include <linux/pkeys.h> 12#include <linux/pkeys.h>
12#include <linux/of_device.h> 13#include <linux/of_device.h>
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index cab06331c0c0..cab06331c0c0 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 154472a28c77..c9bcf428dd2b 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -29,6 +29,7 @@
29#include <asm/powernv.h> 29#include <asm/powernv.h>
30#include <asm/sections.h> 30#include <asm/sections.h>
31#include <asm/trace.h> 31#include <asm/trace.h>
32#include <asm/uaccess.h>
32 33
33#include <trace/events/thp.h> 34#include <trace/events/thp.h>
34 35
@@ -135,6 +136,10 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa,
135 */ 136 */
136 BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); 137 BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
137 138
139#ifdef CONFIG_PPC_64K_PAGES
140 BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
141#endif
142
138 if (unlikely(!slab_is_available())) 143 if (unlikely(!slab_is_available()))
139 return early_map_kernel_page(ea, pa, flags, map_page_size, 144 return early_map_kernel_page(ea, pa, flags, map_page_size,
140 nid, region_start, region_end); 145 nid, region_start, region_end);
@@ -334,6 +339,12 @@ void __init radix_init_pgtable(void)
334 * page tables will be allocated within the range. No 339 * page tables will be allocated within the range. No
335 * need for a node (which we don't have yet). 340 * need for a node (which we don't have yet).
336 */ 341 */
342
343 if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
344 pr_warn("Outside the supported range\n");
345 continue;
346 }
347
337 WARN_ON(create_physical_mapping(reg->base, 348 WARN_ON(create_physical_mapping(reg->base,
338 reg->base + reg->size, 349 reg->base + reg->size,
339 -1)); 350 -1));
@@ -531,8 +542,15 @@ static void radix_init_amor(void)
531 mtspr(SPRN_AMOR, (3ul << 62)); 542 mtspr(SPRN_AMOR, (3ul << 62));
532} 543}
533 544
534static void radix_init_iamr(void) 545#ifdef CONFIG_PPC_KUEP
546void setup_kuep(bool disabled)
535{ 547{
548 if (disabled || !early_radix_enabled())
549 return;
550
551 if (smp_processor_id() == boot_cpuid)
552 pr_info("Activating Kernel Userspace Execution Prevention\n");
553
536 /* 554 /*
537 * Radix always uses key0 of the IAMR to determine if an access is 555 * Radix always uses key0 of the IAMR to determine if an access is
538 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction 556 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
@@ -540,6 +558,25 @@ static void radix_init_iamr(void)
540 */ 558 */
541 mtspr(SPRN_IAMR, (1ul << 62)); 559 mtspr(SPRN_IAMR, (1ul << 62));
542} 560}
561#endif
562
563#ifdef CONFIG_PPC_KUAP
564void setup_kuap(bool disabled)
565{
566 if (disabled || !early_radix_enabled())
567 return;
568
569 if (smp_processor_id() == boot_cpuid) {
570 pr_info("Activating Kernel Userspace Access Prevention\n");
571 cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
572 }
573
574 /* Make sure userspace can't change the AMR */
575 mtspr(SPRN_UAMOR, 0);
576 mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
577 isync();
578}
579#endif
543 580
544void __init radix__early_init_mmu(void) 581void __init radix__early_init_mmu(void)
545{ 582{
@@ -574,11 +611,11 @@ void __init radix__early_init_mmu(void)
574 __pgd_val_bits = RADIX_PGD_VAL_BITS; 611 __pgd_val_bits = RADIX_PGD_VAL_BITS;
575 612
576 __kernel_virt_start = RADIX_KERN_VIRT_START; 613 __kernel_virt_start = RADIX_KERN_VIRT_START;
577 __kernel_virt_size = RADIX_KERN_VIRT_SIZE;
578 __vmalloc_start = RADIX_VMALLOC_START; 614 __vmalloc_start = RADIX_VMALLOC_START;
579 __vmalloc_end = RADIX_VMALLOC_END; 615 __vmalloc_end = RADIX_VMALLOC_END;
580 __kernel_io_start = RADIX_KERN_IO_START; 616 __kernel_io_start = RADIX_KERN_IO_START;
581 vmemmap = (struct page *)RADIX_VMEMMAP_BASE; 617 __kernel_io_end = RADIX_KERN_IO_END;
618 vmemmap = (struct page *)RADIX_VMEMMAP_START;
582 ioremap_bot = IOREMAP_BASE; 619 ioremap_bot = IOREMAP_BASE;
583 620
584#ifdef CONFIG_PCI 621#ifdef CONFIG_PCI
@@ -601,7 +638,6 @@ void __init radix__early_init_mmu(void)
601 638
602 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); 639 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
603 640
604 radix_init_iamr();
605 radix_init_pgtable(); 641 radix_init_pgtable();
606 /* Switch to the guard PID before turning on MMU */ 642 /* Switch to the guard PID before turning on MMU */
607 radix__switch_mmu_context(NULL, &init_mm); 643 radix__switch_mmu_context(NULL, &init_mm);
@@ -623,7 +659,6 @@ void radix__early_init_mmu_secondary(void)
623 __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); 659 __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
624 radix_init_amor(); 660 radix_init_amor();
625 } 661 }
626 radix_init_iamr();
627 662
628 radix__switch_mmu_context(NULL, &init_mm); 663 radix__switch_mmu_context(NULL, &init_mm);
629 if (cpu_has_feature(CPU_FTR_HVMODE)) 664 if (cpu_has_feature(CPU_FTR_HVMODE))
@@ -646,7 +681,8 @@ void radix__mmu_cleanup_all(void)
646void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, 681void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
647 phys_addr_t first_memblock_size) 682 phys_addr_t first_memblock_size)
648{ 683{
649 /* We don't currently support the first MEMBLOCK not mapping 0 684 /*
685 * We don't currently support the first MEMBLOCK not mapping 0
650 * physical on those processors 686 * physical on those processors
651 */ 687 */
652 BUG_ON(first_memblock_base != 0); 688 BUG_ON(first_memblock_base != 0);
@@ -866,6 +902,11 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
866 902
867int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) 903int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
868{ 904{
905 if (end >= RADIX_VMALLOC_START) {
906 pr_warn("Outside the supported range\n");
907 return -1;
908 }
909
869 return create_physical_mapping(start, end, nid); 910 return create_physical_mapping(start, end, nid);
870} 911}
871 912
@@ -893,6 +934,11 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
893 int nid = early_pfn_to_nid(phys >> PAGE_SHIFT); 934 int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
894 int ret; 935 int ret;
895 936
937 if ((start + page_size) >= RADIX_VMEMMAP_END) {
938 pr_warn("Outside the supported range\n");
939 return -1;
940 }
941
896 ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid); 942 ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
897 BUG_ON(ret); 943 BUG_ON(ret);
898 944
@@ -958,45 +1004,44 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
958void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 1004void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
959 pgtable_t pgtable) 1005 pgtable_t pgtable)
960{ 1006{
961 struct list_head *lh = (struct list_head *) pgtable; 1007 struct list_head *lh = (struct list_head *) pgtable;
962 1008
963 assert_spin_locked(pmd_lockptr(mm, pmdp)); 1009 assert_spin_locked(pmd_lockptr(mm, pmdp));
964 1010
965 /* FIFO */ 1011 /* FIFO */
966 if (!pmd_huge_pte(mm, pmdp)) 1012 if (!pmd_huge_pte(mm, pmdp))
967 INIT_LIST_HEAD(lh); 1013 INIT_LIST_HEAD(lh);
968 else 1014 else
969 list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); 1015 list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
970 pmd_huge_pte(mm, pmdp) = pgtable; 1016 pmd_huge_pte(mm, pmdp) = pgtable;
971} 1017}
972 1018
973pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) 1019pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
974{ 1020{
975 pte_t *ptep; 1021 pte_t *ptep;
976 pgtable_t pgtable; 1022 pgtable_t pgtable;
977 struct list_head *lh; 1023 struct list_head *lh;
978
979 assert_spin_locked(pmd_lockptr(mm, pmdp));
980
981 /* FIFO */
982 pgtable = pmd_huge_pte(mm, pmdp);
983 lh = (struct list_head *) pgtable;
984 if (list_empty(lh))
985 pmd_huge_pte(mm, pmdp) = NULL;
986 else {
987 pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
988 list_del(lh);
989 }
990 ptep = (pte_t *) pgtable;
991 *ptep = __pte(0);
992 ptep++;
993 *ptep = __pte(0);
994 return pgtable;
995}
996 1024
1025 assert_spin_locked(pmd_lockptr(mm, pmdp));
1026
1027 /* FIFO */
1028 pgtable = pmd_huge_pte(mm, pmdp);
1029 lh = (struct list_head *) pgtable;
1030 if (list_empty(lh))
1031 pmd_huge_pte(mm, pmdp) = NULL;
1032 else {
1033 pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
1034 list_del(lh);
1035 }
1036 ptep = (pte_t *) pgtable;
1037 *ptep = __pte(0);
1038 ptep++;
1039 *ptep = __pte(0);
1040 return pgtable;
1041}
997 1042
998pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, 1043pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
999 unsigned long addr, pmd_t *pmdp) 1044 unsigned long addr, pmd_t *pmdp)
1000{ 1045{
1001 pmd_t old_pmd; 1046 pmd_t old_pmd;
1002 unsigned long old; 1047 unsigned long old;
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 6a23b9ebd2a1..6a23b9ebd2a1 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 5986df48359b..c22742218bd3 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -554,7 +554,8 @@ void slb_initialize(void)
554 asm volatile("isync; slbia; isync":::"memory"); 554 asm volatile("isync; slbia; isync":::"memory");
555 create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX); 555 create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
556 556
557 /* For the boot cpu, we're running on the stack in init_thread_union, 557 /*
558 * For the boot cpu, we're running on the stack in init_thread_union,
558 * which is in the first segment of the linear mapping, and also 559 * which is in the first segment of the linear mapping, and also
559 * get_paca()->kstack hasn't been initialized yet. 560 * get_paca()->kstack hasn't been initialized yet.
560 * For secondary cpus, we need to bolt the kernel stack entry now. 561 * For secondary cpus, we need to bolt the kernel stack entry now.
@@ -691,10 +692,10 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
691 unsigned long flags; 692 unsigned long flags;
692 int ssize; 693 int ssize;
693 694
694 if (id == KERNEL_REGION_ID) { 695 if (id == LINEAR_MAP_REGION_ID) {
695 696
696 /* We only support up to MAX_PHYSMEM_BITS */ 697 /* We only support up to MAX_PHYSMEM_BITS */
697 if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS)) 698 if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS))
698 return -EFAULT; 699 return -EFAULT;
699 700
700 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; 701 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
@@ -702,20 +703,25 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
702#ifdef CONFIG_SPARSEMEM_VMEMMAP 703#ifdef CONFIG_SPARSEMEM_VMEMMAP
703 } else if (id == VMEMMAP_REGION_ID) { 704 } else if (id == VMEMMAP_REGION_ID) {
704 705
705 if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) 706 if (ea >= H_VMEMMAP_END)
706 return -EFAULT; 707 return -EFAULT;
707 708
708 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp; 709 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
709#endif 710#endif
710 } else if (id == VMALLOC_REGION_ID) { 711 } else if (id == VMALLOC_REGION_ID) {
711 712
712 if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) 713 if (ea >= H_VMALLOC_END)
713 return -EFAULT; 714 return -EFAULT;
714 715
715 if (ea < H_VMALLOC_END) 716 flags = local_paca->vmalloc_sllp;
716 flags = local_paca->vmalloc_sllp; 717
717 else 718 } else if (id == IO_REGION_ID) {
718 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; 719
720 if (ea >= H_KERN_IO_END)
721 return -EFAULT;
722
723 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
724
719 } else { 725 } else {
720 return -EFAULT; 726 return -EFAULT;
721 } 727 }
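Each kernel region now checks its own end marker (H_VMEMMAP_END, H_VMALLOC_END, H_KERN_IO_END) instead of the shared MAX_EA_BITS_PER_CONTEXT test, so an SLB miss just past a region's populated window fails cleanly. A worked example with an assumed IO-region address, for illustration only:

	/* ea = 0xc00a0000deadb000 (assumed to classify as IO_REGION_ID):
	 *   - below H_KERN_IO_END: SLB entry inserted with mmu_io_psize
	 *   - at or above H_KERN_IO_END: -EFAULT, surfaced as a normal fault
	 */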
@@ -725,6 +731,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
725 ssize = MMU_SEGSIZE_256M; 731 ssize = MMU_SEGSIZE_256M;
726 732
727 context = get_kernel_context(ea); 733 context = get_kernel_context(ea);
734
728 return slb_insert_entry(ea, context, flags, ssize, true); 735 return slb_insert_entry(ea, context, flags, ssize, true);
729} 736}
730 737
@@ -739,7 +746,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
739 * consider this as bad access if we take a SLB miss 746 * consider this as bad access if we take a SLB miss
740 * on an address above addr limit. 747 * on an address above addr limit.
741 */ 748 */
742 if (ea >= mm->context.slb_addr_limit) 749 if (ea >= mm_ctx_slb_addr_limit(&mm->context))
743 return -EFAULT; 750 return -EFAULT;
744 751
745 context = get_user_context(&mm->context, ea); 752 context = get_user_context(&mm->context, ea);
@@ -761,7 +768,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
761 768
762long do_slb_fault(struct pt_regs *regs, unsigned long ea) 769long do_slb_fault(struct pt_regs *regs, unsigned long ea)
763{ 770{
764 unsigned long id = REGION_ID(ea); 771 unsigned long id = get_region_id(ea);
765 772
766 /* IRQs are not reconciled here, so can't check irqs_disabled */ 773 /* IRQs are not reconciled here, so can't check irqs_disabled */
767 VM_WARN_ON(mfmsr() & MSR_EE); 774 VM_WARN_ON(mfmsr() & MSR_EE);
@@ -784,7 +791,7 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
784 * first class kernel code. But for performance it's probably nicer 791 * first class kernel code. But for performance it's probably nicer
785 * if they go via fast_exception_return too. 792 * if they go via fast_exception_return too.
786 */ 793 */
787 if (id >= KERNEL_REGION_ID) { 794 if (id >= LINEAR_MAP_REGION_ID) {
788 long err; 795 long err;
789#ifdef CONFIG_DEBUG_VM 796#ifdef CONFIG_DEBUG_VM
790 /* Catch recursive kernel SLB faults. */ 797 /* Catch recursive kernel SLB faults. */
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 5e4178790dee..473dd430e306 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -25,10 +25,13 @@
25 */ 25 */
26void subpage_prot_free(struct mm_struct *mm) 26void subpage_prot_free(struct mm_struct *mm)
27{ 27{
28 struct subpage_prot_table *spt = &mm->context.spt; 28 struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
29 unsigned long i, j, addr; 29 unsigned long i, j, addr;
30 u32 **p; 30 u32 **p;
31 31
32 if (!spt)
33 return;
34
32 for (i = 0; i < 4; ++i) { 35 for (i = 0; i < 4; ++i) {
33 if (spt->low_prot[i]) { 36 if (spt->low_prot[i]) {
34 free_page((unsigned long)spt->low_prot[i]); 37 free_page((unsigned long)spt->low_prot[i]);
@@ -48,13 +51,7 @@ void subpage_prot_free(struct mm_struct *mm)
48 free_page((unsigned long)p); 51 free_page((unsigned long)p);
49 } 52 }
50 spt->maxaddr = 0; 53 spt->maxaddr = 0;
51} 54 kfree(spt);
52
53void subpage_prot_init_new_context(struct mm_struct *mm)
54{
55 struct subpage_prot_table *spt = &mm->context.spt;
56
57 memset(spt, 0, sizeof(*spt));
58} 55}
59 56
60static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, 57static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
@@ -93,13 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
93static void subpage_prot_clear(unsigned long addr, unsigned long len) 90static void subpage_prot_clear(unsigned long addr, unsigned long len)
94{ 91{
95 struct mm_struct *mm = current->mm; 92 struct mm_struct *mm = current->mm;
96 struct subpage_prot_table *spt = &mm->context.spt; 93 struct subpage_prot_table *spt;
97 u32 **spm, *spp; 94 u32 **spm, *spp;
98 unsigned long i; 95 unsigned long i;
99 size_t nw; 96 size_t nw;
100 unsigned long next, limit; 97 unsigned long next, limit;
101 98
102 down_write(&mm->mmap_sem); 99 down_write(&mm->mmap_sem);
100
101 spt = mm_ctx_subpage_prot(&mm->context);
102 if (!spt)
103 goto err_out;
104
103 limit = addr + len; 105 limit = addr + len;
104 if (limit > spt->maxaddr) 106 if (limit > spt->maxaddr)
105 limit = spt->maxaddr; 107 limit = spt->maxaddr;
@@ -127,6 +129,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
127 /* now flush any existing HPTEs for the range */ 129 /* now flush any existing HPTEs for the range */
128 hpte_flush_range(mm, addr, nw); 130 hpte_flush_range(mm, addr, nw);
129 } 131 }
132
133err_out:
130 up_write(&mm->mmap_sem); 134 up_write(&mm->mmap_sem);
131} 135}
132 136
@@ -189,7 +193,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
189 unsigned long, len, u32 __user *, map) 193 unsigned long, len, u32 __user *, map)
190{ 194{
191 struct mm_struct *mm = current->mm; 195 struct mm_struct *mm = current->mm;
192 struct subpage_prot_table *spt = &mm->context.spt; 196 struct subpage_prot_table *spt;
193 u32 **spm, *spp; 197 u32 **spm, *spp;
194 unsigned long i; 198 unsigned long i;
195 size_t nw; 199 size_t nw;
@@ -218,6 +222,21 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
218 return -EFAULT; 222 return -EFAULT;
219 223
220 down_write(&mm->mmap_sem); 224 down_write(&mm->mmap_sem);
225
226 spt = mm_ctx_subpage_prot(&mm->context);
227 if (!spt) {
228 /*
229 * Allocate subpage prot table if not already done.
230 * Do this with mmap_sem held
231 */
232 spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
233 if (!spt) {
234 err = -ENOMEM;
235 goto out;
236 }
237 mm->context.hash_context->spt = spt;
238 }
239
221 subpage_mark_vma_nohuge(mm, addr, len); 240 subpage_mark_vma_nohuge(mm, addr, len);
222 for (limit = addr + len; addr < limit; addr = next) { 241 for (limit = addr + len; addr < limit; addr = next) {
223 next = pmd_addr_end(addr, limit); 242 next = pmd_addr_end(addr, limit);
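Since the table now comes into existence only inside this syscall, a process that never uses subpage protection carries a NULL spt for its whole lifetime. For reference, a hedged userspace sketch of invoking the syscall defined above (a raw syscall(2) wrapper; glibc typically provides no binding):

	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Each u32 in 'map' carries 2 protection bits per 4k subpage of
	 * the range; addr and len must be page-aligned.
	 */
	static long do_subpage_prot(unsigned long addr, unsigned long len,
				    uint32_t *map)
	{
		return syscall(__NR_subpage_prot, addr, len, map);
	}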
diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/book3s64/vphn.c
index f83044faac23..0ee7734afb50 100644
--- a/arch/powerpc/mm/vphn.c
+++ b/arch/powerpc/mm/book3s64/vphn.c
@@ -42,7 +42,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
42 u16 new = be16_to_cpup(field++); 42 u16 new = be16_to_cpup(field++);
43 43
44 if (is_32bit) { 44 if (is_32bit) {
45 /* Let's concatenate the 16 bits of this field to the 45 /*
46 * Let's concatenate the 16 bits of this field to the
46 * 15 lower bits of the previous field 47 * 15 lower bits of the previous field
47 */ 48 */
48 unpacked[++nr_assoc_doms] = 49 unpacked[++nr_assoc_doms] =
@@ -56,7 +57,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
56 unpacked[++nr_assoc_doms] = 57 unpacked[++nr_assoc_doms] =
57 cpu_to_be32(new & VPHN_FIELD_MASK); 58 cpu_to_be32(new & VPHN_FIELD_MASK);
58 } else { 59 } else {
59 /* Data is in the lower 15 bits of this field 60 /*
61 * Data is in the lower 15 bits of this field
60 * concatenated with the next 16 bit field 62 * concatenated with the next 16 bit field
61 */ 63 */
62 last = new; 64 last = new;
diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/book3s64/vphn.h
index f9ffdb3942fc..f0b93c2dd578 100644
--- a/arch/powerpc/mm/vphn.h
+++ b/arch/powerpc/mm/book3s64/vphn.h
@@ -2,8 +2,7 @@
2#ifndef _ARCH_POWERPC_MM_VPHN_H_ 2#ifndef _ARCH_POWERPC_MM_VPHN_H_
3#define _ARCH_POWERPC_MM_VPHN_H_ 3#define _ARCH_POWERPC_MM_VPHN_H_
4 4
5/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. 5/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
6 */
7#define VPHN_REGISTER_COUNT 6 6#define VPHN_REGISTER_COUNT 6
8 7
9/* 8/*
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index c8da352e8686..f137286740cb 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -105,7 +105,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
105 u64 vsid, vsidkey; 105 u64 vsid, vsidkey;
106 int psize, ssize; 106 int psize, ssize;
107 107
108 switch (REGION_ID(ea)) { 108 switch (get_region_id(ea)) {
109 case USER_REGION_ID: 109 case USER_REGION_ID:
110 pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); 110 pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
111 if (mm == NULL) 111 if (mm == NULL)
@@ -117,16 +117,20 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
117 break; 117 break;
118 case VMALLOC_REGION_ID: 118 case VMALLOC_REGION_ID:
119 pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); 119 pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
120 if (ea < VMALLOC_END) 120 psize = mmu_vmalloc_psize;
121 psize = mmu_vmalloc_psize;
122 else
123 psize = mmu_io_psize;
124 ssize = mmu_kernel_ssize; 121 ssize = mmu_kernel_ssize;
125 vsid = get_kernel_vsid(ea, mmu_kernel_ssize); 122 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
126 vsidkey = SLB_VSID_KERNEL; 123 vsidkey = SLB_VSID_KERNEL;
127 break; 124 break;
128 case KERNEL_REGION_ID: 125 case IO_REGION_ID:
129 pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); 126 pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
127 psize = mmu_io_psize;
128 ssize = mmu_kernel_ssize;
129 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
130 vsidkey = SLB_VSID_KERNEL;
131 break;
132 case LINEAR_MAP_REGION_ID:
133 pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea);
130 psize = mmu_linear_psize; 134 psize = mmu_linear_psize;
131 ssize = mmu_kernel_ssize; 135 ssize = mmu_kernel_ssize;
132 vsid = get_kernel_vsid(ea, mmu_kernel_ssize); 136 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index b5d2658c26af..2f6154b76328 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -36,7 +36,7 @@
36#include <asm/tlbflush.h> 36#include <asm/tlbflush.h>
37#include <asm/dma.h> 37#include <asm/dma.h>
38 38
39#include "mmu_decl.h" 39#include <mm/mmu_decl.h>
40 40
41/* 41/*
42 * This address range defaults to a value that is safe for all 42 * This address range defaults to a value that is safe for all
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 3f1803672c9b..641891df2046 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
366 if (!drmem_info->lmbs) 366 if (!drmem_info->lmbs)
367 return; 367 return;
368 368
369 for_each_drmem_lmb(lmb) 369 for_each_drmem_lmb(lmb) {
370 read_drconf_v1_cell(lmb, &prop); 370 read_drconf_v1_cell(lmb, &prop);
371 lmb_set_nid(lmb);
372 }
371} 373}
372 374
373static void __init init_drmem_v2_lmbs(const __be32 *prop) 375static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
412 414
413 lmb->aa_index = dr_cell.aa_index; 415 lmb->aa_index = dr_cell.aa_index;
414 lmb->flags = dr_cell.flags; 416 lmb->flags = dr_cell.flags;
417
418 lmb_set_nid(lmb);
415 } 419 }
416 } 420 }
417} 421}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 887f11bcf330..b5d3578d9f65 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -44,6 +44,7 @@
44#include <asm/mmu_context.h> 44#include <asm/mmu_context.h>
45#include <asm/siginfo.h> 45#include <asm/siginfo.h>
46#include <asm/debug.h> 46#include <asm/debug.h>
47#include <asm/kup.h>
47 48
48static inline bool notify_page_fault(struct pt_regs *regs) 49static inline bool notify_page_fault(struct pt_regs *regs)
49{ 50{
@@ -223,19 +224,46 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
223} 224}
224 225
225/* Is this a bad kernel fault ? */ 226/* Is this a bad kernel fault ? */
226static bool bad_kernel_fault(bool is_exec, unsigned long error_code, 227static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
227 unsigned long address) 228 unsigned long address, bool is_write)
228{ 229{
230 int is_exec = TRAP(regs) == 0x400;
231
229 /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ 232 /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
230 if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | 233 if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
231 DSISR_PROTFAULT))) { 234 DSISR_PROTFAULT))) {
232 printk_ratelimited(KERN_CRIT "kernel tried to execute" 235 pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
233 " exec-protected page (%lx) -" 236 address >= TASK_SIZE ? "exec-protected" : "user",
234 "exploit attempt? (uid: %d)\n", 237 address,
235 address, from_kuid(&init_user_ns, 238 from_kuid(&init_user_ns, current_uid()));
236 current_uid())); 239
240 // Kernel exec fault is always bad
241 return true;
237 } 242 }
238 return is_exec || (address >= TASK_SIZE); 243
244 if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
245 !search_exception_tables(regs->nip)) {
246 pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
247 address,
248 from_kuid(&init_user_ns, current_uid()));
249 }
250
251 // Kernel fault on kernel address is bad
252 if (address >= TASK_SIZE)
253 return true;
254
255 // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
256 if (!search_exception_tables(regs->nip))
257 return true;
258
259 // Read/write fault in a valid region (the exception table search passed
260 // above), but blocked by KUAP is bad, it can never succeed.
261 if (bad_kuap_fault(regs, is_write))
262 return true;
263
264 // What's left? Kernel fault on user in well defined regions (extable
265 // matched), and allowed by KUAP in the faulting context.
266 return false;
239} 267}
240 268
241static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, 269static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
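The rewritten check reads as a short decision list: kernel exec faults are always fatal; kernel faults on kernel addresses are fatal; user-address faults outside an exception-table region are fatal; and a fault in a valid region that KUAP blocked can never succeed, so it is fatal too. A hedged sketch of the bad_kuap_fault() hook relied on above (the regs->kuap field and the block constants are assumptions from the KUAP patches):

	static bool bad_kuap_fault_sketch(struct pt_regs *regs, bool is_write)
	{
		/* If the saved AMR still blocked this direction of access,
		 * no retry can make the copy succeed.
		 */
		return mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
		       (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE
						: AMR_KUAP_BLOCK_READ));
	}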
@@ -455,9 +483,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
455 483
456 /* 484 /*
457 * The kernel should never take an execute fault nor should it 485 * The kernel should never take an execute fault nor should it
458 * take a page fault to a kernel address. 486 * take a page fault to a kernel address or a page fault to a user
487 * address outside of dedicated places
459 */ 488 */
460 if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address))) 489 if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
461 return SIGSEGV; 490 return SIGSEGV;
462 491
463 /* 492 /*
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index 82a0e37557a5..320c1672b2ae 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -43,9 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
43 type = kmap_atomic_idx_push(); 43 type = kmap_atomic_idx_push();
44 idx = type + KM_TYPE_NR*smp_processor_id(); 44 idx = type + KM_TYPE_NR*smp_processor_id();
45 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 45 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
46#ifdef CONFIG_DEBUG_HIGHMEM 46 WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx)));
47 BUG_ON(!pte_none(*(kmap_pte-idx)));
48#endif
49 __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1); 47 __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
50 local_flush_tlb_page(NULL, vaddr); 48 local_flush_tlb_page(NULL, vaddr);
51 49
@@ -56,7 +54,6 @@ EXPORT_SYMBOL(kmap_atomic_prot);
56void __kunmap_atomic(void *kvaddr) 54void __kunmap_atomic(void *kvaddr)
57{ 55{
58 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; 56 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
59 int type __maybe_unused;
60 57
61 if (vaddr < __fix_to_virt(FIX_KMAP_END)) { 58 if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
62 pagefault_enable(); 59 pagefault_enable();
@@ -64,14 +61,12 @@ void __kunmap_atomic(void *kvaddr)
64 return; 61 return;
65 } 62 }
66 63
67 type = kmap_atomic_idx(); 64 if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) {
68 65 int type = kmap_atomic_idx();
69#ifdef CONFIG_DEBUG_HIGHMEM
70 {
71 unsigned int idx; 66 unsigned int idx;
72 67
73 idx = type + KM_TYPE_NR * smp_processor_id(); 68 idx = type + KM_TYPE_NR * smp_processor_id();
74 BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); 69 WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
75 70
76 /* 71 /*
77 * force other mappings to Oops if they'll try to access 72 * force other mappings to Oops if they'll try to access
@@ -80,7 +75,6 @@ void __kunmap_atomic(void *kvaddr)
80 pte_clear(&init_mm, vaddr, kmap_pte-idx); 75 pte_clear(&init_mm, vaddr, kmap_pte-idx);
81 local_flush_tlb_page(NULL, vaddr); 76 local_flush_tlb_page(NULL, vaddr);
82 } 77 }
83#endif
84 78
85 kmap_atomic_idx_pop(); 79 kmap_atomic_idx_pop();
86 pagefault_enable(); 80 pagefault_enable();
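The highmem.c conversion is mechanical but worth noting: IS_ENABLED(CONFIG_DEBUG_HIGHMEM) replaces the #ifdef blocks so the debug branch is always compiled (then discarded when the option is off), and the BUG_ON()s soften to WARN_ON()s. The idiom in isolation:

	/* IS_ENABLED(CONFIG_FOO) evaluates to a constant 0 or 1, so the
	 * disabled branch still gets type-checked before the compiler
	 * eliminates it -- unlike #ifdef, where it can silently rot.
	 */
	if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM))
		WARN_ON(!pte_none(*(kmap_pte - idx)));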
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9e732bb2c84a..c5c9ff2d7afc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -26,20 +26,8 @@
26#include <asm/hugetlb.h> 26#include <asm/hugetlb.h>
27#include <asm/pte-walk.h> 27#include <asm/pte-walk.h>
28 28
29
30#ifdef CONFIG_HUGETLB_PAGE
31
32#define PAGE_SHIFT_64K 16
33#define PAGE_SHIFT_512K 19
34#define PAGE_SHIFT_8M 23
35#define PAGE_SHIFT_16M 24
36#define PAGE_SHIFT_16G 34
37
38bool hugetlb_disabled = false; 29bool hugetlb_disabled = false;
39 30
40unsigned int HPAGE_SHIFT;
41EXPORT_SYMBOL(HPAGE_SHIFT);
42
43#define hugepd_none(hpd) (hpd_val(hpd) == 0) 31#define hugepd_none(hpd) (hpd_val(hpd) == 0)
44 32
45#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) 33#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))
@@ -98,19 +86,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
98 for (i = 0; i < num_hugepd; i++, hpdp++) { 86 for (i = 0; i < num_hugepd; i++, hpdp++) {
99 if (unlikely(!hugepd_none(*hpdp))) 87 if (unlikely(!hugepd_none(*hpdp)))
100 break; 88 break;
101 else { 89 hugepd_populate(hpdp, new, pshift);
102#ifdef CONFIG_PPC_BOOK3S_64
103 *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
104 (shift_to_mmu_psize(pshift) << 2));
105#elif defined(CONFIG_PPC_8xx)
106 *hpdp = __hugepd(__pa(new) | _PMD_USER |
107 (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
108 _PMD_PAGE_512K) | _PMD_PRESENT);
109#else
110 /* We use the old format for PPC_FSL_BOOK3E */
111 *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
112#endif
113 }
114 } 90 }
115 /* If we bailed from the for loop early, an error occurred, clean up */ 91 /* If we bailed from the for loop early, an error occurred, clean up */
116 if (i < num_hugepd) { 92 if (i < num_hugepd) {
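The per-platform #ifdef ladder moves behind hugepd_populate(), presumably one inline per platform in headers outside this diff. The Book3S-64 variant, reconstructed directly from the removed branch above:

	static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new,
					   unsigned int pshift)
	{
		*hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
				 (shift_to_mmu_psize(pshift) << 2));
	}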
@@ -250,7 +226,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h)
250 return __alloc_bootmem_huge_page(h); 226 return __alloc_bootmem_huge_page(h);
251} 227}
252 228
253#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) 229#ifndef CONFIG_PPC_BOOK3S_64
254#define HUGEPD_FREELIST_SIZE \ 230#define HUGEPD_FREELIST_SIZE \
255 ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) 231 ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
256 232
@@ -542,23 +518,6 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
542 return (__boundary - 1 < end - 1) ? __boundary : end; 518 return (__boundary - 1 < end - 1) ? __boundary : end;
543} 519}
544 520
545int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
546 unsigned long end, int write, struct page **pages, int *nr)
547{
548 pte_t *ptep;
549 unsigned long sz = 1UL << hugepd_shift(hugepd);
550 unsigned long next;
551
552 ptep = hugepte_offset(hugepd, addr, pdshift);
553 do {
554 next = hugepte_addr_end(addr, end, sz);
555 if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
556 return 0;
557 } while (ptep++, addr = next, addr != end);
558
559 return 1;
560}
561
562#ifdef CONFIG_PPC_MM_SLICES 521#ifdef CONFIG_PPC_MM_SLICES
563unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 522unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
564 unsigned long len, unsigned long pgoff, 523 unsigned long len, unsigned long pgoff,
@@ -578,24 +537,15 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
578 537
579unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 538unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
580{ 539{
581#ifdef CONFIG_PPC_MM_SLICES
582 /* With radix we don't use slices, so derive it from the vma */ 540 /* With radix we don't use slices, so derive it from the vma */
583 if (!radix_enabled()) { 541 if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
584 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); 542 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
585 543
586 return 1UL << mmu_psize_to_shift(psize); 544 return 1UL << mmu_psize_to_shift(psize);
587 } 545 }
588#endif
589 return vma_kernel_pagesize(vma); 546 return vma_kernel_pagesize(vma);
590} 547}
591 548
592static inline bool is_power_of_4(unsigned long x)
593{
594 if (is_power_of_2(x))
595 return (__ilog2(x) % 2) ? false : true;
596 return false;
597}
598
599static int __init add_huge_page_size(unsigned long long size) 549static int __init add_huge_page_size(unsigned long long size)
600{ 550{
601 int shift = __ffs(size); 551 int shift = __ffs(size);
@@ -603,37 +553,13 @@ static int __init add_huge_page_size(unsigned long long size)
603 553
604 /* Check that it is a page size supported by the hardware and 554 /* Check that it is a page size supported by the hardware and
605 * that it fits within pagetable and slice limits. */ 555 * that it fits within pagetable and slice limits. */
606 if (size <= PAGE_SIZE) 556 if (size <= PAGE_SIZE || !is_power_of_2(size))
607 return -EINVAL;
608#if defined(CONFIG_PPC_FSL_BOOK3E)
609 if (!is_power_of_4(size))
610 return -EINVAL; 557 return -EINVAL;
611#elif !defined(CONFIG_PPC_8xx)
612 if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
613 return -EINVAL;
614#endif
615 558
616 if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) 559 mmu_psize = check_and_get_huge_psize(size);
560 if (mmu_psize < 0)
617 return -EINVAL; 561 return -EINVAL;
618 562
619#ifdef CONFIG_PPC_BOOK3S_64
620 /*
621 * We need to make sure that for different page sizes reported by
622 * firmware we only add hugetlb support for page sizes that can be
623 * supported by linux page table layout.
624 * For now we have
625 * Radix: 2M and 1G
626 * Hash: 16M and 16G
627 */
628 if (radix_enabled()) {
629 if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
630 return -EINVAL;
631 } else {
632 if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
633 return -EINVAL;
634 }
635#endif
636
637 BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); 563 BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
638 564
639 /* Return if huge page size has already been setup */ 565 /* Return if huge page size has already been setup */
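check_and_get_huge_psize() likewise absorbs the removed per-MMU validation. A sketch of the Book3S-64 half, reconstructed from the deleted lines (the real helper also covers 8xx and FSL Book3E and lives in a header not shown here):

	static int check_and_get_huge_psize_sketch(unsigned long long size)
	{
		int shift = __ffs(size);
		int mmu_psize = shift_to_mmu_psize(shift);

		if (mmu_psize < 0)
			return -EINVAL;

		if (radix_enabled()) {		/* radix: 2M and 1G only */
			if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
				return -EINVAL;
		} else {			/* hash: 16M and 16G only */
			if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
				return -EINVAL;
		}
		return mmu_psize;
	}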
@@ -669,10 +595,10 @@ static int __init hugetlbpage_init(void)
669 return 0; 595 return 0;
670 } 596 }
671 597
672#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) 598 if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
673 if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) 599 !mmu_has_feature(MMU_FTR_16M_PAGE))
674 return -ENODEV; 600 return -ENODEV;
675#endif 601
676 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 602 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
677 unsigned shift; 603 unsigned shift;
678 unsigned pdshift; 604 unsigned pdshift;
@@ -710,29 +636,13 @@ static int __init hugetlbpage_init(void)
710 pgtable_cache_add(PTE_INDEX_SIZE); 636 pgtable_cache_add(PTE_INDEX_SIZE);
711 else if (pdshift > shift) 637 else if (pdshift > shift)
712 pgtable_cache_add(pdshift - shift); 638 pgtable_cache_add(pdshift - shift);
713#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) 639 else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx))
714 else
715 pgtable_cache_add(PTE_T_ORDER); 640 pgtable_cache_add(PTE_T_ORDER);
716#endif
717 } 641 }
718 642
719#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) 643 if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
720 /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ 644 hugetlbpage_init_default();
721 if (mmu_psize_defs[MMU_PAGE_4M].shift) 645
722 HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
723 else if (mmu_psize_defs[MMU_PAGE_512K].shift)
724 HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift;
725#else
726 /* Set default large page size. Currently, we pick 16M or 1M
727 * depending on what is available
728 */
729 if (mmu_psize_defs[MMU_PAGE_16M].shift)
730 HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
731 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
732 HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
733 else if (mmu_psize_defs[MMU_PAGE_2M].shift)
734 HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
735#endif
736 return 0; 646 return 0;
737} 647}
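
Both default-HPAGE_SHIFT chains that used to end this function are gone: the FSL_BOOK3E/8xx branch disappears entirely (the huge page size is fixed on those platforms, so the default can presumably become a compile-time constant), and the variable-size case is delegated to hugetlbpage_init_default() behind CONFIG_HUGETLB_PAGE_SIZE_VARIABLE. A hedged sketch of that helper, reconstructed from the removed 16M/1M/2M chain above rather than taken from the new file:

    /* Sketch only, reconstructed from the removed lines: where the huge
     * page size is variable, pick a default from what the MMU reported. */
    void __init hugetlbpage_init_default(void)
    {
            if (mmu_psize_defs[MMU_PAGE_16M].shift)
                    HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
            else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                    HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
            else if (mmu_psize_defs[MMU_PAGE_2M].shift)
                    HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
    }
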
738 648
@@ -756,113 +666,8 @@ void flush_dcache_icache_hugepage(struct page *page)
756 } 666 }
757} 667}
758 668
759#endif /* CONFIG_HUGETLB_PAGE */ 669static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
760 670 unsigned long end, int write, struct page **pages, int *nr)
761/*
762 * We have 4 cases for pgds and pmds:
763 * (1) invalid (all zeroes)
764 * (2) pointer to next table, as normal; bottom 6 bits == 0
765 * (3) leaf pte for huge page _PAGE_PTE set
766 * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
767 *
 768 * So long as we atomically load page table pointers we are safe against teardown,
 769 * and we can follow the address down to the page and take a ref on it.
 770 * This function needs to be called with interrupts disabled. We use this variant
771 * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
772 */
773pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
774 bool *is_thp, unsigned *hpage_shift)
775{
776 pgd_t pgd, *pgdp;
777 pud_t pud, *pudp;
778 pmd_t pmd, *pmdp;
779 pte_t *ret_pte;
780 hugepd_t *hpdp = NULL;
781 unsigned pdshift = PGDIR_SHIFT;
782
783 if (hpage_shift)
784 *hpage_shift = 0;
785
786 if (is_thp)
787 *is_thp = false;
788
789 pgdp = pgdir + pgd_index(ea);
790 pgd = READ_ONCE(*pgdp);
791 /*
 792	 * Always operate on the local stack value. This makes sure the
 793	 * value doesn't get updated by a parallel THP split/collapse,
 794	 * page fault or page unmap. The returned pte_t * is still not
 795	 * stable, so it should be re-checked for the above conditions.
796 */
797 if (pgd_none(pgd))
798 return NULL;
799 else if (pgd_huge(pgd)) {
800 ret_pte = (pte_t *) pgdp;
801 goto out;
802 } else if (is_hugepd(__hugepd(pgd_val(pgd))))
803 hpdp = (hugepd_t *)&pgd;
804 else {
805 /*
806 * Even if we end up with an unmap, the pgtable will not
 807		 * be freed, because we do an RCU free and we are running
 808		 * with IRQs disabled here
809 */
810 pdshift = PUD_SHIFT;
811 pudp = pud_offset(&pgd, ea);
812 pud = READ_ONCE(*pudp);
813
814 if (pud_none(pud))
815 return NULL;
816 else if (pud_huge(pud)) {
817 ret_pte = (pte_t *) pudp;
818 goto out;
819 } else if (is_hugepd(__hugepd(pud_val(pud))))
820 hpdp = (hugepd_t *)&pud;
821 else {
822 pdshift = PMD_SHIFT;
823 pmdp = pmd_offset(&pud, ea);
824 pmd = READ_ONCE(*pmdp);
825 /*
826 * A hugepage collapse is captured by pmd_none, because
 827			 * it marks the pmd none and does an hpte invalidate.
828 */
829 if (pmd_none(pmd))
830 return NULL;
831
832 if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
833 if (is_thp)
834 *is_thp = true;
835 ret_pte = (pte_t *) pmdp;
836 goto out;
837 }
838 /*
 839			 * The pmd_large check below will handle the swap pmd pte;
 840			 * we need to do both checks because they are config
 841			 * dependent.
842 */
843 if (pmd_huge(pmd) || pmd_large(pmd)) {
844 ret_pte = (pte_t *) pmdp;
845 goto out;
846 } else if (is_hugepd(__hugepd(pmd_val(pmd))))
847 hpdp = (hugepd_t *)&pmd;
848 else
849 return pte_offset_kernel(&pmd, ea);
850 }
851 }
852 if (!hpdp)
853 return NULL;
854
855 ret_pte = hugepte_offset(*hpdp, ea, pdshift);
856 pdshift = hugepd_shift(*hpdp);
857out:
858 if (hpage_shift)
859 *hpage_shift = pdshift;
860 return ret_pte;
861}
862EXPORT_SYMBOL_GPL(__find_linux_pte);
863
864int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
865 unsigned long end, int write, struct page **pages, int *nr)
866{ 671{
867 unsigned long pte_end; 672 unsigned long pte_end;
868 struct page *head, *page; 673 struct page *head, *page;
@@ -908,3 +713,20 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
908 713
909 return 1; 714 return 1;
910} 715}
716
717int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
718 unsigned long end, int write, struct page **pages, int *nr)
719{
720 pte_t *ptep;
721 unsigned long sz = 1UL << hugepd_shift(hugepd);
722 unsigned long next;
723
724 ptep = hugepte_offset(hugepd, addr, pdshift);
725 do {
726 next = hugepte_addr_end(addr, end, sz);
727 if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
728 return 0;
729 } while (ptep++, addr = next, addr != end);
730
731 return 1;
732}
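
gup_huge_pd() visits every huge PTE covering [addr, end): each iteration advances to the next sz-aligned boundary, clamped to end. Roughly, the stepping helper it relies on (hugepte_addr_end(), in the powerpc hugetlb header) computes:

    /* Sketch of the stepping arithmetic: next sz-aligned boundary, never
     * past end. The -1s keep the comparison safe if the boundary wraps
     * to 0 at the very top of the address space. */
    static inline unsigned long hugepte_addr_end(unsigned long addr,
                                                 unsigned long end,
                                                 unsigned long sz)
    {
            unsigned long boundary = (addr + sz) & ~(sz - 1);

            return (boundary - 1 < end - 1) ? boundary : end;
    }
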
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 1e6910eb70ed..3bcae9e5e954 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -24,6 +24,32 @@
24#include <linux/string.h> 24#include <linux/string.h>
25#include <asm/pgalloc.h> 25#include <asm/pgalloc.h>
26#include <asm/pgtable.h> 26#include <asm/pgtable.h>
27#include <asm/kup.h>
28
29static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
30static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
31
32static int __init parse_nosmep(char *p)
33{
34 disable_kuep = true;
35 pr_warn("Disabling Kernel Userspace Execution Prevention\n");
36 return 0;
37}
38early_param("nosmep", parse_nosmep);
39
40static int __init parse_nosmap(char *p)
41{
42 disable_kuap = true;
43 pr_warn("Disabling Kernel Userspace Access Protection\n");
44 return 0;
45}
46early_param("nosmap", parse_nosmap);
47
48void __ref setup_kup(void)
49{
50 setup_kuep(disable_kuep);
51 setup_kuap(disable_kuap);
52}
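
Note the parameter names: rather than inventing new switches, this reuses nosmep and nosmap, apparently mirroring the x86 command-line options of the same name. A kernel built with CONFIG_PPC_KUEP/CONFIG_PPC_KUAP can therefore still boot with either protection off, e.g. (hypothetical boot line):

    root=/dev/sda2 ro nosmep nosmap
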
27 53
28#define CTOR(shift) static void ctor_##shift(void *addr) \ 54#define CTOR(shift) static void ctor_##shift(void *addr) \
29{ \ 55{ \
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 41a3513cadc9..c3121b6c8cbd 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -45,8 +45,10 @@
45#include <asm/tlb.h> 45#include <asm/tlb.h>
46#include <asm/sections.h> 46#include <asm/sections.h>
47#include <asm/hugetlb.h> 47#include <asm/hugetlb.h>
48#include <asm/kup.h>
49#include <asm/kasan.h>
48 50
49#include "mmu_decl.h" 51#include <mm/mmu_decl.h>
50 52
51#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) 53#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
52/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */ 54/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
@@ -178,6 +180,10 @@ void __init MMU_init(void)
178 btext_unmap(); 180 btext_unmap();
179#endif 181#endif
180 182
183 kasan_mmu_init();
184
185 setup_kup();
186
181 /* Shortly after that, the entire linear mapping will be available */ 187 /* Shortly after that, the entire linear mapping will be available */
182 memblock_set_current_limit(lowmem_end_addr); 188 memblock_set_current_limit(lowmem_end_addr);
183} 189}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a4c155af1597..45b02fa11cd8 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -66,7 +66,7 @@
66#include <asm/iommu.h> 66#include <asm/iommu.h>
67#include <asm/vdso.h> 67#include <asm/vdso.h>
68 68
69#include "mmu_decl.h" 69#include <mm/mmu_decl.h>
70 70
71phys_addr_t memstart_addr = ~0; 71phys_addr_t memstart_addr = ~0;
72EXPORT_SYMBOL_GPL(memstart_addr); 72EXPORT_SYMBOL_GPL(memstart_addr);
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
new file mode 100644
index 000000000000..6577897673dd
--- /dev/null
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -0,0 +1,5 @@
1# SPDX-License-Identifier: GPL-2.0
2
3KASAN_SANITIZE := n
4
5obj-$(CONFIG_PPC32) += kasan_init_32.o
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
new file mode 100644
index 000000000000..0d62be3cba47
--- /dev/null
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -0,0 +1,183 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#define DISABLE_BRANCH_PROFILING
4
5#include <linux/kasan.h>
6#include <linux/printk.h>
7#include <linux/memblock.h>
8#include <linux/sched/task.h>
9#include <linux/vmalloc.h>
10#include <asm/pgalloc.h>
11#include <asm/code-patching.h>
12#include <mm/mmu_decl.h>
13
14static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
15{
16 unsigned long va = (unsigned long)kasan_early_shadow_page;
17 phys_addr_t pa = __pa(kasan_early_shadow_page);
18 int i;
19
20 for (i = 0; i < PTRS_PER_PTE; i++, ptep++)
21 __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
22}
23
24static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
25{
26 pmd_t *pmd;
27 unsigned long k_cur, k_next;
28
29 pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
30
31 for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
32 pte_t *new;
33
34 k_next = pgd_addr_end(k_cur, k_end);
35 if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
36 continue;
37
38 new = pte_alloc_one_kernel(&init_mm);
39
40 if (!new)
41 return -ENOMEM;
42 if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
43 kasan_populate_pte(new, PAGE_READONLY);
44 else
45 kasan_populate_pte(new, PAGE_KERNEL_RO);
46 pmd_populate_kernel(&init_mm, pmd, new);
47 }
48 return 0;
49}
50
51static void __ref *kasan_get_one_page(void)
52{
53 if (slab_is_available())
54 return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
55
56 return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
57}
58
59static int __ref kasan_init_region(void *start, size_t size)
60{
61 unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
62 unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
63 unsigned long k_cur;
64 int ret;
65 void *block = NULL;
66
67 ret = kasan_init_shadow_page_tables(k_start, k_end);
68 if (ret)
69 return ret;
70
71 if (!slab_is_available())
72 block = memblock_alloc(k_end - k_start, PAGE_SIZE);
73
74 for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
75 pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
76 void *va = block ? block + k_cur - k_start : kasan_get_one_page();
77 pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
78
79 if (!va)
80 return -ENOMEM;
81
82 __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
83 }
84 flush_tlb_kernel_range(k_start, k_end);
85 return 0;
86}
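
For orientation: KASAN tracks each 8-byte granule of address space with one shadow byte, so the kasan_mem_to_shadow() used above is essentially a shift plus an arch-chosen offset. The generic helper amounts to:

    /* One shadow byte per 8 bytes of real memory, i.e.
     * shadow = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET,
     * with KASAN_SHADOW_SCALE_SHIFT == 3. */
    static inline void *kasan_mem_to_shadow(const void *addr)
    {
            return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                    + KASAN_SHADOW_OFFSET;
    }

That is why kasan_init_region() sizes its memblock allocation as k_end - k_start: one eighth of the region being shadowed.
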
87
88static void __init kasan_remap_early_shadow_ro(void)
89{
90 if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
91 kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
92 else
93 kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
94
95 flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
96}
97
98void __init kasan_mmu_init(void)
99{
100 int ret;
101 struct memblock_region *reg;
102
103 if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
104 ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
105
106 if (ret)
107 panic("kasan: kasan_init_shadow_page_tables() failed");
108 }
109
110 for_each_memblock(memory, reg) {
111 phys_addr_t base = reg->base;
112 phys_addr_t top = min(base + reg->size, total_lowmem);
113
114 if (base >= top)
115 continue;
116
117 ret = kasan_init_region(__va(base), top - base);
118 if (ret)
119 panic("kasan: kasan_init_region() failed");
120 }
121}
122
123void __init kasan_init(void)
124{
125 kasan_remap_early_shadow_ro();
126
127 clear_page(kasan_early_shadow_page);
128
129 /* At this point kasan is fully initialized. Enable error messages */
130 init_task.kasan_depth = 0;
131 pr_info("KASAN init done\n");
132}
133
134#ifdef CONFIG_MODULES
135void *module_alloc(unsigned long size)
136{
137 void *base = vmalloc_exec(size);
138
139 if (!base)
140 return NULL;
141
142 if (!kasan_init_region(base, size))
143 return base;
144
145 vfree(base);
146
147 return NULL;
148}
149#endif
150
151#ifdef CONFIG_PPC_BOOK3S_32
152u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0};
153
154static void __init kasan_early_hash_table(void)
155{
156 modify_instruction_site(&patch__hash_page_A0, 0xffff, __pa(early_hash) >> 16);
157 modify_instruction_site(&patch__flush_hash_A0, 0xffff, __pa(early_hash) >> 16);
158
159 Hash = (struct hash_pte *)early_hash;
160}
161#else
162static void __init kasan_early_hash_table(void) {}
163#endif
164
165void __init kasan_early_init(void)
166{
167 unsigned long addr = KASAN_SHADOW_START;
168 unsigned long end = KASAN_SHADOW_END;
169 unsigned long next;
170 pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr);
171
172 BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);
173
174 kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);
175
176 do {
177 next = pgd_addr_end(addr, end);
178 pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
179 } while (pmd++, addr = next, addr != end);
180
181 if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
182 kasan_early_hash_table();
183}
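
Taken together, the file sets KASAN up in three phases; roughly (ordering inferred from the call sites added elsewhere in this series):

    /* 1. kasan_early_init()  - called from early boot; every shadow PMD
     *    points at a single zero page so instrumented code can run before
     *    memblock is usable. On hash (MMU_FTR_HPTE_TABLE) it also installs
     *    the temporary early_hash table.
     * 2. kasan_mmu_init()    - called from MMU_init(); allocates real
     *    shadow memory for all of lowmem, region by region.
     * 3. kasan_init()        - remaps the early shadow read-only, clears
     *    init_task.kasan_depth and thereby enables error reports.
     */
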
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f6787f90e158..cd525d709072 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -54,7 +54,7 @@
54#include <asm/swiotlb.h> 54#include <asm/swiotlb.h>
55#include <asm/rtas.h> 55#include <asm/rtas.h>
56 56
57#include "mmu_decl.h" 57#include <mm/mmu_decl.h>
58 58
59#ifndef CPU_FTR_COHERENT_ICACHE 59#ifndef CPU_FTR_COHERENT_ICACHE
60#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ 60#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */
@@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
109 return -ENODEV; 109 return -ENODEV;
110} 110}
111 111
112int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 112int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
113 bool want_memblock) 113 bool want_memblock)
114{ 114{
115 unsigned long start_pfn = start >> PAGE_SHIFT; 115 unsigned long start_pfn = start >> PAGE_SHIFT;
116 unsigned long nr_pages = size >> PAGE_SHIFT; 116 unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -131,8 +131,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
131} 131}
132 132
133#ifdef CONFIG_MEMORY_HOTREMOVE 133#ifdef CONFIG_MEMORY_HOTREMOVE
134int __meminit arch_remove_memory(int nid, u64 start, u64 size, 134int __ref arch_remove_memory(int nid, u64 start, u64 size,
135 struct vmem_altmap *altmap) 135 struct vmem_altmap *altmap)
136{ 136{
137 unsigned long start_pfn = start >> PAGE_SHIFT; 137 unsigned long start_pfn = start >> PAGE_SHIFT;
138 unsigned long nr_pages = size >> PAGE_SHIFT; 138 unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -161,7 +161,8 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size,
161 */ 161 */
162 vm_unmap_aliases(); 162 vm_unmap_aliases();
163 163
164 resize_hpt_for_hotplug(memblock_phys_mem_size()); 164 if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
165 pr_warn("Hash collision while resizing HPT\n");
165 166
166 return ret; 167 return ret;
167} 168}
@@ -309,6 +310,10 @@ void __init mem_init(void)
309 mem_init_print_info(NULL); 310 mem_init_print_info(NULL);
310#ifdef CONFIG_PPC32 311#ifdef CONFIG_PPC32
311 pr_info("Kernel virtual memory layout:\n"); 312 pr_info("Kernel virtual memory layout:\n");
313#ifdef CONFIG_KASAN
314 pr_info(" * 0x%08lx..0x%08lx : kasan shadow mem\n",
315 KASAN_SHADOW_START, KASAN_SHADOW_END);
316#endif
312 pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); 317 pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP);
313#ifdef CONFIG_HIGHMEM 318#ifdef CONFIG_HIGHMEM
314 pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", 319 pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index bb52320b7369..6b049d82b98a 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -98,7 +98,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
98 switch_mmu_context(prev, next, tsk); 98 switch_mmu_context(prev, next, tsk);
99} 99}
100 100
101#ifdef CONFIG_PPC32 101#ifndef CONFIG_PPC_BOOK3S_64
102void arch_exit_mmap(struct mm_struct *mm) 102void arch_exit_mmap(struct mm_struct *mm)
103{ 103{
104 void *frag = pte_frag_get(&mm->context); 104 void *frag = pte_frag_get(&mm->context);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 74ff61dabcb1..7bac0aa2026a 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -83,6 +83,8 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
83} 83}
84#endif 84#endif
85 85
86static inline void print_system_hash_info(void) {}
87
86#else /* CONFIG_PPC_MMU_NOHASH */ 88#else /* CONFIG_PPC_MMU_NOHASH */
87 89
88extern void hash_preload(struct mm_struct *mm, unsigned long ea, 90extern void hash_preload(struct mm_struct *mm, unsigned long ea,
@@ -92,6 +94,8 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea,
92extern void _tlbie(unsigned long address); 94extern void _tlbie(unsigned long address);
93extern void _tlbia(void); 95extern void _tlbia(void);
94 96
97void print_system_hash_info(void);
98
95#endif /* CONFIG_PPC_MMU_NOHASH */ 99#endif /* CONFIG_PPC_MMU_NOHASH */
96 100
97#ifdef CONFIG_PPC32 101#ifdef CONFIG_PPC32
@@ -104,8 +108,8 @@ extern int __map_without_bats;
104extern unsigned int rtas_data, rtas_size; 108extern unsigned int rtas_data, rtas_size;
105 109
106struct hash_pte; 110struct hash_pte;
107extern struct hash_pte *Hash, *Hash_end; 111extern struct hash_pte *Hash;
108extern unsigned long Hash_size, Hash_mask; 112extern u8 early_hash[];
109 113
110#endif /* CONFIG_PPC32 */ 114#endif /* CONFIG_PPC32 */
111 115
@@ -130,6 +134,7 @@ extern void wii_memory_fixups(void);
130 */ 134 */
131#ifdef CONFIG_PPC32 135#ifdef CONFIG_PPC32
132extern void MMU_init_hw(void); 136extern void MMU_init_hw(void);
137void MMU_init_hw_patch(void);
133unsigned long mmu_mapin_ram(unsigned long base, unsigned long top); 138unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
134#endif 139#endif
135 140
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/nohash/40x.c
index b9cf6f8764b0..460459b6f53e 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/nohash/40x.c
@@ -49,7 +49,7 @@
49#include <asm/machdep.h> 49#include <asm/machdep.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51 51
52#include "mmu_decl.h" 52#include <mm/mmu_decl.h>
53 53
54extern int __map_without_ltlbs; 54extern int __map_without_ltlbs;
55/* 55/*
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/nohash/44x.c
index aad127acdbaa..c07983ebc02e 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -31,7 +31,7 @@
31#include <asm/cacheflush.h> 31#include <asm/cacheflush.h>
32#include <asm/code-patching.h> 32#include <asm/code-patching.h>
33 33
34#include "mmu_decl.h" 34#include <mm/mmu_decl.h>
35 35
36/* Used by the 44x TLB replacement exception handler. 36/* Used by the 44x TLB replacement exception handler.
 37 * It just needs to be declared someplace.	 37 * It just needs to be declared someplace.
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/nohash/8xx.c
index fe1f6443d57f..70d55b615b62 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -17,7 +17,7 @@
17#include <asm/fixmap.h> 17#include <asm/fixmap.h>
18#include <asm/code-patching.h> 18#include <asm/code-patching.h>
19 19
20#include "mmu_decl.h" 20#include <mm/mmu_decl.h>
21 21
22#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) 22#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
23 23
@@ -213,3 +213,27 @@ void flush_instruction_cache(void)
213 mtspr(SPRN_IC_CST, IDC_INVALL); 213 mtspr(SPRN_IC_CST, IDC_INVALL);
214 isync(); 214 isync();
215} 215}
216
217#ifdef CONFIG_PPC_KUEP
218void __init setup_kuep(bool disabled)
219{
220 if (disabled)
221 return;
222
223 pr_info("Activating Kernel Userspace Execution Prevention\n");
224
225 mtspr(SPRN_MI_AP, MI_APG_KUEP);
226}
227#endif
228
229#ifdef CONFIG_PPC_KUAP
230void __init setup_kuap(bool disabled)
231{
232 pr_info("Activating Kernel Userspace Access Protection\n");
233
234 if (disabled)
235 pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
236
237 mtspr(SPRN_MD_AP, MD_APG_KUAP);
238}
239#endif
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
new file mode 100644
index 000000000000..33b6f6f29d3f
--- /dev/null
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -0,0 +1,18 @@
1# SPDX-License-Identifier: GPL-2.0
2
3ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
4
5obj-y += mmu_context.o tlb.o tlb_low.o
6obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
7obj-$(CONFIG_40x) += 40x.o
8obj-$(CONFIG_44x) += 44x.o
9obj-$(CONFIG_PPC_8xx) += 8xx.o
10obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o
11ifdef CONFIG_HUGETLB_PAGE
12obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o
13endif
14
15# Disable kcov instrumentation on sensitive code
16# This is necessary for booting with kcov enabled on book3e machines
17KCOV_INSTRUMENT_tlb.o := n
18KCOV_INSTRUMENT_fsl_booke.o := n
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
index f84ec46cdb26..61915f4d3c7f 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
@@ -11,8 +11,9 @@
11 11
12#include <asm/mmu.h> 12#include <asm/mmu.h>
13 13
14#ifdef CONFIG_PPC_FSL_BOOK3E
15#ifdef CONFIG_PPC64 14#ifdef CONFIG_PPC64
15#include <asm/paca.h>
16
16static inline int tlb1_next(void) 17static inline int tlb1_next(void)
17{ 18{
18 struct paca_struct *paca = get_paca(); 19 struct paca_struct *paca = get_paca();
@@ -29,33 +30,6 @@ static inline int tlb1_next(void)
29 tcd->esel_next = next; 30 tcd->esel_next = next;
30 return this; 31 return this;
31} 32}
32#else
33static inline int tlb1_next(void)
34{
35 int index, ncams;
36
37 ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
38
39 index = this_cpu_read(next_tlbcam_idx);
40
41 /* Just round-robin the entries and wrap when we hit the end */
42 if (unlikely(index == ncams - 1))
43 __this_cpu_write(next_tlbcam_idx, tlbcam_index);
44 else
45 __this_cpu_inc(next_tlbcam_idx);
46
47 return index;
48}
49#endif /* !PPC64 */
50#endif /* FSL */
51
52static inline int mmu_get_tsize(int psize)
53{
54 return mmu_psize_defs[psize].enc;
55}
56
57#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64)
58#include <asm/paca.h>
59 33
60static inline void book3e_tlb_lock(void) 34static inline void book3e_tlb_lock(void)
61{ 35{
@@ -98,6 +72,23 @@ static inline void book3e_tlb_unlock(void)
98 paca->tcd_ptr->lock = 0; 72 paca->tcd_ptr->lock = 0;
99} 73}
100#else 74#else
75static inline int tlb1_next(void)
76{
77 int index, ncams;
78
79 ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
80
81 index = this_cpu_read(next_tlbcam_idx);
82
83 /* Just round-robin the entries and wrap when we hit the end */
84 if (unlikely(index == ncams - 1))
85 __this_cpu_write(next_tlbcam_idx, tlbcam_index);
86 else
87 __this_cpu_inc(next_tlbcam_idx);
88
89 return index;
90}
91
101static inline void book3e_tlb_lock(void) 92static inline void book3e_tlb_lock(void)
102{ 93{
103} 94}
@@ -139,10 +130,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
139 unsigned long psize, tsize, shift; 130 unsigned long psize, tsize, shift;
140 unsigned long flags; 131 unsigned long flags;
141 struct mm_struct *mm; 132 struct mm_struct *mm;
142
143#ifdef CONFIG_PPC_FSL_BOOK3E
144 int index; 133 int index;
145#endif
146 134
147 if (unlikely(is_kernel_addr(ea))) 135 if (unlikely(is_kernel_addr(ea)))
148 return; 136 return;
@@ -166,11 +154,9 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
166 return; 154 return;
167 } 155 }
168 156
169#ifdef CONFIG_PPC_FSL_BOOK3E
170 /* We have to use the CAM(TLB1) on FSL parts for hugepages */ 157 /* We have to use the CAM(TLB1) on FSL parts for hugepages */
171 index = tlb1_next(); 158 index = tlb1_next();
172 mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); 159 mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
173#endif
174 160
175 mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); 161 mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
176 mas2 = ea & ~((1UL << shift) - 1); 162 mas2 = ea & ~((1UL << shift) - 1);
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 1032ef7aaf62..75e9e2c35fe2 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -15,7 +15,7 @@
15#include <asm/tlb.h> 15#include <asm/tlb.h>
16#include <asm/dma.h> 16#include <asm/dma.h>
17 17
18#include "mmu_decl.h" 18#include <mm/mmu_decl.h>
19 19
20#ifdef CONFIG_SPARSEMEM_VMEMMAP 20#ifdef CONFIG_SPARSEMEM_VMEMMAP
21/* 21/*
@@ -55,7 +55,7 @@ void vmemmap_remove_mapping(unsigned long start,
55#endif 55#endif
56#endif /* CONFIG_SPARSEMEM_VMEMMAP */ 56#endif /* CONFIG_SPARSEMEM_VMEMMAP */
57 57
58static __ref void *early_alloc_pgtable(unsigned long size) 58static void __init *early_alloc_pgtable(unsigned long size)
59{ 59{
60 void *ptr; 60 void *ptr;
61 61
@@ -74,7 +74,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
74 * map_kernel_page adds an entry to the ioremap page table 74 * map_kernel_page adds an entry to the ioremap page table
75 * and adds an entry to the HPT, possibly bolting it 75 * and adds an entry to the HPT, possibly bolting it
76 */ 76 */
77int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) 77int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
78{ 78{
79 pgd_t *pgdp; 79 pgd_t *pgdp;
80 pud_t *pudp; 80 pud_t *pudp;
@@ -98,20 +98,17 @@ int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
98#ifndef __PAGETABLE_PUD_FOLDED 98#ifndef __PAGETABLE_PUD_FOLDED
99 if (pgd_none(*pgdp)) { 99 if (pgd_none(*pgdp)) {
100 pudp = early_alloc_pgtable(PUD_TABLE_SIZE); 100 pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
101 BUG_ON(pudp == NULL);
102 pgd_populate(&init_mm, pgdp, pudp); 101 pgd_populate(&init_mm, pgdp, pudp);
103 } 102 }
104#endif /* !__PAGETABLE_PUD_FOLDED */ 103#endif /* !__PAGETABLE_PUD_FOLDED */
105 pudp = pud_offset(pgdp, ea); 104 pudp = pud_offset(pgdp, ea);
106 if (pud_none(*pudp)) { 105 if (pud_none(*pudp)) {
107 pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); 106 pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
108 BUG_ON(pmdp == NULL);
109 pud_populate(&init_mm, pudp, pmdp); 107 pud_populate(&init_mm, pudp, pmdp);
110 } 108 }
111 pmdp = pmd_offset(pudp, ea); 109 pmdp = pmd_offset(pudp, ea);
112 if (!pmd_present(*pmdp)) { 110 if (!pmd_present(*pmdp)) {
113 ptep = early_alloc_pgtable(PAGE_SIZE); 111 ptep = early_alloc_pgtable(PAGE_SIZE);
114 BUG_ON(ptep == NULL);
115 pmd_populate_kernel(&init_mm, pmdp, ptep); 112 pmd_populate_kernel(&init_mm, pmdp, ptep);
116 } 113 }
117 ptep = pte_offset_kernel(pmdp, ea); 114 ptep = pte_offset_kernel(pmdp, ea);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/nohash/fsl_booke.c
index 210cbc1faf63..71a1a36751dd 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -54,7 +54,7 @@
54#include <asm/setup.h> 54#include <asm/setup.h>
55#include <asm/paca.h> 55#include <asm/paca.h>
56 56
57#include "mmu_decl.h" 57#include <mm/mmu_decl.h>
58 58
59unsigned int tlbcam_index; 59unsigned int tlbcam_index;
60 60
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/nohash/mmu_context.c
index 1945c5f19f5e..ae4505d5b4b8 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -52,7 +52,7 @@
52#include <asm/mmu_context.h> 52#include <asm/mmu_context.h>
53#include <asm/tlbflush.h> 53#include <asm/tlbflush.h>
54 54
55#include "mmu_decl.h" 55#include <mm/mmu_decl.h>
56 56
57/* 57/*
58 * The MPC8xx has only 16 contexts. We rotate through them on each task switch. 58 * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/nohash/tlb.c
index ac23dc1c6535..24f88efb05bf 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -46,7 +46,7 @@
46#include <asm/hugetlb.h> 46#include <asm/hugetlb.h>
47#include <asm/paca.h> 47#include <asm/paca.h>
48 48
49#include "mmu_decl.h" 49#include <mm/mmu_decl.h>
50 50
51/* 51/*
 52 * This struct lists the sw-supported page sizes. The hardware MMU may support	 52 * This struct lists the sw-supported page sizes. The hardware MMU may support
@@ -433,11 +433,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
433 unsigned long rid = (address & rmask) | 0x1000000000000000ul; 433 unsigned long rid = (address & rmask) | 0x1000000000000000ul;
434 unsigned long vpte = address & ~rmask; 434 unsigned long vpte = address & ~rmask;
435 435
436#ifdef CONFIG_PPC_64K_PAGES
437 vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
438#else
439 vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; 436 vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
440#endif
441 vpte |= rid; 437 vpte |= rid;
442 __flush_tlb_page(tlb->mm, vpte, tsize, 0); 438 __flush_tlb_page(tlb->mm, vpte, tsize, 0);
443 } 439 }
@@ -625,21 +621,12 @@ static void early_init_this_mmu(void)
625 621
626 case PPC_HTW_IBM: 622 case PPC_HTW_IBM:
627 mas4 |= MAS4_INDD; 623 mas4 |= MAS4_INDD;
628#ifdef CONFIG_PPC_64K_PAGES
629 mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
630 mmu_pte_psize = MMU_PAGE_256M;
631#else
632 mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; 624 mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
633 mmu_pte_psize = MMU_PAGE_1M; 625 mmu_pte_psize = MMU_PAGE_1M;
634#endif
635 break; 626 break;
636 627
637 case PPC_HTW_NONE: 628 case PPC_HTW_NONE:
638#ifdef CONFIG_PPC_64K_PAGES
639 mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
640#else
641 mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; 629 mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
642#endif
643 mmu_pte_psize = mmu_virtual_psize; 630 mmu_pte_psize = mmu_virtual_psize;
644 break; 631 break;
645 } 632 }
@@ -800,5 +787,9 @@ void __init early_init_mmu(void)
800#ifdef CONFIG_PPC_47x 787#ifdef CONFIG_PPC_47x
801 early_init_mmu_47x(); 788 early_init_mmu_47x();
802#endif 789#endif
790
791#ifdef CONFIG_PPC_MM_SLICES
792 mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
793#endif
803} 794}
804#endif /* CONFIG_PPC64 */ 795#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index e066a658acac..e066a658acac 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 9ed90064f542..58959ce15415 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -24,11 +24,7 @@
24#include <asm/kvm_booke_hv_asm.h> 24#include <asm/kvm_booke_hv_asm.h>
25#include <asm/feature-fixups.h> 25#include <asm/feature-fixups.h>
26 26
27#ifdef CONFIG_PPC_64K_PAGES
28#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
29#else
30#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE) 27#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE)
31#endif
32#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE) 28#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
33#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE) 29#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
34#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE) 30#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
@@ -167,13 +163,11 @@ MMU_FTR_SECTION_ELSE
167 ldx r14,r14,r15 /* grab pgd entry */ 163 ldx r14,r14,r15 /* grab pgd entry */
168ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) 164ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
169 165
170#ifndef CONFIG_PPC_64K_PAGES
171 rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 166 rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
172 clrrdi r15,r15,3 167 clrrdi r15,r15,3
173 cmpdi cr0,r14,0 168 cmpdi cr0,r14,0
174 bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */ 169 bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */
175 ldx r14,r14,r15 /* grab pud entry */ 170 ldx r14,r14,r15 /* grab pud entry */
176#endif /* CONFIG_PPC_64K_PAGES */
177 171
178 rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 172 rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
179 clrrdi r15,r15,3 173 clrrdi r15,r15,3
@@ -682,18 +676,7 @@ normal_tlb_miss:
682 * order to handle the weird page table format used by linux 676 * order to handle the weird page table format used by linux
683 */ 677 */
684 ori r10,r15,0x1 678 ori r10,r15,0x1
685#ifdef CONFIG_PPC_64K_PAGES
686 /* For the top bits, 16 bytes per PTE */
687 rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
688 /* Now create the bottom bits as 0 in position 0x8000 and
689 * the rest calculated for 8 bytes per PTE
690 */
691 rldicl r15,r16,64-(PAGE_SHIFT-3),64-15
692 /* Insert the bottom bits in */
693 rlwimi r14,r15,0,16,31
694#else
695 rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 679 rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
696#endif
697 sldi r15,r10,60 680 sldi r15,r10,60
698 clrrdi r14,r14,3 681 clrrdi r14,r14,3
699 or r10,r15,r14 682 or r10,r15,r14
@@ -732,11 +715,7 @@ finish_normal_tlb_miss:
732 715
733 /* Check page size, if not standard, update MAS1 */ 716 /* Check page size, if not standard, update MAS1 */
734 rldicl r11,r14,64-8,64-8 717 rldicl r11,r14,64-8,64-8
735#ifdef CONFIG_PPC_64K_PAGES
736 cmpldi cr0,r11,BOOK3E_PAGESZ_64K
737#else
738 cmpldi cr0,r11,BOOK3E_PAGESZ_4K 718 cmpldi cr0,r11,BOOK3E_PAGESZ_4K
739#endif
740 beq- 1f 719 beq- 1f
741 mfspr r11,SPRN_MAS1 720 mfspr r11,SPRN_MAS1
742 rlwimi r11,r14,31,21,24 721 rlwimi r11,r14,31,21,24
@@ -857,14 +836,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
857 cmpdi cr0,r15,0 836 cmpdi cr0,r15,0
858 bge virt_page_table_tlb_miss_fault 837 bge virt_page_table_tlb_miss_fault
859 838
860#ifndef CONFIG_PPC_64K_PAGES
861 /* Get to PUD entry */ 839 /* Get to PUD entry */
862 rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 840 rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
863 clrrdi r10,r11,3 841 clrrdi r10,r11,3
864 ldx r15,r10,r15 842 ldx r15,r10,r15
865 cmpdi cr0,r15,0 843 cmpdi cr0,r15,0
866 bge virt_page_table_tlb_miss_fault 844 bge virt_page_table_tlb_miss_fault
867#endif /* CONFIG_PPC_64K_PAGES */
868 845
869 /* Get to PMD entry */ 846 /* Get to PMD entry */
870 rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 847 rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
@@ -1106,14 +1083,12 @@ htw_tlb_miss:
1106 cmpdi cr0,r15,0 1083 cmpdi cr0,r15,0
1107 bge htw_tlb_miss_fault 1084 bge htw_tlb_miss_fault
1108 1085
1109#ifndef CONFIG_PPC_64K_PAGES
1110 /* Get to PUD entry */ 1086 /* Get to PUD entry */
1111 rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 1087 rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
1112 clrrdi r10,r11,3 1088 clrrdi r10,r11,3
1113 ldx r15,r10,r15 1089 ldx r15,r10,r15
1114 cmpdi cr0,r15,0 1090 cmpdi cr0,r15,0
1115 bge htw_tlb_miss_fault 1091 bge htw_tlb_miss_fault
1116#endif /* CONFIG_PPC_64K_PAGES */
1117 1092
1118 /* Get to PMD entry */ 1093 /* Get to PMD entry */
1119 rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 1094 rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
@@ -1132,9 +1107,7 @@ htw_tlb_miss:
1132 * 4K page we need to extract a bit from the virtual address and 1107 * 4K page we need to extract a bit from the virtual address and
1133 * insert it into the "PA52" bit of the RPN. 1108 * insert it into the "PA52" bit of the RPN.
1134 */ 1109 */
1135#ifndef CONFIG_PPC_64K_PAGES
1136 rlwimi r15,r16,32-9,20,20 1110 rlwimi r15,r16,32-9,20,20
1137#endif
1138 /* Now we build the MAS: 1111 /* Now we build the MAS:
1139 * 1112 *
1140 * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG 1113 * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
@@ -1144,11 +1117,7 @@ htw_tlb_miss:
1144 * MAS 2 : Use defaults 1117 * MAS 2 : Use defaults
1145 * MAS 3+7 : Needs to be done 1118 * MAS 3+7 : Needs to be done
1146 */ 1119 */
1147#ifdef CONFIG_PPC_64K_PAGES
1148 ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT)
1149#else
1150 ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) 1120 ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
1151#endif
1152 1121
1153BEGIN_MMU_FTR_SECTION 1122BEGIN_MMU_FTR_SECTION
1154 srdi r16,r10,32 1123 srdi r16,r10,32
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f976676004ad..57e64273cb33 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -32,7 +32,6 @@
32#include <asm/sparsemem.h> 32#include <asm/sparsemem.h>
33#include <asm/prom.h> 33#include <asm/prom.h>
34#include <asm/smp.h> 34#include <asm/smp.h>
35#include <asm/cputhreads.h>
36#include <asm/topology.h> 35#include <asm/topology.h>
37#include <asm/firmware.h> 36#include <asm/firmware.h>
38#include <asm/paca.h> 37#include <asm/paca.h>
@@ -908,16 +907,22 @@ static int __init early_numa(char *p)
908} 907}
909early_param("numa", early_numa); 908early_param("numa", early_numa);
910 909
911static bool topology_updates_enabled = true; 910/*
911 * The platform can inform us through one of several mechanisms
912 * (post-migration device tree updates, PRRN or VPHN) that the NUMA
913 * assignment of a resource has changed. This controls whether we act
914 * on that. Disabled by default.
915 */
916static bool topology_updates_enabled;
912 917
913static int __init early_topology_updates(char *p) 918static int __init early_topology_updates(char *p)
914{ 919{
915 if (!p) 920 if (!p)
916 return 0; 921 return 0;
917 922
918 if (!strcmp(p, "off")) { 923 if (!strcmp(p, "on")) {
919 pr_info("Disabling topology updates\n"); 924 pr_warn("Caution: enabling topology updates\n");
920 topology_updates_enabled = false; 925 topology_updates_enabled = true;
921 } 926 }
922 927
923 return 0; 928 return 0;
@@ -1063,7 +1068,7 @@ u64 memory_hotplug_max(void)
1063/* Virtual Processor Home Node (VPHN) support */ 1068/* Virtual Processor Home Node (VPHN) support */
1064#ifdef CONFIG_PPC_SPLPAR 1069#ifdef CONFIG_PPC_SPLPAR
1065 1070
1066#include "vphn.h" 1071#include "book3s64/vphn.h"
1067 1072
1068struct topology_update_data { 1073struct topology_update_data {
1069 struct topology_update_data *next; 1074 struct topology_update_data *next;
@@ -1498,6 +1503,9 @@ int start_topology_update(void)
1498{ 1503{
1499 int rc = 0; 1504 int rc = 0;
1500 1505
1506 if (!topology_updates_enabled)
1507 return 0;
1508
1501 if (firmware_has_feature(FW_FEATURE_PRRN)) { 1509 if (firmware_has_feature(FW_FEATURE_PRRN)) {
1502 if (!prrn_enabled) { 1510 if (!prrn_enabled) {
1503 prrn_enabled = 1; 1511 prrn_enabled = 1;
@@ -1531,6 +1539,9 @@ int stop_topology_update(void)
1531{ 1539{
1532 int rc = 0; 1540 int rc = 0;
1533 1541
1542 if (!topology_updates_enabled)
1543 return 0;
1544
1534 if (prrn_enabled) { 1545 if (prrn_enabled) {
1535 prrn_enabled = 0; 1546 prrn_enabled = 0;
1536#ifdef CONFIG_SMP 1547#ifdef CONFIG_SMP
@@ -1588,11 +1599,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
1588 1599
1589 kbuf[read_len] = '\0'; 1600 kbuf[read_len] = '\0';
1590 1601
1591 if (!strncmp(kbuf, "on", 2)) 1602 if (!strncmp(kbuf, "on", 2)) {
1603 topology_updates_enabled = true;
1592 start_topology_update(); 1604 start_topology_update();
1593 else if (!strncmp(kbuf, "off", 3)) 1605 } else if (!strncmp(kbuf, "off", 3)) {
1594 stop_topology_update(); 1606 stop_topology_update();
1595 else 1607 topology_updates_enabled = false;
1608 } else
1596 return -EINVAL; 1609 return -EINVAL;
1597 1610
1598 return count; 1611 return count;
@@ -1607,9 +1620,7 @@ static const struct file_operations topology_ops = {
1607 1620
1608static int topology_update_init(void) 1621static int topology_update_init(void)
1609{ 1622{
1610 /* Do not poll for changes if disabled at boot */ 1623 start_topology_update();
1611 if (topology_updates_enabled)
1612 start_topology_update();
1613 1624
1614 if (vphn_enabled) 1625 if (vphn_enabled)
1615 topology_schedule_update(); 1626 topology_schedule_update();
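
With topology updates now disabled by default, they can be re-enabled either at boot, via the early_param parsed above (the parameter name is not shown in this hunk; assumed to be topology_updates), or at runtime through the procfs file served by topology_write() (path likewise assumed from the usual registration):

    topology_updates=on                          (boot command line)
    echo on > /proc/powerpc/topology_updates     (runtime, as root)

Note that the runtime path now also flips topology_updates_enabled, so the early-exit checks added to start/stop_topology_update() honour the operator's choice.
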
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index d3d61d29b4f1..db4a6253df92 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -30,6 +30,7 @@
30#include <asm/pgalloc.h> 30#include <asm/pgalloc.h>
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
32#include <asm/tlb.h> 32#include <asm/tlb.h>
33#include <asm/hugetlb.h>
33 34
34static inline int is_exec_fault(void) 35static inline int is_exec_fault(void)
35{ 36{
@@ -299,3 +300,116 @@ unsigned long vmalloc_to_phys(void *va)
299 return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va); 300 return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
300} 301}
301EXPORT_SYMBOL_GPL(vmalloc_to_phys); 302EXPORT_SYMBOL_GPL(vmalloc_to_phys);
303
304/*
305 * We have 4 cases for pgds and pmds:
306 * (1) invalid (all zeroes)
307 * (2) pointer to next table, as normal; bottom 6 bits == 0
308 * (3) leaf pte for huge page _PAGE_PTE set
309 * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
310 *
 311 * So long as we atomically load page table pointers we are safe against teardown,
 312 * and we can follow the address down to the page and take a ref on it.
 313 * This function needs to be called with interrupts disabled. We use this variant
314 * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
315 */
316pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
317 bool *is_thp, unsigned *hpage_shift)
318{
319 pgd_t pgd, *pgdp;
320 pud_t pud, *pudp;
321 pmd_t pmd, *pmdp;
322 pte_t *ret_pte;
323 hugepd_t *hpdp = NULL;
324 unsigned pdshift = PGDIR_SHIFT;
325
326 if (hpage_shift)
327 *hpage_shift = 0;
328
329 if (is_thp)
330 *is_thp = false;
331
332 pgdp = pgdir + pgd_index(ea);
333 pgd = READ_ONCE(*pgdp);
334 /*
 335	 * Always operate on the local stack value. This makes sure the
 336	 * value doesn't get updated by a parallel THP split/collapse,
 337	 * page fault or page unmap. The returned pte_t * is still not
 338	 * stable, so it should be re-checked for the above conditions.
339 */
340 if (pgd_none(pgd))
341 return NULL;
342
343 if (pgd_huge(pgd)) {
344 ret_pte = (pte_t *)pgdp;
345 goto out;
346 }
347 if (is_hugepd(__hugepd(pgd_val(pgd)))) {
348 hpdp = (hugepd_t *)&pgd;
349 goto out_huge;
350 }
351
352 /*
353 * Even if we end up with an unmap, the pgtable will not
 354	 * be freed, because we do an RCU free and we are running
 355	 * with IRQs disabled here
356 */
357 pdshift = PUD_SHIFT;
358 pudp = pud_offset(&pgd, ea);
359 pud = READ_ONCE(*pudp);
360
361 if (pud_none(pud))
362 return NULL;
363
364 if (pud_huge(pud)) {
365 ret_pte = (pte_t *)pudp;
366 goto out;
367 }
368 if (is_hugepd(__hugepd(pud_val(pud)))) {
369 hpdp = (hugepd_t *)&pud;
370 goto out_huge;
371 }
372 pdshift = PMD_SHIFT;
373 pmdp = pmd_offset(&pud, ea);
374 pmd = READ_ONCE(*pmdp);
375 /*
376 * A hugepage collapse is captured by pmd_none, because
 377	 * it marks the pmd none and does an hpte invalidate.
378 */
379 if (pmd_none(pmd))
380 return NULL;
381
382 if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
383 if (is_thp)
384 *is_thp = true;
385 ret_pte = (pte_t *)pmdp;
386 goto out;
387 }
388 /*
 389	 * The pmd_large check below will handle the swap pmd pte;
 390	 * we need to do both checks because they are config
 391	 * dependent.
392 */
393 if (pmd_huge(pmd) || pmd_large(pmd)) {
394 ret_pte = (pte_t *)pmdp;
395 goto out;
396 }
397 if (is_hugepd(__hugepd(pmd_val(pmd)))) {
398 hpdp = (hugepd_t *)&pmd;
399 goto out_huge;
400 }
401
402 return pte_offset_kernel(&pmd, ea);
403
404out_huge:
405 if (!hpdp)
406 return NULL;
407
408 ret_pte = hugepte_offset(*hpdp, ea, pdshift);
409 pdshift = hugepd_shift(*hpdp);
410out:
411 if (hpage_shift)
412 *hpage_shift = pdshift;
413 return ret_pte;
414}
415EXPORT_SYMBOL_GPL(__find_linux_pte);
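
As the comment above stresses, the walk is only safe while interrupts are off, since powerpc frees page tables via RCU and running with IRQs disabled holds off that grace period. A hedged usage sketch (example_lookup_shift is a hypothetical caller, not part of this series):

    /* Returns the page-size shift backing 'ea' in 'mm', or 0 if the
     * address is unmapped. The returned pte_t * must not be used after
     * interrupts are re-enabled. */
    static unsigned int example_lookup_shift(struct mm_struct *mm,
                                             unsigned long ea)
    {
            unsigned long flags;
            unsigned int shift = 0;
            pte_t *ptep;
            bool is_thp;

            local_irq_save(flags);
            ptep = __find_linux_pte(mm->pgd, ea, &is_thp, &shift);
            if (ptep && !shift)
                    shift = PAGE_SHIFT;     /* ordinary leaf PTE */
            local_irq_restore(flags);

            return ptep ? shift : 0;
    }
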
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 6e56a6240bfa..16ada373b32b 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -36,26 +36,13 @@
36#include <asm/setup.h> 36#include <asm/setup.h>
37#include <asm/sections.h> 37#include <asm/sections.h>
38 38
39#include "mmu_decl.h" 39#include <mm/mmu_decl.h>
40 40
41unsigned long ioremap_bot; 41unsigned long ioremap_bot;
42EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ 42EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
43 43
44extern char etext[], _stext[], _sinittext[], _einittext[]; 44extern char etext[], _stext[], _sinittext[], _einittext[];
45 45
46__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
47{
48 if (!slab_is_available())
49 return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
50
51 return (pte_t *)pte_fragment_alloc(mm, 1);
52}
53
54pgtable_t pte_alloc_one(struct mm_struct *mm)
55{
56 return (pgtable_t)pte_fragment_alloc(mm, 0);
57}
58
59void __iomem * 46void __iomem *
60ioremap(phys_addr_t addr, unsigned long size) 47ioremap(phys_addr_t addr, unsigned long size)
61{ 48{
@@ -205,7 +192,29 @@ void iounmap(volatile void __iomem *addr)
205} 192}
206EXPORT_SYMBOL(iounmap); 193EXPORT_SYMBOL(iounmap);
207 194
208int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) 195static void __init *early_alloc_pgtable(unsigned long size)
196{
197 void *ptr = memblock_alloc(size, size);
198
199 if (!ptr)
200 panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
201 __func__, size, size);
202
203 return ptr;
204}
205
206static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
207{
208 if (pmd_none(*pmdp)) {
209 pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
210
211 pmd_populate_kernel(&init_mm, pmdp, ptep);
212 }
213 return pte_offset_kernel(pmdp, va);
214}
215
216
217int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
209{ 218{
210 pmd_t *pd; 219 pmd_t *pd;
211 pte_t *pg; 220 pte_t *pg;
@@ -214,7 +223,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
214 /* Use upper 10 bits of VA to index the first level map */ 223 /* Use upper 10 bits of VA to index the first level map */
215 pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); 224 pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
216 /* Use middle 10 bits of VA to index the second-level map */ 225 /* Use middle 10 bits of VA to index the second-level map */
217 pg = pte_alloc_kernel(pd, va); 226 if (likely(slab_is_available()))
227 pg = pte_alloc_kernel(pd, va);
228 else
229 pg = early_pte_alloc_kernel(pd, va);
218 if (pg != 0) { 230 if (pg != 0) {
219 err = 0; 231 err = 0;
220 /* The PTE should never be already set nor present in the 232 /* The PTE should never be already set nor present in the
@@ -384,6 +396,9 @@ void mark_rodata_ro(void)
384 PFN_DOWN((unsigned long)__start_rodata); 396 PFN_DOWN((unsigned long)__start_rodata);
385 397
386 change_page_attr(page, numpages, PAGE_KERNEL_RO); 398 change_page_attr(page, numpages, PAGE_KERNEL_RO);
399
400 // mark_initmem_nx() should have already run by now
401 ptdump_check_wx();
387} 402}
388#endif 403#endif
389 404
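
The other half of this hunk makes map_kernel_page() usable before the slab allocator exists: pte_alloc_kernel() ultimately allocates from the slab, so early callers are diverted to the memblock-backed early_pte_alloc_kernel(), keyed off the same slab_is_available() test used elsewhere in this series. A hedged caller sketch (the function name is hypothetical):

    /* Map one page at a fixed, page-aligned VA before slab_is_available()
     * becomes true; map_kernel_page() falls back to memblock internally
     * for the PTE page. */
    static void __init map_one_early_page(unsigned long va, phys_addr_t pa)
    {
            if (map_kernel_page(va, pa, PAGE_KERNEL))
                    panic("failed to map early page at 0x%lx", va);
    }
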
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index fb1375c07e8c..d2d976ff8a0e 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -52,7 +52,7 @@
52#include <asm/firmware.h> 52#include <asm/firmware.h>
53#include <asm/dma.h> 53#include <asm/dma.h>
54 54
55#include "mmu_decl.h" 55#include <mm/mmu_decl.h>
56 56
57 57
58#ifdef CONFIG_PPC_BOOK3S_64 58#ifdef CONFIG_PPC_BOOK3S_64
@@ -90,14 +90,13 @@ unsigned long __pgd_val_bits;
90EXPORT_SYMBOL(__pgd_val_bits); 90EXPORT_SYMBOL(__pgd_val_bits);
91unsigned long __kernel_virt_start; 91unsigned long __kernel_virt_start;
92EXPORT_SYMBOL(__kernel_virt_start); 92EXPORT_SYMBOL(__kernel_virt_start);
93unsigned long __kernel_virt_size;
94EXPORT_SYMBOL(__kernel_virt_size);
95unsigned long __vmalloc_start; 93unsigned long __vmalloc_start;
96EXPORT_SYMBOL(__vmalloc_start); 94EXPORT_SYMBOL(__vmalloc_start);
97unsigned long __vmalloc_end; 95unsigned long __vmalloc_end;
98EXPORT_SYMBOL(__vmalloc_end); 96EXPORT_SYMBOL(__vmalloc_end);
99unsigned long __kernel_io_start; 97unsigned long __kernel_io_start;
100EXPORT_SYMBOL(__kernel_io_start); 98EXPORT_SYMBOL(__kernel_io_start);
99unsigned long __kernel_io_end;
101struct page *vmemmap; 100struct page *vmemmap;
102EXPORT_SYMBOL(vmemmap); 101EXPORT_SYMBOL(vmemmap);
103unsigned long __pte_frag_nr; 102unsigned long __pte_frag_nr;
@@ -121,6 +120,11 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
121 if (pgprot_val(prot) & H_PAGE_4K_PFN) 120 if (pgprot_val(prot) & H_PAGE_4K_PFN)
122 return NULL; 121 return NULL;
123 122
123 if ((ea + size) >= (void *)IOREMAP_END) {
124 pr_warn("Outside the supported range\n");
125 return NULL;
126 }
127
124 WARN_ON(pa & ~PAGE_MASK); 128 WARN_ON(pa & ~PAGE_MASK);
125 WARN_ON(((unsigned long)ea) & ~PAGE_MASK); 129 WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
126 WARN_ON(size & ~PAGE_MASK); 130 WARN_ON(size & ~PAGE_MASK);
@@ -328,6 +332,9 @@ void mark_rodata_ro(void)
328 radix__mark_rodata_ro(); 332 radix__mark_rodata_ro();
329 else 333 else
330 hash__mark_rodata_ro(); 334 hash__mark_rodata_ro();
335
336 // mark_initmem_nx() should have already run by now
337 ptdump_check_wx();
331} 338}
332 339
333void mark_initmem_nx(void) 340void mark_initmem_nx(void)
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index b430e4e08af6..b9bda0105841 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
500 address_markers[7].start_address = IOREMAP_BASE; 500 address_markers[7].start_address = IOREMAP_BASE;
501 address_markers[8].start_address = IOREMAP_END; 501 address_markers[8].start_address = IOREMAP_END;
502#ifdef CONFIG_PPC_BOOK3S_64 502#ifdef CONFIG_PPC_BOOK3S_64
503 address_markers[9].start_address = H_VMEMMAP_BASE; 503 address_markers[9].start_address = H_VMEMMAP_START;
504#else 504#else
505 address_markers[9].start_address = VMEMMAP_BASE; 505 address_markers[9].start_address = VMEMMAP_BASE;
506#endif 506#endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 37138428ab55..646876d9da64 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -31,7 +31,7 @@
31#include "ptdump.h" 31#include "ptdump.h"
32 32
33#ifdef CONFIG_PPC32 33#ifdef CONFIG_PPC32
34#define KERN_VIRT_START 0 34#define KERN_VIRT_START PAGE_OFFSET
35#endif 35#endif
36 36
37/* 37/*
@@ -68,6 +68,8 @@ struct pg_state {
68 unsigned long last_pa; 68 unsigned long last_pa;
69 unsigned int level; 69 unsigned int level;
70 u64 current_flags; 70 u64 current_flags;
71 bool check_wx;
72 unsigned long wx_pages;
71}; 73};
72 74
73struct addr_marker { 75struct addr_marker {
@@ -101,9 +103,25 @@ static struct addr_marker address_markers[] = {
101 { 0, "Fixmap start" }, 103 { 0, "Fixmap start" },
102 { 0, "Fixmap end" }, 104 { 0, "Fixmap end" },
103#endif 105#endif
106#ifdef CONFIG_KASAN
107 { 0, "kasan shadow mem start" },
108 { 0, "kasan shadow mem end" },
109#endif
104 { -1, NULL }, 110 { -1, NULL },
105}; 111};
106 112
113#define pt_dump_seq_printf(m, fmt, args...) \
114({ \
115 if (m) \
116 seq_printf(m, fmt, ##args); \
117})
118
119#define pt_dump_seq_putc(m, c) \
120({ \
121 if (m) \
122 seq_putc(m, c); \
123})
124
107static void dump_flag_info(struct pg_state *st, const struct flag_info 125static void dump_flag_info(struct pg_state *st, const struct flag_info
108 *flag, u64 pte, int num) 126 *flag, u64 pte, int num)
109{ 127{
@@ -121,19 +139,19 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info
121 val = pte & flag->val; 139 val = pte & flag->val;
122 if (flag->shift) 140 if (flag->shift)
123 val = val >> flag->shift; 141 val = val >> flag->shift;
124 seq_printf(st->seq, " %s:%llx", flag->set, val); 142 pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val);
125 } else { 143 } else {
126 if ((pte & flag->mask) == flag->val) 144 if ((pte & flag->mask) == flag->val)
127 s = flag->set; 145 s = flag->set;
128 else 146 else
129 s = flag->clear; 147 s = flag->clear;
130 if (s) 148 if (s)
131 seq_printf(st->seq, " %s", s); 149 pt_dump_seq_printf(st->seq, " %s", s);
132 } 150 }
133 st->current_flags &= ~flag->mask; 151 st->current_flags &= ~flag->mask;
134 } 152 }
135 if (st->current_flags != 0) 153 if (st->current_flags != 0)
136 seq_printf(st->seq, " unknown flags:%llx", st->current_flags); 154 pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
137} 155}
138 156
139static void dump_addr(struct pg_state *st, unsigned long addr) 157static void dump_addr(struct pg_state *st, unsigned long addr)
@@ -148,12 +166,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
148#define REG "0x%08lx" 166#define REG "0x%08lx"
149#endif 167#endif
150 168
151 seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); 169 pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
152 if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { 170 if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) {
153 seq_printf(st->seq, "[" REG "]", st->start_pa); 171 pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa);
154 delta = PAGE_SIZE >> 10; 172 delta = PAGE_SIZE >> 10;
155 } else { 173 } else {
156 seq_printf(st->seq, " " REG " ", st->start_pa); 174 pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
157 delta = (addr - st->start_address) >> 10; 175 delta = (addr - st->start_address) >> 10;
158 } 176 }
159 /* Work out what appropriate unit to use */ 177 /* Work out what appropriate unit to use */
@@ -161,10 +179,24 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
161 delta >>= 10; 179 delta >>= 10;
162 unit++; 180 unit++;
163 } 181 }
164 seq_printf(st->seq, "%9lu%c", delta, *unit); 182 pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit);
165 183
166} 184}
167 185
186static void note_prot_wx(struct pg_state *st, unsigned long addr)
187{
188 if (!st->check_wx)
189 return;
190
191 if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
192 return;
193
194 WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
195 (void *)st->start_address, (void *)st->start_address);
196
197 st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
198}
199
168static void note_page(struct pg_state *st, unsigned long addr, 200static void note_page(struct pg_state *st, unsigned long addr,
169 unsigned int level, u64 val) 201 unsigned int level, u64 val)
170{ 202{
@@ -178,7 +210,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
178 st->start_address = addr; 210 st->start_address = addr;
179 st->start_pa = pa; 211 st->start_pa = pa;
180 st->last_pa = pa; 212 st->last_pa = pa;
181 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 213 pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
182 /* 214 /*
183 * Dump the section of virtual memory when: 215 * Dump the section of virtual memory when:
184 * - the PTE flags from one entry to the next differs. 216 * - the PTE flags from one entry to the next differs.
@@ -194,6 +226,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
194 226
195 /* Check the PTE flags */ 227 /* Check the PTE flags */
196 if (st->current_flags) { 228 if (st->current_flags) {
229 note_prot_wx(st, addr);
197 dump_addr(st, addr); 230 dump_addr(st, addr);
198 231
199 /* Dump all the flags */ 232 /* Dump all the flags */
@@ -202,7 +235,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
202 st->current_flags, 235 st->current_flags,
203 pg_level[st->level].num); 236 pg_level[st->level].num);
204 237
205 seq_putc(st->seq, '\n'); 238 pt_dump_seq_putc(st->seq, '\n');
206 } 239 }
207 240
208 /* 241 /*
@@ -211,7 +244,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
211 */ 244 */
212 while (addr >= st->marker[1].start_address) { 245 while (addr >= st->marker[1].start_address) {
213 st->marker++; 246 st->marker++;
214 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 247 pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
215 } 248 }
216 st->start_address = addr; 249 st->start_address = addr;
217 st->start_pa = pa; 250 st->start_pa = pa;
@@ -303,8 +336,9 @@ static void populate_markers(void)
303 address_markers[i++].start_address = PHB_IO_END; 336 address_markers[i++].start_address = PHB_IO_END;
304 address_markers[i++].start_address = IOREMAP_BASE; 337 address_markers[i++].start_address = IOREMAP_BASE;
305 address_markers[i++].start_address = IOREMAP_END; 338 address_markers[i++].start_address = IOREMAP_END;
339 /* What is the ifdef about? */
306#ifdef CONFIG_PPC_BOOK3S_64 340#ifdef CONFIG_PPC_BOOK3S_64
307 address_markers[i++].start_address = H_VMEMMAP_BASE; 341 address_markers[i++].start_address = H_VMEMMAP_START;
308#else 342#else
309 address_markers[i++].start_address = VMEMMAP_BASE; 343 address_markers[i++].start_address = VMEMMAP_BASE;
310#endif 344#endif
@@ -322,6 +356,10 @@ static void populate_markers(void)
322#endif 356#endif
323 address_markers[i++].start_address = FIXADDR_START; 357 address_markers[i++].start_address = FIXADDR_START;
324 address_markers[i++].start_address = FIXADDR_TOP; 358 address_markers[i++].start_address = FIXADDR_TOP;
359#ifdef CONFIG_KASAN
360 address_markers[i++].start_address = KASAN_SHADOW_START;
361 address_markers[i++].start_address = KASAN_SHADOW_END;
362#endif
325#endif /* CONFIG_PPC64 */ 363#endif /* CONFIG_PPC64 */
326} 364}
327 365
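The populate_markers() changes above grow the same address_markers[] table that note_page() walks: whenever the dump address crosses the next marker's start_address, a new section banner is printed. A self-contained sketch of that walk, with made-up addresses in place of the real VMALLOC/FIXADDR/KASAN boundaries:

#include <stdio.h>

struct addr_marker {
	unsigned long start_address;
	const char *name;
};

/* made-up boundaries; the real table holds VMALLOC_START & friends */
static struct addr_marker markers[] = {
	{ 0x1000, "kernel" },
	{ 0x5000, "vmalloc" },
	{ 0x9000, "fixmap" },
	{ ~0UL,   NULL },	/* sentinel */
};

int main(void)
{
	const struct addr_marker *m = markers;
	unsigned long addr;

	for (addr = 0x1000; addr < 0xa000; addr += 0x2000) {
		/* advance past every boundary the address has crossed */
		while (m[1].name && addr >= m[1].start_address)
			m++;
		printf("0x%lx ---[ %s ]---\n", addr, m->name);
	}
	return 0;
}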
@@ -366,6 +404,30 @@ static void build_pgtable_complete_mask(void)
366 pg_level[i].mask |= pg_level[i].flag[j].mask; 404 pg_level[i].mask |= pg_level[i].flag[j].mask;
367} 405}
368 406
407#ifdef CONFIG_PPC_DEBUG_WX
408void ptdump_check_wx(void)
409{
410 struct pg_state st = {
411 .seq = NULL,
412 .marker = address_markers,
413 .check_wx = true,
414 };
415
416 if (radix_enabled())
417 st.start_address = PAGE_OFFSET;
418 else
419 st.start_address = KERN_VIRT_START;
420
421 walk_pagetables(&st);
422
423 if (st.wx_pages)
424 pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
425 st.wx_pages);
426 else
427 pr_info("Checked W+X mappings: passed, no W+X pages found\n");
428}
429#endif
430
369static int ptdump_init(void) 431static int ptdump_init(void)
370{ 432{
371 struct dentry *debugfs_file; 433 struct dentry *debugfs_file;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index aec91dbcdc0b..97fbf7b54422 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
101{ 101{
102 struct vm_area_struct *vma; 102 struct vm_area_struct *vma;
103 103
104 if ((mm->context.slb_addr_limit - len) < addr) 104 if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr)
105 return 0; 105 return 0;
106 vma = find_vma(mm, addr); 106 vma = find_vma(mm, addr);
107 return (!vma || (addr + len) <= vm_start_gap(vma)); 107 return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -118,13 +118,11 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
118 unsigned long start = slice << SLICE_HIGH_SHIFT; 118 unsigned long start = slice << SLICE_HIGH_SHIFT;
119 unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); 119 unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
120 120
121#ifdef CONFIG_PPC64
 122 /* Hack, so that each address is controlled by exactly one 121 /* Hack, so that each address is controlled by exactly one
 123 * of the high or low area bitmaps, the first high area starts 122 * of the high or low area bitmaps, the first high area starts
 124 * at 4GB, not 0 */ 123 * at 4GB, not 0 */
125 if (start == 0) 124 if (start == 0)
126 start = SLICE_LOW_TOP; 125 start = (unsigned long)SLICE_LOW_TOP;
127#endif
128 126
129 return !slice_area_is_free(mm, start, end - start); 127 return !slice_area_is_free(mm, start, end - start);
130} 128}
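Dropping the #ifdef CONFIG_PPC64 around the SLICE_LOW_TOP assignment works because of the new cast: assuming SLICE_LOW_TOP is defined as 0x100000000ull (4GB), truncating it to a 32-bit unsigned long yields 0, so the assignment is a no-op on 32-bit and the guard becomes dead weight. The truncation is easy to demonstrate:

#include <stdio.h>
#include <stdint.h>

#define SLICE_LOW_TOP 0x100000000ull	/* assumed definition, 4GB */

int main(void)
{
	/* uint32_t models a 32-bit unsigned long */
	uint32_t start32 = (uint32_t)SLICE_LOW_TOP;	/* wraps to 0 */
	uint64_t start64 = (uint64_t)SLICE_LOW_TOP;	/* stays 4GB */

	printf("32-bit: 0x%x, 64-bit: 0x%llx\n",
	       start32, (unsigned long long)start64);
	return 0;
}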
@@ -150,40 +148,6 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
150 __set_bit(i, ret->high_slices); 148 __set_bit(i, ret->high_slices);
151} 149}
152 150
153#ifdef CONFIG_PPC_BOOK3S_64
154static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
155{
156#ifdef CONFIG_PPC_64K_PAGES
157 if (psize == MMU_PAGE_64K)
158 return &mm->context.mask_64k;
159#endif
160 if (psize == MMU_PAGE_4K)
161 return &mm->context.mask_4k;
162#ifdef CONFIG_HUGETLB_PAGE
163 if (psize == MMU_PAGE_16M)
164 return &mm->context.mask_16m;
165 if (psize == MMU_PAGE_16G)
166 return &mm->context.mask_16g;
167#endif
168 BUG();
169}
170#elif defined(CONFIG_PPC_8xx)
171static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
172{
173 if (psize == mmu_virtual_psize)
174 return &mm->context.mask_base_psize;
175#ifdef CONFIG_HUGETLB_PAGE
176 if (psize == MMU_PAGE_512K)
177 return &mm->context.mask_512k;
178 if (psize == MMU_PAGE_8M)
179 return &mm->context.mask_8m;
180#endif
181 BUG();
182}
183#else
184#error "Must define the slice masks for page sizes supported by the platform"
185#endif
186
187static bool slice_check_range_fits(struct mm_struct *mm, 151static bool slice_check_range_fits(struct mm_struct *mm,
188 const struct slice_mask *available, 152 const struct slice_mask *available,
189 unsigned long start, unsigned long len) 153 unsigned long start, unsigned long len)
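slice_mask_for_size() now lives behind the mm_context_t accessors rather than being open-coded per platform here, and the rest of this file likewise switches from direct mm->context field accesses to mm_ctx_*() helpers. An illustrative sketch of that accessor pattern (not the kernel's actual definitions): generic code sees only the helpers, and each platform header maps them onto its own context layout.

#include <stdio.h>

typedef struct {
	unsigned long slb_addr_limit;
	unsigned char low_slices_psize[8];
} mm_context_t;	/* stand-in for one platform's layout */

static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
{
	return ctx->slb_addr_limit;
}

static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
{
	return ctx->low_slices_psize;
}

int main(void)
{
	mm_context_t ctx = { .slb_addr_limit = 0x1000 };

	/* callers never touch the field layout directly */
	printf("limit = 0x%lx\n", mm_ctx_slb_addr_limit(&ctx));
	printf("psize[0] = %u\n", mm_ctx_low_slices(&ctx)[0]);
	return 0;
}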
@@ -246,14 +210,14 @@ static void slice_convert(struct mm_struct *mm,
246 slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); 210 slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
247 slice_print_mask(" mask", mask); 211 slice_print_mask(" mask", mask);
248 212
249 psize_mask = slice_mask_for_size(mm, psize); 213 psize_mask = slice_mask_for_size(&mm->context, psize);
250 214
251 /* We need to use a spinlock here to protect against 215 /* We need to use a spinlock here to protect against
252 * concurrent 64k -> 4k demotion ... 216 * concurrent 64k -> 4k demotion ...
253 */ 217 */
254 spin_lock_irqsave(&slice_convert_lock, flags); 218 spin_lock_irqsave(&slice_convert_lock, flags);
255 219
256 lpsizes = mm->context.low_slices_psize; 220 lpsizes = mm_ctx_low_slices(&mm->context);
257 for (i = 0; i < SLICE_NUM_LOW; i++) { 221 for (i = 0; i < SLICE_NUM_LOW; i++) {
258 if (!(mask->low_slices & (1u << i))) 222 if (!(mask->low_slices & (1u << i)))
259 continue; 223 continue;
@@ -263,7 +227,7 @@ static void slice_convert(struct mm_struct *mm,
263 227
264 /* Update the slice_mask */ 228 /* Update the slice_mask */
265 old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf; 229 old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
266 old_mask = slice_mask_for_size(mm, old_psize); 230 old_mask = slice_mask_for_size(&mm->context, old_psize);
267 old_mask->low_slices &= ~(1u << i); 231 old_mask->low_slices &= ~(1u << i);
268 psize_mask->low_slices |= 1u << i; 232 psize_mask->low_slices |= 1u << i;
269 233
@@ -272,8 +236,8 @@ static void slice_convert(struct mm_struct *mm,
272 (((unsigned long)psize) << (mask_index * 4)); 236 (((unsigned long)psize) << (mask_index * 4));
273 } 237 }
274 238
275 hpsizes = mm->context.high_slices_psize; 239 hpsizes = mm_ctx_high_slices(&mm->context);
276 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { 240 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
277 if (!test_bit(i, mask->high_slices)) 241 if (!test_bit(i, mask->high_slices))
278 continue; 242 continue;
279 243
@@ -282,7 +246,7 @@ static void slice_convert(struct mm_struct *mm,
282 246
283 /* Update the slice_mask */ 247 /* Update the slice_mask */
284 old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf; 248 old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
285 old_mask = slice_mask_for_size(mm, old_psize); 249 old_mask = slice_mask_for_size(&mm->context, old_psize);
286 __clear_bit(i, old_mask->high_slices); 250 __clear_bit(i, old_mask->high_slices);
287 __set_bit(i, psize_mask->high_slices); 251 __set_bit(i, psize_mask->high_slices);
288 252
@@ -292,8 +256,8 @@ static void slice_convert(struct mm_struct *mm,
292 } 256 }
293 257
294 slice_dbg(" lsps=%lx, hsps=%lx\n", 258 slice_dbg(" lsps=%lx, hsps=%lx\n",
295 (unsigned long)mm->context.low_slices_psize, 259 (unsigned long)mm_ctx_low_slices(&mm->context),
296 (unsigned long)mm->context.high_slices_psize); 260 (unsigned long)mm_ctx_high_slices(&mm->context));
297 261
298 spin_unlock_irqrestore(&slice_convert_lock, flags); 262 spin_unlock_irqrestore(&slice_convert_lock, flags);
299 263
@@ -393,7 +357,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
393 * DEFAULT_MAP_WINDOW we should apply this. 357 * DEFAULT_MAP_WINDOW we should apply this.
394 */ 358 */
395 if (high_limit > DEFAULT_MAP_WINDOW) 359 if (high_limit > DEFAULT_MAP_WINDOW)
396 addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW; 360 addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
397 361
398 while (addr > min_addr) { 362 while (addr > min_addr) {
399 info.high_limit = addr; 363 info.high_limit = addr;
@@ -505,20 +469,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
505 return -ENOMEM; 469 return -ENOMEM;
506 } 470 }
507 471
508 if (high_limit > mm->context.slb_addr_limit) { 472 if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
509 /* 473 /*
510 * Increasing the slb_addr_limit does not require 474 * Increasing the slb_addr_limit does not require
511 * slice mask cache to be recalculated because it should 475 * slice mask cache to be recalculated because it should
512 * be already initialised beyond the old address limit. 476 * be already initialised beyond the old address limit.
513 */ 477 */
514 mm->context.slb_addr_limit = high_limit; 478 mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
515 479
516 on_each_cpu(slice_flush_segments, mm, 1); 480 on_each_cpu(slice_flush_segments, mm, 1);
517 } 481 }
518 482
519 /* Sanity checks */ 483 /* Sanity checks */
520 BUG_ON(mm->task_size == 0); 484 BUG_ON(mm->task_size == 0);
521 BUG_ON(mm->context.slb_addr_limit == 0); 485 BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
522 VM_BUG_ON(radix_enabled()); 486 VM_BUG_ON(radix_enabled());
523 487
524 slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); 488 slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
@@ -538,7 +502,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
538 /* First make up a "good" mask of slices that have the right size 502 /* First make up a "good" mask of slices that have the right size
539 * already 503 * already
540 */ 504 */
541 maskp = slice_mask_for_size(mm, psize); 505 maskp = slice_mask_for_size(&mm->context, psize);
542 506
543 /* 507 /*
544 * Here "good" means slices that are already the right page size, 508 * Here "good" means slices that are already the right page size,
@@ -565,7 +529,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
565 * a pointer to good mask for the next code to use. 529 * a pointer to good mask for the next code to use.
566 */ 530 */
567 if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { 531 if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
568 compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); 532 compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
569 if (fixed) 533 if (fixed)
570 slice_or_mask(&good_mask, maskp, compat_maskp); 534 slice_or_mask(&good_mask, maskp, compat_maskp);
571 else 535 else
@@ -642,14 +606,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
642 newaddr = slice_find_area(mm, len, &potential_mask, 606 newaddr = slice_find_area(mm, len, &potential_mask,
643 psize, topdown, high_limit); 607 psize, topdown, high_limit);
644 608
645#ifdef CONFIG_PPC_64K_PAGES 609 if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM &&
646 if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) { 610 psize == MMU_PAGE_64K) {
647 /* retry the search with 4k-page slices included */ 611 /* retry the search with 4k-page slices included */
648 slice_or_mask(&potential_mask, &potential_mask, compat_maskp); 612 slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
649 newaddr = slice_find_area(mm, len, &potential_mask, 613 newaddr = slice_find_area(mm, len, &potential_mask,
650 psize, topdown, high_limit); 614 psize, topdown, high_limit);
651 } 615 }
652#endif
653 616
654 if (newaddr == -ENOMEM) 617 if (newaddr == -ENOMEM)
655 return -ENOMEM; 618 return -ENOMEM;
@@ -696,7 +659,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
696 unsigned long flags) 659 unsigned long flags)
697{ 660{
698 return slice_get_unmapped_area(addr, len, flags, 661 return slice_get_unmapped_area(addr, len, flags,
699 current->mm->context.user_psize, 0); 662 mm_ctx_user_psize(&current->mm->context), 0);
700} 663}
701 664
702unsigned long arch_get_unmapped_area_topdown(struct file *filp, 665unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -706,7 +669,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
706 const unsigned long flags) 669 const unsigned long flags)
707{ 670{
708 return slice_get_unmapped_area(addr0, len, flags, 671 return slice_get_unmapped_area(addr0, len, flags,
709 current->mm->context.user_psize, 1); 672 mm_ctx_user_psize(&current->mm->context), 1);
710} 673}
711 674
712unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) 675unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
@@ -717,10 +680,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
717 VM_BUG_ON(radix_enabled()); 680 VM_BUG_ON(radix_enabled());
718 681
719 if (slice_addr_is_low(addr)) { 682 if (slice_addr_is_low(addr)) {
720 psizes = mm->context.low_slices_psize; 683 psizes = mm_ctx_low_slices(&mm->context);
721 index = GET_LOW_SLICE_INDEX(addr); 684 index = GET_LOW_SLICE_INDEX(addr);
722 } else { 685 } else {
723 psizes = mm->context.high_slices_psize; 686 psizes = mm_ctx_high_slices(&mm->context);
724 index = GET_HIGH_SLICE_INDEX(addr); 687 index = GET_HIGH_SLICE_INDEX(addr);
725 } 688 }
726 mask_index = index & 0x1; 689 mask_index = index & 0x1;
@@ -741,27 +704,22 @@ void slice_init_new_context_exec(struct mm_struct *mm)
741 * case of fork it is just inherited from the mm being 704 * case of fork it is just inherited from the mm being
742 * duplicated. 705 * duplicated.
743 */ 706 */
744#ifdef CONFIG_PPC64 707 mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT);
745 mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; 708 mm_ctx_set_user_psize(&mm->context, psize);
746#else
747 mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
748#endif
749
750 mm->context.user_psize = psize;
751 709
752 /* 710 /*
753 * Set all slice psizes to the default. 711 * Set all slice psizes to the default.
754 */ 712 */
755 lpsizes = mm->context.low_slices_psize; 713 lpsizes = mm_ctx_low_slices(&mm->context);
756 memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1); 714 memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
757 715
758 hpsizes = mm->context.high_slices_psize; 716 hpsizes = mm_ctx_high_slices(&mm->context);
759 memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1); 717 memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
760 718
761 /* 719 /*
762 * Slice mask cache starts zeroed, fill the default size cache. 720 * Slice mask cache starts zeroed, fill the default size cache.
763 */ 721 */
764 mask = slice_mask_for_size(mm, psize); 722 mask = slice_mask_for_size(&mm->context, psize);
765 mask->low_slices = ~0UL; 723 mask->low_slices = ~0UL;
766 if (SLICE_NUM_HIGH) 724 if (SLICE_NUM_HIGH)
767 bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); 725 bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
@@ -777,7 +735,7 @@ void slice_setup_new_exec(void)
777 if (!is_32bit_task()) 735 if (!is_32bit_task())
778 return; 736 return;
779 737
780 mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; 738 mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
781} 739}
782#endif 740#endif
783 741
@@ -816,22 +774,21 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
816 unsigned long len) 774 unsigned long len)
817{ 775{
818 const struct slice_mask *maskp; 776 const struct slice_mask *maskp;
819 unsigned int psize = mm->context.user_psize; 777 unsigned int psize = mm_ctx_user_psize(&mm->context);
820 778
821 VM_BUG_ON(radix_enabled()); 779 VM_BUG_ON(radix_enabled());
822 780
823 maskp = slice_mask_for_size(mm, psize); 781 maskp = slice_mask_for_size(&mm->context, psize);
824#ifdef CONFIG_PPC_64K_PAGES 782
825 /* We need to account for 4k slices too */ 783 /* We need to account for 4k slices too */
826 if (psize == MMU_PAGE_64K) { 784 if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
827 const struct slice_mask *compat_maskp; 785 const struct slice_mask *compat_maskp;
828 struct slice_mask available; 786 struct slice_mask available;
829 787
830 compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); 788 compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
831 slice_or_mask(&available, maskp, compat_maskp); 789 slice_or_mask(&available, maskp, compat_maskp);
832 return !slice_check_range_fits(mm, &available, addr, len); 790 return !slice_check_range_fits(mm, &available, addr, len);
833 } 791 }
834#endif
835 792
836 return !slice_check_range_fits(mm, maskp, addr, len); 793 return !slice_check_range_fits(mm, maskp, addr, len);
837} 794}
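Several hunks in this file trade #ifdef CONFIG_PPC_64K_PAGES for IS_ENABLED(), so the 4k-retry path is now compiled and type-checked in every configuration and merely optimized out when the option is off. The idiom in miniature, with a stand-in macro for the kernel's IS_ENABLED(CONFIG_...):

#include <stdio.h>

#define IS_ENABLED_64K_PAGES 0	/* flip to 1 for the other config */

static int retry_with_4k_slices(void)
{
	/* still parsed and checked even when the branch is dead */
	return 4096;
}

int main(void)
{
	int addr = -1;	/* models the first attempt failing */

	if (IS_ENABLED_64K_PAGES && addr == -1)
		addr = retry_with_4k_slices();

	printf("%d\n", addr);
	return 0;
}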
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index ab26df5bacb9..c155dcbb8691 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -5,7 +5,8 @@ obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
5obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o 5obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
6obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ 6obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
7 power5+-pmu.o power6-pmu.o power7-pmu.o \ 7 power5+-pmu.o power6-pmu.o power7-pmu.o \
8 isa207-common.o power8-pmu.o power9-pmu.o 8 isa207-common.o power8-pmu.o power9-pmu.o \
9 generic-compat-pmu.o
9obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o 10obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
10 11
11obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o 12obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0723002a396..a66fb9c01c9e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -22,6 +22,10 @@
22#include <asm/ptrace.h> 22#include <asm/ptrace.h>
23#include <asm/code-patching.h> 23#include <asm/code-patching.h>
24 24
25#ifdef CONFIG_PPC64
26#include "internal.h"
27#endif
28
25#define BHRB_MAX_ENTRIES 32 29#define BHRB_MAX_ENTRIES 32
26#define BHRB_TARGET 0x0000000000000002 30#define BHRB_TARGET 0x0000000000000002
27#define BHRB_PREDICTION 0x0000000000000001 31#define BHRB_PREDICTION 0x0000000000000001
@@ -2294,3 +2298,27 @@ int register_power_pmu(struct power_pmu *pmu)
2294 power_pmu_prepare_cpu, NULL); 2298 power_pmu_prepare_cpu, NULL);
2295 return 0; 2299 return 0;
2296} 2300}
2301
2302#ifdef CONFIG_PPC64
2303static int __init init_ppc64_pmu(void)
2304{
2305 /* run through all the pmu drivers one at a time */
2306 if (!init_power5_pmu())
2307 return 0;
2308 else if (!init_power5p_pmu())
2309 return 0;
2310 else if (!init_power6_pmu())
2311 return 0;
2312 else if (!init_power7_pmu())
2313 return 0;
2314 else if (!init_power8_pmu())
2315 return 0;
2316 else if (!init_power9_pmu())
2317 return 0;
2318 else if (!init_ppc970_pmu())
2319 return 0;
2320 else
2321 return init_generic_compat_pmu();
2322}
2323early_initcall(init_ppc64_pmu);
2324#endif
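init_ppc64_pmu() above probes each CPU-specific driver in order; every init_*_pmu() routine returns 0 only when it recognises the running CPU, so the first match wins and init_generic_compat_pmu() is the terminal fallback. The same order could be expressed as a table walk; a hedged equivalent sketch (the kernel itself uses the explicit if/else chain):

#include <stdio.h>

/* stand-ins: nonzero means "not this CPU" */
static int init_old_pmu(void)	{ return -1; }
static int init_new_pmu(void)	{ return 0; }
static int init_generic(void)	{ return 0; }

int main(void)
{
	int (*probes[])(void) = { init_old_pmu, init_new_pmu, init_generic };
	unsigned int i;

	for (i = 0; i < sizeof(probes) / sizeof(probes[0]); i++) {
		if (!probes[i]()) {
			printf("registered probe %u\n", i);
			break;
		}
	}
	return 0;
}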
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
new file mode 100644
index 000000000000..5e5a54d5588e
--- /dev/null
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -0,0 +1,234 @@
1// SPDX-License-Identifier: GPL-2.0+
2//
3// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
4
5#define pr_fmt(fmt) "generic-compat-pmu: " fmt
6
7#include "isa207-common.h"
8
9/*
10 * Raw event encoding:
11 *
12 * 60 56 52 48 44 40 36 32
13 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
14 *
15 * 28 24 20 16 12 8 4 0
16 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
17 * [ pmc ] [unit ] [ ] m [ pmcxsel ]
18 * | |
19 * | *- mark
20 * |
21 * |
22 * *- combine
23 *
24 * Below uses IBM bit numbering.
25 *
26 * MMCR1[x:y] = unit (PMCxUNIT)
27 * MMCR1[24] = pmc1combine[0]
28 * MMCR1[25] = pmc1combine[1]
29 * MMCR1[26] = pmc2combine[0]
30 * MMCR1[27] = pmc2combine[1]
31 * MMCR1[28] = pmc3combine[0]
32 * MMCR1[29] = pmc3combine[1]
33 * MMCR1[30] = pmc4combine[0]
34 * MMCR1[31] = pmc4combine[1]
35 *
36 */
37
38/*
39 * Some power9 event codes.
40 */
41#define EVENT(_name, _code) _name = _code,
42
43enum {
44EVENT(PM_CYC, 0x0001e)
45EVENT(PM_INST_CMPL, 0x00002)
46};
47
48#undef EVENT
49
50GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
51GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
52
53static struct attribute *generic_compat_events_attr[] = {
54 GENERIC_EVENT_PTR(PM_CYC),
55 GENERIC_EVENT_PTR(PM_INST_CMPL),
56 NULL
57};
58
59static struct attribute_group generic_compat_pmu_events_group = {
60 .name = "events",
61 .attrs = generic_compat_events_attr,
62};
63
64PMU_FORMAT_ATTR(event, "config:0-19");
65PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
66PMU_FORMAT_ATTR(mark, "config:8");
67PMU_FORMAT_ATTR(combine, "config:10-11");
68PMU_FORMAT_ATTR(unit, "config:12-15");
69PMU_FORMAT_ATTR(pmc, "config:16-19");
70
71static struct attribute *generic_compat_pmu_format_attr[] = {
72 &format_attr_event.attr,
73 &format_attr_pmcxsel.attr,
74 &format_attr_mark.attr,
75 &format_attr_combine.attr,
76 &format_attr_unit.attr,
77 &format_attr_pmc.attr,
78 NULL,
79};
80
81static struct attribute_group generic_compat_pmu_format_group = {
82 .name = "format",
83 .attrs = generic_compat_pmu_format_attr,
84};
85
86static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
87 &generic_compat_pmu_format_group,
88 &generic_compat_pmu_events_group,
89 NULL,
90};
91
92static int compat_generic_events[] = {
93 [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
94 [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
95};
96
97#define C(x) PERF_COUNT_HW_CACHE_##x
98
99/*
100 * Table of generalized cache-related events.
101 * 0 means not supported, -1 means nonsensical, other values
102 * are event codes.
103 */
104static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
105 [ C(L1D) ] = {
106 [ C(OP_READ) ] = {
107 [ C(RESULT_ACCESS) ] = 0,
108 [ C(RESULT_MISS) ] = 0,
109 },
110 [ C(OP_WRITE) ] = {
111 [ C(RESULT_ACCESS) ] = 0,
112 [ C(RESULT_MISS) ] = 0,
113 },
114 [ C(OP_PREFETCH) ] = {
115 [ C(RESULT_ACCESS) ] = 0,
116 [ C(RESULT_MISS) ] = 0,
117 },
118 },
119 [ C(L1I) ] = {
120 [ C(OP_READ) ] = {
121 [ C(RESULT_ACCESS) ] = 0,
122 [ C(RESULT_MISS) ] = 0,
123 },
124 [ C(OP_WRITE) ] = {
125 [ C(RESULT_ACCESS) ] = 0,
126 [ C(RESULT_MISS) ] = -1,
127 },
128 [ C(OP_PREFETCH) ] = {
129 [ C(RESULT_ACCESS) ] = 0,
130 [ C(RESULT_MISS) ] = 0,
131 },
132 },
133 [ C(LL) ] = {
134 [ C(OP_READ) ] = {
135 [ C(RESULT_ACCESS) ] = 0,
136 [ C(RESULT_MISS) ] = 0,
137 },
138 [ C(OP_WRITE) ] = {
139 [ C(RESULT_ACCESS) ] = 0,
140 [ C(RESULT_MISS) ] = 0,
141 },
142 [ C(OP_PREFETCH) ] = {
143 [ C(RESULT_ACCESS) ] = 0,
144 [ C(RESULT_MISS) ] = 0,
145 },
146 },
147 [ C(DTLB) ] = {
148 [ C(OP_READ) ] = {
149 [ C(RESULT_ACCESS) ] = 0,
150 [ C(RESULT_MISS) ] = 0,
151 },
152 [ C(OP_WRITE) ] = {
153 [ C(RESULT_ACCESS) ] = -1,
154 [ C(RESULT_MISS) ] = -1,
155 },
156 [ C(OP_PREFETCH) ] = {
157 [ C(RESULT_ACCESS) ] = -1,
158 [ C(RESULT_MISS) ] = -1,
159 },
160 },
161 [ C(ITLB) ] = {
162 [ C(OP_READ) ] = {
163 [ C(RESULT_ACCESS) ] = 0,
164 [ C(RESULT_MISS) ] = 0,
165 },
166 [ C(OP_WRITE) ] = {
167 [ C(RESULT_ACCESS) ] = -1,
168 [ C(RESULT_MISS) ] = -1,
169 },
170 [ C(OP_PREFETCH) ] = {
171 [ C(RESULT_ACCESS) ] = -1,
172 [ C(RESULT_MISS) ] = -1,
173 },
174 },
175 [ C(BPU) ] = {
176 [ C(OP_READ) ] = {
177 [ C(RESULT_ACCESS) ] = 0,
178 [ C(RESULT_MISS) ] = 0,
179 },
180 [ C(OP_WRITE) ] = {
181 [ C(RESULT_ACCESS) ] = -1,
182 [ C(RESULT_MISS) ] = -1,
183 },
184 [ C(OP_PREFETCH) ] = {
185 [ C(RESULT_ACCESS) ] = -1,
186 [ C(RESULT_MISS) ] = -1,
187 },
188 },
189 [ C(NODE) ] = {
190 [ C(OP_READ) ] = {
191 [ C(RESULT_ACCESS) ] = -1,
192 [ C(RESULT_MISS) ] = -1,
193 },
194 [ C(OP_WRITE) ] = {
195 [ C(RESULT_ACCESS) ] = -1,
196 [ C(RESULT_MISS) ] = -1,
197 },
198 [ C(OP_PREFETCH) ] = {
199 [ C(RESULT_ACCESS) ] = -1,
200 [ C(RESULT_MISS) ] = -1,
201 },
202 },
203};
204
205#undef C
206
207static struct power_pmu generic_compat_pmu = {
208 .name = "GENERIC_COMPAT",
209 .n_counter = MAX_PMU_COUNTERS,
210 .add_fields = ISA207_ADD_FIELDS,
211 .test_adder = ISA207_TEST_ADDER,
212 .compute_mmcr = isa207_compute_mmcr,
213 .get_constraint = isa207_get_constraint,
214 .disable_pmc = isa207_disable_pmc,
215 .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
216 .n_generic = ARRAY_SIZE(compat_generic_events),
217 .generic_events = compat_generic_events,
218 .cache_events = &generic_compat_cache_events,
219 .attr_groups = generic_compat_pmu_attr_groups,
220};
221
222int init_generic_compat_pmu(void)
223{
224 int rc = 0;
225
226 rc = register_power_pmu(&generic_compat_pmu);
227 if (rc)
228 return rc;
229
230 /* Tell userspace that EBB is supported */
231 cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
232
233 return 0;
234}
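The raw event layout documented at the top of this new file packs pmcxsel into config bits 0-7, mark into bit 8, combine into bits 10-11, unit into bits 12-15 and pmc into bits 16-19. Composing a config value from those fields is plain shift-and-or; the field values below are invented for illustration:

#include <stdio.h>
#include <stdint.h>

static uint64_t make_event(uint64_t pmc, uint64_t unit, uint64_t combine,
			   uint64_t mark, uint64_t pmcxsel)
{
	return (pmc << 16) | (unit << 12) | (combine << 10) |
	       (mark << 8) | pmcxsel;
}

int main(void)
{
	/* e.g. PMC1 with pmcxsel 0x1e, the PM_CYC-style encoding */
	printf("config = 0x%llx\n",
	       (unsigned long long)make_event(1, 0, 0, 0, 0x1e));
	return 0;
}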
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index b1c37cc3fa98..31fa753e2eb2 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -43,12 +43,17 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
43static struct imc_pmu *thread_imc_pmu; 43static struct imc_pmu *thread_imc_pmu;
44static int thread_imc_mem_size; 44static int thread_imc_mem_size;
45 45
46/* Trace IMC data structures */
47static DEFINE_PER_CPU(u64 *, trace_imc_mem);
48static struct imc_pmu_ref *trace_imc_refc;
49static int trace_imc_mem_size;
50
46static struct imc_pmu *imc_event_to_pmu(struct perf_event *event) 51static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
47{ 52{
48 return container_of(event->pmu, struct imc_pmu, pmu); 53 return container_of(event->pmu, struct imc_pmu, pmu);
49} 54}
50 55
51PMU_FORMAT_ATTR(event, "config:0-40"); 56PMU_FORMAT_ATTR(event, "config:0-61");
52PMU_FORMAT_ATTR(offset, "config:0-31"); 57PMU_FORMAT_ATTR(offset, "config:0-31");
53PMU_FORMAT_ATTR(rvalue, "config:32"); 58PMU_FORMAT_ATTR(rvalue, "config:32");
54PMU_FORMAT_ATTR(mode, "config:33-40"); 59PMU_FORMAT_ATTR(mode, "config:33-40");
@@ -65,6 +70,25 @@ static struct attribute_group imc_format_group = {
65 .attrs = imc_format_attrs, 70 .attrs = imc_format_attrs,
66}; 71};
67 72
73/* Format attribute for imc trace-mode */
74PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
75PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
76PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
77PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
78static struct attribute *trace_imc_format_attrs[] = {
79 &format_attr_event.attr,
80 &format_attr_cpmc_reserved.attr,
81 &format_attr_cpmc_event.attr,
82 &format_attr_cpmc_samplesel.attr,
83 &format_attr_cpmc_load.attr,
84 NULL,
85};
86
87static struct attribute_group trace_imc_format_group = {
 88	.name = "format",
 89	.attrs = trace_imc_format_attrs,
90};
91
68/* Get the cpumask printed to a buffer "buf" */ 92/* Get the cpumask printed to a buffer "buf" */
69static ssize_t imc_pmu_cpumask_get_attr(struct device *dev, 93static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
70 struct device_attribute *attr, 94 struct device_attribute *attr,
@@ -487,6 +511,11 @@ static int nest_imc_event_init(struct perf_event *event)
 487 * Get the base memory address for this cpu. 511 * Get the base memory address for this cpu.
488 */ 512 */
489 chip_id = cpu_to_chip_id(event->cpu); 513 chip_id = cpu_to_chip_id(event->cpu);
514
 515 /* Return if chip_id is not valid */
516 if (chip_id < 0)
517 return -ENODEV;
518
490 pcni = pmu->mem_info; 519 pcni = pmu->mem_info;
491 do { 520 do {
492 if (pcni->id == chip_id) { 521 if (pcni->id == chip_id) {
@@ -494,7 +523,7 @@ static int nest_imc_event_init(struct perf_event *event)
494 break; 523 break;
495 } 524 }
496 pcni++; 525 pcni++;
497 } while (pcni); 526 } while (pcni->vbase != 0);
498 527
499 if (!flag) 528 if (!flag)
500 return -ENODEV; 529 return -ENODEV;
@@ -788,8 +817,11 @@ static int core_imc_event_init(struct perf_event *event)
788} 817}
789 818
790/* 819/*
 791 * Allocates a page of memory for each of the online cpus, and write the 820 * Allocates a page of memory for each of the online cpus, and loads
 821 * LDBAR with 0.
 822 * The physical base address of the page allocated for a cpu will be
 823 * written to the LDBAR for that cpu when the thread-imc event
 824 * is added.
793 * 825 *
794 * LDBAR Register Layout: 826 * LDBAR Register Layout:
795 * 827 *
@@ -807,7 +839,7 @@ static int core_imc_event_init(struct perf_event *event)
807 */ 839 */
808static int thread_imc_mem_alloc(int cpu_id, int size) 840static int thread_imc_mem_alloc(int cpu_id, int size)
809{ 841{
810 u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id); 842 u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
811 int nid = cpu_to_node(cpu_id); 843 int nid = cpu_to_node(cpu_id);
812 844
813 if (!local_mem) { 845 if (!local_mem) {
@@ -824,9 +856,7 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
824 per_cpu(thread_imc_mem, cpu_id) = local_mem; 856 per_cpu(thread_imc_mem, cpu_id) = local_mem;
825 } 857 }
826 858
827 ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE; 859 mtspr(SPRN_LDBAR, 0);
828
829 mtspr(SPRN_LDBAR, ldbar_value);
830 return 0; 860 return 0;
831} 861}
832 862
@@ -858,6 +888,9 @@ static int thread_imc_event_init(struct perf_event *event)
858 if (event->attr.type != event->pmu->type) 888 if (event->attr.type != event->pmu->type)
859 return -ENOENT; 889 return -ENOENT;
860 890
891 if (!capable(CAP_SYS_ADMIN))
892 return -EACCES;
893
861 /* Sampling not supported */ 894 /* Sampling not supported */
862 if (event->hw.sample_period) 895 if (event->hw.sample_period)
863 return -EINVAL; 896 return -EINVAL;
@@ -977,6 +1010,7 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
977{ 1010{
978 int core_id; 1011 int core_id;
979 struct imc_pmu_ref *ref; 1012 struct imc_pmu_ref *ref;
1013 u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
980 1014
981 if (flags & PERF_EF_START) 1015 if (flags & PERF_EF_START)
982 imc_event_start(event, flags); 1016 imc_event_start(event, flags);
@@ -985,6 +1019,9 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
985 return -EINVAL; 1019 return -EINVAL;
986 1020
987 core_id = smp_processor_id() / threads_per_core; 1021 core_id = smp_processor_id() / threads_per_core;
1022 ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
1023 mtspr(SPRN_LDBAR, ldbar_value);
1024
988 /* 1025 /*
989 * imc pmus are enabled only when it is used. 1026 * imc pmus are enabled only when it is used.
990 * See if this is triggered for the first time. 1027 * See if this is triggered for the first time.
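With this change the LDBAR is only loaded once the thread-imc event is actually added on a CPU, rather than at buffer-allocation time. The value itself is the per-cpu buffer address masked down plus an enable bit; a standalone sketch (mask and enable constants are assumed here, not verified architected values):

#include <stdio.h>
#include <stdint.h>

#define THREAD_IMC_LDBAR_MASK	0x0003ffffffffe000ULL	/* assumed */
#define THREAD_IMC_ENABLE	0x8000000000000000ULL	/* assumed */

int main(void)
{
	uint64_t local_mem = 0x12346000ULL;	/* pretend buffer address */
	uint64_t ldbar_value = (local_mem & THREAD_IMC_LDBAR_MASK) |
			       THREAD_IMC_ENABLE;

	printf("ldbar = 0x%016llx\n", (unsigned long long)ldbar_value);
	return 0;
}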
@@ -1016,11 +1053,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
1016 int core_id; 1053 int core_id;
1017 struct imc_pmu_ref *ref; 1054 struct imc_pmu_ref *ref;
1018 1055
1019 /* 1056 mtspr(SPRN_LDBAR, 0);
1020 * Take a snapshot and calculate the delta and update
1021 * the event counter values.
1022 */
1023 imc_event_update(event);
1024 1057
1025 core_id = smp_processor_id() / threads_per_core; 1058 core_id = smp_processor_id() / threads_per_core;
1026 ref = &core_imc_refc[core_id]; 1059 ref = &core_imc_refc[core_id];
@@ -1039,6 +1072,240 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
1039 ref->refc = 0; 1072 ref->refc = 0;
1040 } 1073 }
1041 mutex_unlock(&ref->lock); 1074 mutex_unlock(&ref->lock);
1075 /*
1076 * Take a snapshot and calculate the delta and update
1077 * the event counter values.
1078 */
1079 imc_event_update(event);
1080}
1081
1082/*
1083 * Allocate a page of memory for each cpu, and load LDBAR with 0.
1084 */
1085static int trace_imc_mem_alloc(int cpu_id, int size)
1086{
1087 u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
1088 int phys_id = cpu_to_node(cpu_id), rc = 0;
1089 int core_id = (cpu_id / threads_per_core);
1090
1091 if (!local_mem) {
1092 local_mem = page_address(alloc_pages_node(phys_id,
1093 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
1094 __GFP_NOWARN, get_order(size)));
1095 if (!local_mem)
1096 return -ENOMEM;
1097 per_cpu(trace_imc_mem, cpu_id) = local_mem;
1098
1099 /* Initialise the counters for trace mode */
1100 rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
1101 get_hard_smp_processor_id(cpu_id));
1102 if (rc) {
1103 pr_info("IMC:opal init failed for trace imc\n");
1104 return rc;
1105 }
1106 }
1107
 1108 /* Init the mutex, if not done already */
1109 trace_imc_refc[core_id].id = core_id;
1110 mutex_init(&trace_imc_refc[core_id].lock);
1111
1112 mtspr(SPRN_LDBAR, 0);
1113 return 0;
1114}
1115
1116static int ppc_trace_imc_cpu_online(unsigned int cpu)
1117{
1118 return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
1119}
1120
1121static int ppc_trace_imc_cpu_offline(unsigned int cpu)
1122{
1123 mtspr(SPRN_LDBAR, 0);
1124 return 0;
1125}
1126
1127static int trace_imc_cpu_init(void)
1128{
1129 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
1130 "perf/powerpc/imc_trace:online",
1131 ppc_trace_imc_cpu_online,
1132 ppc_trace_imc_cpu_offline);
1133}
1134
1135static u64 get_trace_imc_event_base_addr(void)
1136{
1137 return (u64)per_cpu(trace_imc_mem, smp_processor_id());
1138}
1139
1140/*
1141 * Function to parse trace-imc data obtained
1142 * and to prepare the perf sample.
1143 */
1144static int trace_imc_prepare_sample(struct trace_imc_data *mem,
1145 struct perf_sample_data *data,
1146 u64 *prev_tb,
1147 struct perf_event_header *header,
1148 struct perf_event *event)
1149{
1150 /* Sanity checks for a valid record */
1151 if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
1152 *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
1153 else
1154 return -EINVAL;
1155
1156 if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
1157 be64_to_cpu(READ_ONCE(mem->tb2)))
1158 return -EINVAL;
1159
1160 /* Prepare perf sample */
1161 data->ip = be64_to_cpu(READ_ONCE(mem->ip));
1162 data->period = event->hw.last_period;
1163
1164 header->type = PERF_RECORD_SAMPLE;
1165 header->size = sizeof(*header) + event->header_size;
1166 header->misc = 0;
1167
1168 if (is_kernel_addr(data->ip))
1169 header->misc |= PERF_RECORD_MISC_KERNEL;
1170 else
1171 header->misc |= PERF_RECORD_MISC_USER;
1172
1173 perf_event_header__init_id(header, data, event);
1174
1175 return 0;
1176}
1177
1178static void dump_trace_imc_data(struct perf_event *event)
1179{
1180 struct trace_imc_data *mem;
1181 int i, ret;
1182 u64 prev_tb = 0;
1183
1184 mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
1185 for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
1186 i++, mem++) {
1187 struct perf_sample_data data;
1188 struct perf_event_header header;
1189
1190 ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
1191 if (ret) /* Exit, if not a valid record */
1192 break;
1193 else {
1194 /* If this is a valid record, create the sample */
1195 struct perf_output_handle handle;
1196
1197 if (perf_output_begin(&handle, event, header.size))
1198 return;
1199
1200 perf_output_sample(&handle, &header, &data, event);
1201 perf_output_end(&handle);
1202 }
1203 }
1204}
1205
1206static int trace_imc_event_add(struct perf_event *event, int flags)
1207{
1208 int core_id = smp_processor_id() / threads_per_core;
1209 struct imc_pmu_ref *ref = NULL;
1210 u64 local_mem, ldbar_value;
1211
1212 /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
1213 local_mem = get_trace_imc_event_base_addr();
1214 ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
1215
1216 if (core_imc_refc)
1217 ref = &core_imc_refc[core_id];
1218 if (!ref) {
1219 /* If core-imc is not enabled, use trace-imc reference count */
1220 if (trace_imc_refc)
1221 ref = &trace_imc_refc[core_id];
1222 if (!ref)
1223 return -EINVAL;
1224 }
1225 mtspr(SPRN_LDBAR, ldbar_value);
1226 mutex_lock(&ref->lock);
1227 if (ref->refc == 0) {
1228 if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
1229 get_hard_smp_processor_id(smp_processor_id()))) {
1230 mutex_unlock(&ref->lock);
1231 pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
1232 mtspr(SPRN_LDBAR, 0);
1233 return -EINVAL;
1234 }
1235 }
1236 ++ref->refc;
1237 mutex_unlock(&ref->lock);
1238
1239 return 0;
1240}
1241
1242static void trace_imc_event_read(struct perf_event *event)
1243{
1244 return;
1245}
1246
1247static void trace_imc_event_stop(struct perf_event *event, int flags)
1248{
1249 u64 local_mem = get_trace_imc_event_base_addr();
1250 dump_trace_imc_data(event);
1251 memset((void *)local_mem, 0, sizeof(u64));
1252}
1253
1254static void trace_imc_event_start(struct perf_event *event, int flags)
1255{
1256 return;
1257}
1258
1259static void trace_imc_event_del(struct perf_event *event, int flags)
1260{
1261 int core_id = smp_processor_id() / threads_per_core;
1262 struct imc_pmu_ref *ref = NULL;
1263
1264 if (core_imc_refc)
1265 ref = &core_imc_refc[core_id];
1266 if (!ref) {
1267 /* If core-imc is not enabled, use trace-imc reference count */
1268 if (trace_imc_refc)
1269 ref = &trace_imc_refc[core_id];
1270 if (!ref)
1271 return;
1272 }
1273 mtspr(SPRN_LDBAR, 0);
1274 mutex_lock(&ref->lock);
1275 ref->refc--;
1276 if (ref->refc == 0) {
1277 if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
1278 get_hard_smp_processor_id(smp_processor_id()))) {
1279 mutex_unlock(&ref->lock);
1280 pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
1281 return;
1282 }
1283 } else if (ref->refc < 0) {
1284 ref->refc = 0;
1285 }
1286 mutex_unlock(&ref->lock);
1287 trace_imc_event_stop(event, flags);
1288}
1289
1290static int trace_imc_event_init(struct perf_event *event)
1291{
1292 struct task_struct *target;
1293
1294 if (event->attr.type != event->pmu->type)
1295 return -ENOENT;
1296
1297 if (!capable(CAP_SYS_ADMIN))
1298 return -EACCES;
1299
 1300 /* Return if this is a counting event */
1301 if (event->attr.sample_period == 0)
1302 return -ENOENT;
1303
1304 event->hw.idx = -1;
1305 target = event->hw.target;
1306
1307 event->pmu->task_ctx_nr = perf_hw_context;
1308 return 0;
1042} 1309}
1043 1310
1044/* update_pmu_ops : Populate the appropriate operations for "pmu" */ 1311/* update_pmu_ops : Populate the appropriate operations for "pmu" */
@@ -1071,6 +1338,14 @@ static int update_pmu_ops(struct imc_pmu *pmu)
1071 pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn; 1338 pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
1072 pmu->pmu.commit_txn = thread_imc_pmu_commit_txn; 1339 pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
1073 break; 1340 break;
1341 case IMC_DOMAIN_TRACE:
1342 pmu->pmu.event_init = trace_imc_event_init;
1343 pmu->pmu.add = trace_imc_event_add;
1344 pmu->pmu.del = trace_imc_event_del;
1345 pmu->pmu.start = trace_imc_event_start;
1346 pmu->pmu.stop = trace_imc_event_stop;
1347 pmu->pmu.read = trace_imc_event_read;
1348 pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
1074 default: 1349 default:
1075 break; 1350 break;
1076 } 1351 }
@@ -1163,6 +1438,18 @@ static void cleanup_all_thread_imc_memory(void)
1163 } 1438 }
1164} 1439}
1165 1440
1441static void cleanup_all_trace_imc_memory(void)
1442{
1443 int i, order = get_order(trace_imc_mem_size);
1444
1445 for_each_online_cpu(i) {
1446 if (per_cpu(trace_imc_mem, i))
1447 free_pages((u64)per_cpu(trace_imc_mem, i), order);
1448
1449 }
1450 kfree(trace_imc_refc);
1451}
1452
1166/* Function to free the attr_groups which are dynamically allocated */ 1453/* Function to free the attr_groups which are dynamically allocated */
1167static void imc_common_mem_free(struct imc_pmu *pmu_ptr) 1454static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
1168{ 1455{
@@ -1204,6 +1491,11 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
1204 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); 1491 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
1205 cleanup_all_thread_imc_memory(); 1492 cleanup_all_thread_imc_memory();
1206 } 1493 }
1494
1495 if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
1496 cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
1497 cleanup_all_trace_imc_memory();
1498 }
1207} 1499}
1208 1500
1209/* 1501/*
@@ -1286,6 +1578,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
1286 1578
1287 thread_imc_pmu = pmu_ptr; 1579 thread_imc_pmu = pmu_ptr;
1288 break; 1580 break;
1581 case IMC_DOMAIN_TRACE:
1582 /* Update the pmu name */
1583 pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
1584 if (!pmu_ptr->pmu.name)
1585 return -ENOMEM;
1586
1587 nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
1588 trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
1589 GFP_KERNEL);
1590 if (!trace_imc_refc)
1591 return -ENOMEM;
1592
1593 trace_imc_mem_size = pmu_ptr->counter_mem_size;
1594 for_each_online_cpu(cpu) {
1595 res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
1596 if (res) {
1597 cleanup_all_trace_imc_memory();
1598 goto err;
1599 }
1600 }
1601 break;
1289 default: 1602 default:
1290 return -EINVAL; 1603 return -EINVAL;
1291 } 1604 }
@@ -1359,6 +1672,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
1359 } 1672 }
1360 1673
1361 break; 1674 break;
1675 case IMC_DOMAIN_TRACE:
1676 ret = trace_imc_cpu_init();
1677 if (ret) {
1678 cleanup_all_trace_imc_memory();
1679 goto err_free_mem;
1680 }
1681
1682 break;
1362 default: 1683 default:
1363 return -EINVAL; /* Unknown domain */ 1684 return -EINVAL; /* Unknown domain */
1364 } 1685 }
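Much of the trace-imc machinery added above hinges on trace_imc_prepare_sample() accepting a record only when its timebase advances past the previous record's (and tb1 matches tb2), so dump_trace_imc_data() naturally stops at the first stale entry. The monotonicity rule in isolation:

#include <stdio.h>
#include <stdint.h>

static int record_valid(uint64_t tb, uint64_t *prev_tb)
{
	if (tb > *prev_tb) {
		*prev_tb = tb;
		return 1;
	}
	return 0;	/* stale or unwritten entry: stop the walk */
}

int main(void)
{
	/* pretend ring-buffer timestamps; zeros are unwritten slots */
	uint64_t ring[] = { 100, 150, 220, 0, 0 };
	uint64_t prev = 0;
	unsigned int i;

	for (i = 0; i < 5 && record_valid(ring[i], &prev); i++)
		printf("sample %u at tb=%llu\n",
		       i, (unsigned long long)ring[i]);
	return 0;
}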
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
new file mode 100644
index 000000000000..f755c64da137
--- /dev/null
+++ b/arch/powerpc/perf/internal.h
@@ -0,0 +1,12 @@
1// SPDX-License-Identifier: GPL-2.0+
2//
3// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
4
5extern int init_ppc970_pmu(void);
6extern int init_power5_pmu(void);
7extern int init_power5p_pmu(void);
8extern int init_power6_pmu(void);
9extern int init_power7_pmu(void);
10extern int init_power8_pmu(void);
11extern int init_power9_pmu(void);
12extern int init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index 0526dac66007..9aa803504cb2 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = {
677 .cache_events = &power5p_cache_events, 677 .cache_events = &power5p_cache_events,
678}; 678};
679 679
680static int __init init_power5p_pmu(void) 680int init_power5p_pmu(void)
681{ 681{
682 if (!cur_cpu_spec->oprofile_cpu_type || 682 if (!cur_cpu_spec->oprofile_cpu_type ||
683 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") 683 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
@@ -686,5 +686,3 @@ static int __init init_power5p_pmu(void)
686 686
687 return register_power_pmu(&power5p_pmu); 687 return register_power_pmu(&power5p_pmu);
688} 688}
689
690early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 4dc99f9f7962..30cb13d081a9 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = {
618 .flags = PPMU_HAS_SSLOT, 618 .flags = PPMU_HAS_SSLOT,
619}; 619};
620 620
621static int __init init_power5_pmu(void) 621int init_power5_pmu(void)
622{ 622{
623 if (!cur_cpu_spec->oprofile_cpu_type || 623 if (!cur_cpu_spec->oprofile_cpu_type ||
624 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) 624 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
@@ -626,5 +626,3 @@ static int __init init_power5_pmu(void)
626 626
627 return register_power_pmu(&power5_pmu); 627 return register_power_pmu(&power5_pmu);
628} 628}
629
630early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 9c9d646b68a1..80ec48632cfe 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -540,7 +540,7 @@ static struct power_pmu power6_pmu = {
540 .cache_events = &power6_cache_events, 540 .cache_events = &power6_cache_events,
541}; 541};
542 542
543static int __init init_power6_pmu(void) 543int init_power6_pmu(void)
544{ 544{
545 if (!cur_cpu_spec->oprofile_cpu_type || 545 if (!cur_cpu_spec->oprofile_cpu_type ||
546 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) 546 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
@@ -548,5 +548,3 @@ static int __init init_power6_pmu(void)
548 548
549 return register_power_pmu(&power6_pmu); 549 return register_power_pmu(&power6_pmu);
550} 550}
551
552early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 6dbae9884ec4..bb6efd5d2530 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -445,7 +445,7 @@ static struct power_pmu power7_pmu = {
445 .cache_events = &power7_cache_events, 445 .cache_events = &power7_cache_events,
446}; 446};
447 447
448static int __init init_power7_pmu(void) 448int init_power7_pmu(void)
449{ 449{
450 if (!cur_cpu_spec->oprofile_cpu_type || 450 if (!cur_cpu_spec->oprofile_cpu_type ||
451 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) 451 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
@@ -456,5 +456,3 @@ static int __init init_power7_pmu(void)
456 456
457 return register_power_pmu(&power7_pmu); 457 return register_power_pmu(&power7_pmu);
458} 458}
459
460early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index d12a2db26353..bcc3409a06de 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -379,7 +379,7 @@ static struct power_pmu power8_pmu = {
379 .bhrb_nr = 32, 379 .bhrb_nr = 32,
380}; 380};
381 381
382static int __init init_power8_pmu(void) 382int init_power8_pmu(void)
383{ 383{
384 int rc; 384 int rc;
385 385
@@ -399,4 +399,3 @@ static int __init init_power8_pmu(void)
399 399
400 return 0; 400 return 0;
401} 401}
402early_initcall(init_power8_pmu);
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 063c9d9f2516..6b1dc9a83ede 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -63,8 +63,6 @@ EVENT(PM_RUN_CYC_ALT, 0x200f4)
63/* Instruction Dispatched */ 63/* Instruction Dispatched */
64EVENT(PM_INST_DISP, 0x200f2) 64EVENT(PM_INST_DISP, 0x200f2)
65EVENT(PM_INST_DISP_ALT, 0x300f2) 65EVENT(PM_INST_DISP_ALT, 0x300f2)
66/* Alternate Branch event code */
67EVENT(PM_BR_CMPL_ALT, 0x10012)
68/* Branch event that are not strongly biased */ 66/* Branch event that are not strongly biased */
69EVENT(PM_BR_2PATH, 0x20036) 67EVENT(PM_BR_2PATH, 0x20036)
 70/* Alternate branch events that are not strongly biased */ 68/* Alternate branch events that are not strongly biased */
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 030544e35959..3a31ac6f4805 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -437,7 +437,7 @@ static struct power_pmu power9_pmu = {
437 .bhrb_nr = 32, 437 .bhrb_nr = 32,
438}; 438};
439 439
440static int __init init_power9_pmu(void) 440int init_power9_pmu(void)
441{ 441{
442 int rc = 0; 442 int rc = 0;
443 unsigned int pvr = mfspr(SPRN_PVR); 443 unsigned int pvr = mfspr(SPRN_PVR);
@@ -467,4 +467,3 @@ static int __init init_power9_pmu(void)
467 467
468 return 0; 468 return 0;
469} 469}
470early_initcall(init_power9_pmu);
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 8b6a8a36fa38..1d3370914022 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -490,7 +490,7 @@ static struct power_pmu ppc970_pmu = {
490 .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING, 490 .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
491}; 491};
492 492
493static int __init init_ppc970_pmu(void) 493int init_ppc970_pmu(void)
494{ 494{
495 if (!cur_cpu_spec->oprofile_cpu_type || 495 if (!cur_cpu_spec->oprofile_cpu_type ||
496 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") 496 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
@@ -499,5 +499,3 @@ static int __init init_ppc970_pmu(void)
499 499
500 return register_power_pmu(&ppc970_pmu); 500 return register_power_pmu(&ppc970_pmu);
501} 501}
502
503early_initcall(init_ppc970_pmu);
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
index 5c31d8292d3b..e7c2c3fb011a 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -221,8 +221,10 @@ int mpc837x_usb_cfg(void)
221 int ret = 0; 221 int ret = 0;
222 222
223 np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); 223 np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
224 if (!np || !of_device_is_available(np)) 224 if (!np || !of_device_is_available(np)) {
225 of_node_put(np);
225 return -ENODEV; 226 return -ENODEV;
227 }
226 prop = of_get_property(np, "phy_type", NULL); 228 prop = of_get_property(np, "phy_type", NULL);
227 229
228 if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { 230 if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
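The fix above balances the reference taken by of_find_compatible_node(): the early-error return previously leaked it. The invariant, modeled with a toy refcount (of_node_put() on a NULL pointer is a no-op, which is why the unconditional put is safe):

#include <stdio.h>

struct node { int refcount; };

static struct node *find_node(struct node *n)
{
	n->refcount++;		/* every successful find takes a ref */
	return n;
}

static void node_put(struct node *n)
{
	if (n)
		n->refcount--;	/* NULL-safe, like of_node_put() */
}

static int cfg(struct node *n, int available)
{
	struct node *np = find_node(n);

	if (!available) {
		node_put(np);	/* the fix: drop the ref before bailing */
		return -1;
	}
	/* ... use np ... */
	node_put(np);
	return 0;
}

int main(void)
{
	struct node n = { 0 };

	cfg(&n, 0);
	cfg(&n, 1);
	printf("refcount: %d (balanced)\n", n.refcount);
	return 0;
}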
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d43ef3..e9617d35fd1f 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,10 +153,9 @@ int mpc8xx_pic_init(void)
153 if (mpc8xx_pic_host == NULL) { 153 if (mpc8xx_pic_host == NULL) {
154 printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); 154 printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
155 ret = -ENOMEM; 155 ret = -ENOMEM;
156 goto out;
157 } 156 }
158 return 0;
159 157
158 ret = 0;
160out: 159out:
161 of_node_put(np); 160 of_node_put(np);
162 return ret; 161 return ret;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 50cd09b4e05d..d0e172d47574 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -25,6 +25,8 @@ config PPC_BOOK3S_32
25 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" 25 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
26 select PPC_FPU 26 select PPC_FPU
27 select PPC_HAVE_PMU_SUPPORT 27 select PPC_HAVE_PMU_SUPPORT
28 select PPC_HAVE_KUEP
29 select PPC_HAVE_KUAP
28 30
29config PPC_85xx 31config PPC_85xx
30 bool "Freescale 85xx" 32 bool "Freescale 85xx"
@@ -34,6 +36,9 @@ config PPC_8xx
34 bool "Freescale 8xx" 36 bool "Freescale 8xx"
35 select FSL_SOC 37 select FSL_SOC
36 select SYS_SUPPORTS_HUGETLBFS 38 select SYS_SUPPORTS_HUGETLBFS
39 select PPC_HAVE_KUEP
40 select PPC_HAVE_KUAP
41 select PPC_MM_SLICES if HUGETLB_PAGE
37 42
38config 40x 43config 40x
39 bool "AMCC 40x" 44 bool "AMCC 40x"
@@ -75,6 +80,7 @@ config PPC_BOOK3S_64
75 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE 80 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
76 select ARCH_SUPPORTS_NUMA_BALANCING 81 select ARCH_SUPPORTS_NUMA_BALANCING
77 select IRQ_WORK 82 select IRQ_WORK
83 select PPC_MM_SLICES
78 84
79config PPC_BOOK3E_64 85config PPC_BOOK3E_64
80 bool "Embedded processors" 86 bool "Embedded processors"
@@ -326,6 +332,8 @@ config PPC_RADIX_MMU
326 bool "Radix MMU Support" 332 bool "Radix MMU Support"
327 depends on PPC_BOOK3S_64 && HUGETLB_PAGE 333 depends on PPC_BOOK3S_64 && HUGETLB_PAGE
328 select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA 334 select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
335 select PPC_HAVE_KUEP
336 select PPC_HAVE_KUAP
329 default y 337 default y
330 help 338 help
331 Enable support for the Power ISA 3.0 Radix style MMU. Currently this 339 Enable support for the Power ISA 3.0 Radix style MMU. Currently this
@@ -345,6 +353,37 @@ config PPC_RADIX_MMU_DEFAULT
345 353
346 If you're unsure, say Y. 354 If you're unsure, say Y.
347 355
356config PPC_HAVE_KUEP
357 bool
358
359config PPC_KUEP
360 bool "Kernel Userspace Execution Prevention"
361 depends on PPC_HAVE_KUEP
362 default y
363 help
364 Enable support for Kernel Userspace Execution Prevention (KUEP)
365
366 If you're unsure, say Y.
367
368config PPC_HAVE_KUAP
369 bool
370
371config PPC_KUAP
372 bool "Kernel Userspace Access Protection"
373 depends on PPC_HAVE_KUAP
374 default y
375 help
376 Enable support for Kernel Userspace Access Protection (KUAP)
377
378 If you're unsure, say Y.
379
380config PPC_KUAP_DEBUG
381 bool "Extra debugging for Kernel Userspace Access Protection"
382 depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32)
383 help
384 Add extra debugging for Kernel Userspace Access Protection (KUAP)
385 If you're unsure, say N.
386
348config ARCH_ENABLE_HUGEPAGE_MIGRATION 387config ARCH_ENABLE_HUGEPAGE_MIGRATION
349 def_bool y 388 def_bool y
350 depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION 389 depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -354,14 +393,16 @@ config PPC_MMU_NOHASH
354 def_bool y 393 def_bool y
355 depends on !PPC_BOOK3S 394 depends on !PPC_BOOK3S
356 395
396config PPC_MMU_NOHASH_32
397 def_bool y
398 depends on PPC_MMU_NOHASH && PPC32
399
357config PPC_BOOK3E_MMU 400config PPC_BOOK3E_MMU
358 def_bool y 401 def_bool y
359 depends on FSL_BOOKE || PPC_BOOK3E 402 depends on FSL_BOOKE || PPC_BOOK3E
360 403
361config PPC_MM_SLICES 404config PPC_MM_SLICES
362 bool 405 bool
363 default y if PPC_BOOK3S_64
364 default y if PPC_8xx && HUGETLB_PAGE
365 406
366config PPC_HAVE_PMU_SUPPORT 407config PPC_HAVE_PMU_SUPPORT
367 bool 408 bool
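The new PPC_HAVE_KUAP/PPC_KUEP split lets each platform advertise that it can implement the protection while the user-visible options stay common. At runtime the effect of KUAP is that user memory is inaccessible from kernel mode except inside an explicitly opened window around copy_to/from_user(). A hedged userspace model of that discipline (the function names and the flag are illustrative, not the kernel's exact API):

#include <stdio.h>
#include <string.h>

static int user_access_allowed;	/* models the MMU state, not a real flag */

static void allow_user_access(void)	{ user_access_allowed = 1; }
static void prevent_user_access(void)	{ user_access_allowed = 0; }

static int copy_to_user_sketch(char *to, const char *from, size_t n)
{
	if (!user_access_allowed)
		return -1;	/* a real kernel would fault here */
	memcpy(to, from, n);
	return 0;
}

int main(void)
{
	char buf[8];

	allow_user_access();
	printf("bracketed copy: %d\n", copy_to_user_sketch(buf, "hi", 3));
	prevent_user_access();
	printf("stray copy:     %d\n", copy_to_user_sketch(buf, "hi", 3));
	return 0;
}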
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 7f12c7b78c0f..6646f152d57b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -194,7 +194,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
194 * faults need to be deferred to process context. 194 * faults need to be deferred to process context.
195 */ 195 */
196 if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) && 196 if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
197 (REGION_ID(ea) != USER_REGION_ID)) { 197 (get_region_id(ea) != USER_REGION_ID)) {
198 198
199 spin_unlock(&spu->register_lock); 199 spin_unlock(&spu->register_lock);
200 ret = hash_page(ea, 200 ret = hash_page(ea,
@@ -224,7 +224,7 @@ static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
224 unsigned long ea = (unsigned long)addr; 224 unsigned long ea = (unsigned long)addr;
225 u64 llp; 225 u64 llp;
226 226
227 if (REGION_ID(ea) == KERNEL_REGION_ID) 227 if (get_region_id(ea) == LINEAR_MAP_REGION_ID)
228 llp = mmu_psize_defs[mmu_linear_psize].sllp; 228 llp = mmu_psize_defs[mmu_linear_psize].sllp;
229 else 229 else
230 llp = mmu_psize_defs[mmu_virtual_psize].sllp; 230 llp = mmu_psize_defs[mmu_virtual_psize].sllp;
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 0409714e8070..829bf3697dc9 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -44,7 +44,8 @@
44 44
45#define HOLLY_PCI_CFG_PHYS 0x7c000000 45#define HOLLY_PCI_CFG_PHYS 0x7c000000
46 46
47int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn) 47static int holly_exclude_device(struct pci_controller *hose, u_char bus,
48 u_char devfn)
48{ 49{
49 if (bus == 0 && PCI_SLOT(devfn) == 0) 50 if (bus == 0 && PCI_SLOT(devfn) == 0)
50 return PCIBIOS_DEVICE_NOT_FOUND; 51 return PCIBIOS_DEVICE_NOT_FOUND;
@@ -187,13 +188,13 @@ static void __init holly_init_IRQ(void)
187 tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); 188 tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
188} 189}
189 190
190void holly_show_cpuinfo(struct seq_file *m) 191static void holly_show_cpuinfo(struct seq_file *m)
191{ 192{
192 seq_printf(m, "vendor\t\t: IBM\n"); 193 seq_printf(m, "vendor\t\t: IBM\n");
193 seq_printf(m, "machine\t\t: PPC750 GX/CL\n"); 194 seq_printf(m, "machine\t\t: PPC750 GX/CL\n");
194} 195}
195 196
196void __noreturn holly_restart(char *cmd) 197static void __noreturn holly_restart(char *cmd)
197{ 198{
198 __be32 __iomem *ocn_bar1 = NULL; 199 __be32 __iomem *ocn_bar1 = NULL;
199 unsigned long bar; 200 unsigned long bar;
@@ -233,18 +234,6 @@ void __noreturn holly_restart(char *cmd)
233 for (;;) ; 234 for (;;) ;
234} 235}
235 236
236void holly_power_off(void)
237{
238 local_irq_disable();
239 /* No way to shut power off with software */
240 for (;;) ;
241}
242
243void holly_halt(void)
244{
245 holly_power_off();
246}
247
248/* 237/*
249 * Called very early, device-tree isn't unflattened 238 * Called very early, device-tree isn't unflattened
250 */ 239 */
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 20ebf35d7913..f4247ade71ca 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -2,6 +2,12 @@
2CFLAGS_bootx_init.o += -fPIC 2CFLAGS_bootx_init.o += -fPIC
3CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector) 3CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector)
4 4
5KASAN_SANITIZE_bootx_init.o := n
6
7ifdef CONFIG_KASAN
8CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING
9endif
10
5ifdef CONFIG_FUNCTION_TRACER 11ifdef CONFIG_FUNCTION_TRACER
6# Do not trace early boot code 12# Do not trace early boot code
7CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE) 13CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index e52f9b06dd9c..c9133f7908ca 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -16,6 +16,7 @@
16#include <linux/device.h> 16#include <linux/device.h>
17#include <linux/cpu.h> 17#include <linux/cpu.h>
18 18
19#include <asm/asm-prototypes.h>
19#include <asm/firmware.h> 20#include <asm/firmware.h>
20#include <asm/machdep.h> 21#include <asm/machdep.h>
21#include <asm/opal.h> 22#include <asm/opal.h>
@@ -48,10 +49,10 @@ static u64 pnv_default_stop_mask;
48static bool default_stop_found; 49static bool default_stop_found;
49 50
50/* 51/*
51 * First deep stop state. Used to figure out when to save/restore 52 * First stop state levels when SPR and TB loss can occur.
52 * hypervisor context.
53 */ 53 */
54u64 pnv_first_deep_stop_state = MAX_STOP_STATE; 54static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
55static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
55 56
56/* 57/*
57 * psscr value and mask of the deepest stop idle state. 58 * psscr value and mask of the deepest stop idle state.
@@ -62,6 +63,8 @@ static u64 pnv_deepest_stop_psscr_mask;
62static u64 pnv_deepest_stop_flag; 63static u64 pnv_deepest_stop_flag;
63static bool deepest_stop_found; 64static bool deepest_stop_found;
64 65
66static unsigned long power7_offline_type;
67
65static int pnv_save_sprs_for_deep_states(void) 68static int pnv_save_sprs_for_deep_states(void)
66{ 69{
67 int cpu; 70 int cpu;
@@ -72,12 +75,12 @@ static int pnv_save_sprs_for_deep_states(void)
72 * all cpus at boot. Get these reg values of current cpu and use the 75 * all cpus at boot. Get these reg values of current cpu and use the
73 * same across all cpus. 76 * same across all cpus.
74 */ 77 */
75 uint64_t lpcr_val = mfspr(SPRN_LPCR); 78 uint64_t lpcr_val = mfspr(SPRN_LPCR);
76 uint64_t hid0_val = mfspr(SPRN_HID0); 79 uint64_t hid0_val = mfspr(SPRN_HID0);
77 uint64_t hid1_val = mfspr(SPRN_HID1); 80 uint64_t hid1_val = mfspr(SPRN_HID1);
78 uint64_t hid4_val = mfspr(SPRN_HID4); 81 uint64_t hid4_val = mfspr(SPRN_HID4);
79 uint64_t hid5_val = mfspr(SPRN_HID5); 82 uint64_t hid5_val = mfspr(SPRN_HID5);
80 uint64_t hmeer_val = mfspr(SPRN_HMEER); 83 uint64_t hmeer_val = mfspr(SPRN_HMEER);
81 uint64_t msr_val = MSR_IDLE; 84 uint64_t msr_val = MSR_IDLE;
82 uint64_t psscr_val = pnv_deepest_stop_psscr_val; 85 uint64_t psscr_val = pnv_deepest_stop_psscr_val;
83 86
@@ -137,89 +140,6 @@ static int pnv_save_sprs_for_deep_states(void)
137 return 0; 140 return 0;
138} 141}
139 142
140static void pnv_alloc_idle_core_states(void)
141{
142 int i, j;
143 int nr_cores = cpu_nr_cores();
144 u32 *core_idle_state;
145
146 /*
147 * core_idle_state - The lower 8 bits track the idle state of
148 * each thread of the core.
149 *
150 * The most significant bit is the lock bit.
151 *
152 * Initially all the bits corresponding to threads_per_core
153 * are set. They are cleared when the thread enters deep idle
154 * state like sleep and winkle/stop.
155 *
156 * Initially the lock bit is cleared. The lock bit has 2
157 * purposes:
158 * a. While the first thread in the core waking up from
159 * idle is restoring core state, it prevents other
160 * threads in the core from switching to process
161 * context.
162 * b. While the last thread in the core is saving the
163 * core state, it prevents a different thread from
164 * waking up.
165 */
166 for (i = 0; i < nr_cores; i++) {
167 int first_cpu = i * threads_per_core;
168 int node = cpu_to_node(first_cpu);
169 size_t paca_ptr_array_size;
170
171 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
172 *core_idle_state = (1 << threads_per_core) - 1;
173 paca_ptr_array_size = (threads_per_core *
174 sizeof(struct paca_struct *));
175
176 for (j = 0; j < threads_per_core; j++) {
177 int cpu = first_cpu + j;
178
179 paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
180 paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
181 paca_ptrs[cpu]->thread_mask = 1 << j;
182 }
183 }
184
185 update_subcore_sibling_mask();
186
187 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
188 int rc = pnv_save_sprs_for_deep_states();
189
190 if (likely(!rc))
191 return;
192
193 /*
194 * The stop-api is unable to restore hypervisor
195 * resources on wakeup from platform idle states which
196 * lose full context. So disable such states.
197 */
198 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
199 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
200 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
201
202 if (cpu_has_feature(CPU_FTR_ARCH_300) &&
203 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
204 /*
205 * Use the default stop state for CPU-Hotplug
206 * if available.
207 */
208 if (default_stop_found) {
209 pnv_deepest_stop_psscr_val =
210 pnv_default_stop_val;
211 pnv_deepest_stop_psscr_mask =
212 pnv_default_stop_mask;
213 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
214 pnv_deepest_stop_psscr_val);
215 } else { /* Fallback to snooze loop for CPU-Hotplug */
216 deepest_stop_found = false;
217 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
218 }
219 }
220 }
221}
222
223u32 pnv_get_supported_cpuidle_states(void) 143u32 pnv_get_supported_cpuidle_states(void)
224{ 144{
225 return supported_cpuidle_states; 145 return supported_cpuidle_states;
@@ -238,6 +158,9 @@ static void pnv_fastsleep_workaround_apply(void *info)
238 *err = 1; 158 *err = 1;
239} 159}
240 160
161static bool power7_fastsleep_workaround_entry = true;
162static bool power7_fastsleep_workaround_exit = true;
163
241/* 164/*
242 * Used to store fastsleep workaround state 165 * Used to store fastsleep workaround state
243 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 166 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
@@ -269,21 +192,15 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
269 * fastsleep_workaround_applyonce = 1 implies 192 * fastsleep_workaround_applyonce = 1 implies
270 * fastsleep workaround needs to be left in 'applied' state on all 193 * fastsleep workaround needs to be left in 'applied' state on all
271 * the cores. Do this by- 194 * the cores. Do this by-
272 * 1. Patching out the call to 'undo' workaround in fastsleep exit path 195 * 1. Disable the 'undo' workaround in fastsleep exit path
273 * 2. Sending ipi to all the cores which have at least one online thread 196 * 2. Sending IPIs to all the cores which have at least one online thread
274 * 3. Patching out the call to 'apply' workaround in fastsleep entry 197 * 3. Disable the 'apply' workaround in fastsleep entry path
275 * path 198 *
276 * There is no need to send ipi to cores which have all threads 199 * There is no need to send ipi to cores which have all threads
277 * offlined, as last thread of the core entering fastsleep or deeper 200 * offlined, as last thread of the core entering fastsleep or deeper
278 * state would have applied workaround. 201 * state would have applied workaround.
279 */ 202 */
280 err = patch_instruction( 203 power7_fastsleep_workaround_exit = false;
281 (unsigned int *)pnv_fastsleep_workaround_at_exit,
282 PPC_INST_NOP);
283 if (err) {
284 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
285 goto fail;
286 }
287 204
288 get_online_cpus(); 205 get_online_cpus();
289 primary_thread_mask = cpu_online_cores_map(); 206 primary_thread_mask = cpu_online_cores_map();
@@ -296,13 +213,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
296 goto fail; 213 goto fail;
297 } 214 }
298 215
299 err = patch_instruction( 216 power7_fastsleep_workaround_entry = false;
300 (unsigned int *)pnv_fastsleep_workaround_at_entry,
301 PPC_INST_NOP);
302 if (err) {
303 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
304 goto fail;
305 }
306 217
307 fastsleep_workaround_applyonce = 1; 218 fastsleep_workaround_applyonce = 1;
308 219
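
The hunk above replaces runtime patching of the workaround call sites (patch_instruction() writing NOPs) with boolean gates tested on the idle path. A minimal userspace sketch of the same trade-off, with illustrative names rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

/* Gates checked on every fastsleep entry/exit; simpler and safer than
 * patching NOPs over the call sites, at the cost of a predictable branch. */
static bool workaround_entry = true;
static bool workaround_exit = true;

static void apply_workaround(const char *when)
{
	printf("workaround applied at %s\n", when);
}

static void enter_fastsleep(void)
{
	if (workaround_entry)
		apply_workaround("entry");
	/* ... idle here ... */
	if (workaround_exit)
		apply_workaround("exit");
}

int main(void)
{
	enter_fastsleep();
	workaround_entry = workaround_exit = false;	/* "applyonce" mode */
	enter_fastsleep();	/* no further patching needed */
	return 0;
}
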
@@ -315,27 +226,346 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
315 show_fastsleep_workaround_applyonce, 226 show_fastsleep_workaround_applyonce,
316 store_fastsleep_workaround_applyonce); 227 store_fastsleep_workaround_applyonce);
317 228
318static unsigned long __power7_idle_type(unsigned long type) 229static inline void atomic_start_thread_idle(void)
319{ 230{
231 int cpu = raw_smp_processor_id();
232 int first = cpu_first_thread_sibling(cpu);
233 int thread_nr = cpu_thread_in_core(cpu);
234 unsigned long *state = &paca_ptrs[first]->idle_state;
235
236 clear_bit(thread_nr, state);
237}
238
239static inline void atomic_stop_thread_idle(void)
240{
241 int cpu = raw_smp_processor_id();
242 int first = cpu_first_thread_sibling(cpu);
243 int thread_nr = cpu_thread_in_core(cpu);
244 unsigned long *state = &paca_ptrs[first]->idle_state;
245
246 set_bit(thread_nr, state);
247}
248
249static inline void atomic_lock_thread_idle(void)
250{
251 int cpu = raw_smp_processor_id();
252 int first = cpu_first_thread_sibling(cpu);
253 unsigned long *state = &paca_ptrs[first]->idle_state;
254
255 while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
256 barrier();
257}
258
259static inline void atomic_unlock_and_stop_thread_idle(void)
260{
261 int cpu = raw_smp_processor_id();
262 int first = cpu_first_thread_sibling(cpu);
263 unsigned long thread = 1UL << cpu_thread_in_core(cpu);
264 unsigned long *state = &paca_ptrs[first]->idle_state;
265 u64 s = READ_ONCE(*state);
266 u64 new, tmp;
267
268 BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
269 BUG_ON(s & thread);
270
271again:
272 new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
273 tmp = cmpxchg(state, s, new);
274 if (unlikely(tmp != s)) {
275 s = tmp;
276 goto again;
277 }
278}
279
280static inline void atomic_unlock_thread_idle(void)
281{
282 int cpu = raw_smp_processor_id();
283 int first = cpu_first_thread_sibling(cpu);
284 unsigned long *state = &paca_ptrs[first]->idle_state;
285
286 BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
287 clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
288}
289
290/* P7 and P8 */
291struct p7_sprs {
292 /* per core */
293 u64 tscr;
294 u64 worc;
295
296 /* per subcore */
297 u64 sdr1;
298 u64 rpr;
299
300 /* per thread */
301 u64 lpcr;
302 u64 hfscr;
303 u64 fscr;
304 u64 purr;
305 u64 spurr;
306 u64 dscr;
307 u64 wort;
308
309 /* per thread SPRs that get lost in shallow states */
310 u64 amr;
311 u64 iamr;
312 u64 amor;
313 u64 uamor;
314};
315
316static unsigned long power7_idle_insn(unsigned long type)
317{
318 int cpu = raw_smp_processor_id();
319 int first = cpu_first_thread_sibling(cpu);
320 unsigned long *state = &paca_ptrs[first]->idle_state;
321 unsigned long thread = 1UL << cpu_thread_in_core(cpu);
322 unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
320 unsigned long srr1; 323 unsigned long srr1;
324 bool full_winkle;
325 struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
326 bool sprs_saved = false;
327 int rc;
321 328
322 if (!prep_irq_for_idle_irqsoff()) 329 if (unlikely(type != PNV_THREAD_NAP)) {
323 return 0; 330 atomic_lock_thread_idle();
331
332 BUG_ON(!(*state & thread));
333 *state &= ~thread;
334
335 if (power7_fastsleep_workaround_entry) {
336 if ((*state & core_thread_mask) == 0) {
337 rc = opal_config_cpu_idle_state(
338 OPAL_CONFIG_IDLE_FASTSLEEP,
339 OPAL_CONFIG_IDLE_APPLY);
340 BUG_ON(rc);
341 }
342 }
343
344 if (type == PNV_THREAD_WINKLE) {
345 sprs.tscr = mfspr(SPRN_TSCR);
346 sprs.worc = mfspr(SPRN_WORC);
347
348 sprs.sdr1 = mfspr(SPRN_SDR1);
349 sprs.rpr = mfspr(SPRN_RPR);
350
351 sprs.lpcr = mfspr(SPRN_LPCR);
352 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
353 sprs.hfscr = mfspr(SPRN_HFSCR);
354 sprs.fscr = mfspr(SPRN_FSCR);
355 }
356 sprs.purr = mfspr(SPRN_PURR);
357 sprs.spurr = mfspr(SPRN_SPURR);
358 sprs.dscr = mfspr(SPRN_DSCR);
359 sprs.wort = mfspr(SPRN_WORT);
360
361 sprs_saved = true;
362
363 /*
364 * Increment winkle counter and set all winkle bits if
365 * all threads are winkling. This allows wakeup side to
366 * distinguish between fast sleep and winkle state
367 * loss. Fast sleep still has to resync the timebase so
368 * this may not be a really big win.
369 */
370 *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
371 if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
372 >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
373 == threads_per_core)
374 *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
375 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
376 }
377
378 atomic_unlock_thread_idle();
379 }
380
381 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
382 sprs.amr = mfspr(SPRN_AMR);
383 sprs.iamr = mfspr(SPRN_IAMR);
384 sprs.amor = mfspr(SPRN_AMOR);
385 sprs.uamor = mfspr(SPRN_UAMOR);
386 }
387
388 local_paca->thread_idle_state = type;
389 srr1 = isa206_idle_insn_mayloss(type); /* go idle */
390 local_paca->thread_idle_state = PNV_THREAD_RUNNING;
391
392 WARN_ON_ONCE(!srr1);
393 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
394
395 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
396 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
397 /*
398 * We don't need an isync after the mtsprs here because
399 * the upcoming mtmsrd is execution synchronizing.
400 */
401 mtspr(SPRN_AMR, sprs.amr);
402 mtspr(SPRN_IAMR, sprs.iamr);
403 mtspr(SPRN_AMOR, sprs.amor);
404 mtspr(SPRN_UAMOR, sprs.uamor);
405 }
406 }
407
408 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
409 hmi_exception_realmode(NULL);
410
411 if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
412 if (unlikely(type != PNV_THREAD_NAP)) {
413 atomic_lock_thread_idle();
414 if (type == PNV_THREAD_WINKLE) {
415 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
416 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
417 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
418 }
419 atomic_unlock_and_stop_thread_idle();
420 }
421 return srr1;
422 }
423
424 /* HV state loss */
425 BUG_ON(type == PNV_THREAD_NAP);
426
427 atomic_lock_thread_idle();
428
429 full_winkle = false;
430 if (type == PNV_THREAD_WINKLE) {
431 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
432 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
433 if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
434 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
435 full_winkle = true;
436 BUG_ON(!sprs_saved);
437 }
438 }
439
440 WARN_ON(*state & thread);
441
442 if ((*state & core_thread_mask) != 0)
443 goto core_woken;
444
445 /* Per-core SPRs */
446 if (full_winkle) {
447 mtspr(SPRN_TSCR, sprs.tscr);
448 mtspr(SPRN_WORC, sprs.worc);
449 }
450
451 if (power7_fastsleep_workaround_exit) {
452 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
453 OPAL_CONFIG_IDLE_UNDO);
454 BUG_ON(rc);
455 }
456
457 /* TB */
458 if (opal_resync_timebase() != OPAL_SUCCESS)
459 BUG();
460
461core_woken:
462 if (!full_winkle)
463 goto subcore_woken;
464
465 if ((*state & local_paca->subcore_sibling_mask) != 0)
466 goto subcore_woken;
467
468 /* Per-subcore SPRs */
469 mtspr(SPRN_SDR1, sprs.sdr1);
470 mtspr(SPRN_RPR, sprs.rpr);
471
472subcore_woken:
473 /*
474 * isync after restoring shared SPRs and before unlocking. Unlock
475 * only contains hwsync which does not necessarily do the right
476 * thing for SPRs.
477 */
478 isync();
479 atomic_unlock_and_stop_thread_idle();
480
481 /* Fast sleep does not lose SPRs */
482 if (!full_winkle)
483 return srr1;
484
485 /* Per-thread SPRs */
486 mtspr(SPRN_LPCR, sprs.lpcr);
487 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
488 mtspr(SPRN_HFSCR, sprs.hfscr);
489 mtspr(SPRN_FSCR, sprs.fscr);
490 }
491 mtspr(SPRN_PURR, sprs.purr);
492 mtspr(SPRN_SPURR, sprs.spurr);
493 mtspr(SPRN_DSCR, sprs.dscr);
494 mtspr(SPRN_WORT, sprs.wort);
495
496 mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
497
498 /*
499 * The SLB has to be restored here, but it sometimes still
500 * contains entries, so the __ variant must be used to prevent
501 * multi hits.
502 */
503 __slb_restore_bolted_realmode();
504
505 return srr1;
506}
507
508extern unsigned long idle_kvm_start_guest(unsigned long srr1);
509
510#ifdef CONFIG_HOTPLUG_CPU
511static unsigned long power7_offline(void)
512{
513 unsigned long srr1;
514
515 mtmsr(MSR_IDLE);
516
517#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
518 /* Tell KVM we're entering idle. */
519 /******************************************************/
520 /* N O T E W E L L ! ! ! N O T E W E L L */
521 /* The following store to HSTATE_HWTHREAD_STATE(r13) */
522 /* MUST occur in real mode, i.e. with the MMU off, */
523 /* and the MMU must stay off until we clear this flag */
524 /* and test HSTATE_HWTHREAD_REQ(r13) in */
525 /* pnv_powersave_wakeup in this file. */
526 /* The reason is that another thread can switch the */
527 /* MMU to a guest context whenever this flag is set */
528 /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
529 /* that would potentially cause this thread to start */
530 /* executing instructions from guest memory in */
531 /* hypervisor mode, leading to a host crash or data */
532 /* corruption, or worse. */
533 /******************************************************/
534 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
535#endif
324 536
325 __ppc64_runlatch_off(); 537 __ppc64_runlatch_off();
326 srr1 = power7_idle_insn(type); 538 srr1 = power7_idle_insn(power7_offline_type);
327 __ppc64_runlatch_on(); 539 __ppc64_runlatch_on();
328 540
329 fini_irq_for_idle_irqsoff(); 541#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
542 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
543 /* Order setting hwthread_state vs. testing hwthread_req */
544 smp_mb();
545 if (local_paca->kvm_hstate.hwthread_req)
546 srr1 = idle_kvm_start_guest(srr1);
547#endif
548
549 mtmsr(MSR_KERNEL);
330 550
331 return srr1; 551 return srr1;
332} 552}
553#endif
333 554
334void power7_idle_type(unsigned long type) 555void power7_idle_type(unsigned long type)
335{ 556{
336 unsigned long srr1; 557 unsigned long srr1;
337 558
338 srr1 = __power7_idle_type(type); 559 if (!prep_irq_for_idle_irqsoff())
560 return;
561
562 mtmsr(MSR_IDLE);
563 __ppc64_runlatch_off();
564 srr1 = power7_idle_insn(type);
565 __ppc64_runlatch_on();
566 mtmsr(MSR_KERNEL);
567
568 fini_irq_for_idle_irqsoff();
339 irq_set_pending_from_srr1(srr1); 569 irq_set_pending_from_srr1(srr1);
340} 570}
341 571
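
atomic_unlock_and_stop_thread_idle() in the hunk above folds two updates, setting this thread's bit and dropping the lock bit, into a single cmpxchg that retries until it wins. A self-contained sketch of that retry pattern using C11 atomics; the names and the lock-bit position are illustrative:

#include <stdatomic.h>
#include <stdint.h>

#define LOCK_BIT (1ULL << 63)	/* assumed position, for the sketch */

/* Set @thread_bit and clear LOCK_BIT in one atomic step; on a lost
 * race, 'old' is refreshed with the current value and we retry. */
void unlock_and_set(_Atomic uint64_t *state, uint64_t thread_bit)
{
	uint64_t old = atomic_load(state);
	uint64_t new;

	do {
		new = (old | thread_bit) & ~LOCK_BIT;
	} while (!atomic_compare_exchange_weak(state, &old, new));
}
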
@@ -347,33 +577,292 @@ void power7_idle(void)
347 power7_idle_type(PNV_THREAD_NAP); 577 power7_idle_type(PNV_THREAD_NAP);
348} 578}
349 579
350static unsigned long __power9_idle_type(unsigned long stop_psscr_val, 580struct p9_sprs {
351 unsigned long stop_psscr_mask) 581 /* per core */
582 u64 ptcr;
583 u64 rpr;
584 u64 tscr;
585 u64 ldbar;
586
587 /* per thread */
588 u64 lpcr;
589 u64 hfscr;
590 u64 fscr;
591 u64 pid;
592 u64 purr;
593 u64 spurr;
594 u64 dscr;
595 u64 wort;
596
597 u64 mmcra;
598 u32 mmcr0;
599 u32 mmcr1;
600 u64 mmcr2;
601
602 /* per thread SPRs that get lost in shallow states */
603 u64 amr;
604 u64 iamr;
605 u64 amor;
606 u64 uamor;
607};
608
609static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
352{ 610{
353 unsigned long psscr; 611 int cpu = raw_smp_processor_id();
612 int first = cpu_first_thread_sibling(cpu);
613 unsigned long *state = &paca_ptrs[first]->idle_state;
614 unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
354 unsigned long srr1; 615 unsigned long srr1;
616 unsigned long pls;
617 unsigned long mmcr0 = 0;
618 struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
619 bool sprs_saved = false;
355 620
356 if (!prep_irq_for_idle_irqsoff()) 621 if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
357 return 0; 622 /* EC=ESL=0 case */
623
624 BUG_ON(!mmu_on);
625
626 /*
627 * Wake synchronously. SRESET via xscom may still cause
628 * a 0x100 powersave wakeup with SRR1 reason!
629 */
630 srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
631 if (likely(!srr1))
632 return 0;
633
634 /*
635 * Registers not saved, can't recover!
636 * This would be a hardware bug
637 */
638 BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
639
640 goto out;
641 }
642
643 /* EC=ESL=1 case */
644#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
645 if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
646 local_paca->requested_psscr = psscr;
647 /* order setting requested_psscr vs testing dont_stop */
648 smp_mb();
649 if (atomic_read(&local_paca->dont_stop)) {
650 local_paca->requested_psscr = 0;
651 return 0;
652 }
653 }
654#endif
655
656 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
657 /*
658 * POWER9 DD2 can incorrectly set PMAO when waking up
659 * after a state-loss idle. Saving and restoring MMCR0
660 * over idle is a workaround.
661 */
662 mmcr0 = mfspr(SPRN_MMCR0);
663 }
664 if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
665 sprs.lpcr = mfspr(SPRN_LPCR);
666 sprs.hfscr = mfspr(SPRN_HFSCR);
667 sprs.fscr = mfspr(SPRN_FSCR);
668 sprs.pid = mfspr(SPRN_PID);
669 sprs.purr = mfspr(SPRN_PURR);
670 sprs.spurr = mfspr(SPRN_SPURR);
671 sprs.dscr = mfspr(SPRN_DSCR);
672 sprs.wort = mfspr(SPRN_WORT);
673
674 sprs.mmcra = mfspr(SPRN_MMCRA);
675 sprs.mmcr0 = mfspr(SPRN_MMCR0);
676 sprs.mmcr1 = mfspr(SPRN_MMCR1);
677 sprs.mmcr2 = mfspr(SPRN_MMCR2);
678
679 sprs.ptcr = mfspr(SPRN_PTCR);
680 sprs.rpr = mfspr(SPRN_RPR);
681 sprs.tscr = mfspr(SPRN_TSCR);
682 sprs.ldbar = mfspr(SPRN_LDBAR);
683
684 sprs_saved = true;
685
686 atomic_start_thread_idle();
687 }
688
689 sprs.amr = mfspr(SPRN_AMR);
690 sprs.iamr = mfspr(SPRN_IAMR);
691 sprs.amor = mfspr(SPRN_AMOR);
692 sprs.uamor = mfspr(SPRN_UAMOR);
693
694 srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
695
696#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
697 local_paca->requested_psscr = 0;
698#endif
358 699
359 psscr = mfspr(SPRN_PSSCR); 700 psscr = mfspr(SPRN_PSSCR);
360 psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
361 701
702 WARN_ON_ONCE(!srr1);
703 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
704
705 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
706 unsigned long mmcra;
707
708 /*
709 * We don't need an isync after the mtsprs here because the
710 * upcoming mtmsrd is execution synchronizing.
711 */
712 mtspr(SPRN_AMR, sprs.amr);
713 mtspr(SPRN_IAMR, sprs.iamr);
714 mtspr(SPRN_AMOR, sprs.amor);
715 mtspr(SPRN_UAMOR, sprs.uamor);
716
717 /*
718 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
719 * might have been corrupted and needs flushing. We also need
720 * to reload MMCR0 (see mmcr0 comment above).
721 */
722 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
723 asm volatile(PPC_INVALIDATE_ERAT);
724 mtspr(SPRN_MMCR0, mmcr0);
725 }
726
727 /*
728 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
729 * to ensure the PMU starts running.
730 */
731 mmcra = mfspr(SPRN_MMCRA);
732 mmcra |= PPC_BIT(60);
733 mtspr(SPRN_MMCRA, mmcra);
734 mmcra &= ~PPC_BIT(60);
735 mtspr(SPRN_MMCRA, mmcra);
736 }
737
738 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
739 hmi_exception_realmode(NULL);
740
741 /*
742 * On POWER9, SRR1 bits do not match exactly as expected.
743 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
744 * just always test PSSCR for SPR/TB state loss.
745 */
746 pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
747 if (likely(pls < pnv_first_spr_loss_level)) {
748 if (sprs_saved)
749 atomic_stop_thread_idle();
750 goto out;
751 }
752
753 /* HV state loss */
754 BUG_ON(!sprs_saved);
755
756 atomic_lock_thread_idle();
757
758 if ((*state & core_thread_mask) != 0)
759 goto core_woken;
760
761 /* Per-core SPRs */
762 mtspr(SPRN_PTCR, sprs.ptcr);
763 mtspr(SPRN_RPR, sprs.rpr);
764 mtspr(SPRN_TSCR, sprs.tscr);
765 mtspr(SPRN_LDBAR, sprs.ldbar);
766
767 if (pls >= pnv_first_tb_loss_level) {
768 /* TB loss */
769 if (opal_resync_timebase() != OPAL_SUCCESS)
770 BUG();
771 }
772
773 /*
774 * isync after restoring shared SPRs and before unlocking. Unlock
775 * only contains hwsync which does not necessarily do the right
776 * thing for SPRs.
777 */
778 isync();
779
780core_woken:
781 atomic_unlock_and_stop_thread_idle();
782
783 /* Per-thread SPRs */
784 mtspr(SPRN_LPCR, sprs.lpcr);
785 mtspr(SPRN_HFSCR, sprs.hfscr);
786 mtspr(SPRN_FSCR, sprs.fscr);
787 mtspr(SPRN_PID, sprs.pid);
788 mtspr(SPRN_PURR, sprs.purr);
789 mtspr(SPRN_SPURR, sprs.spurr);
790 mtspr(SPRN_DSCR, sprs.dscr);
791 mtspr(SPRN_WORT, sprs.wort);
792
793 mtspr(SPRN_MMCRA, sprs.mmcra);
794 mtspr(SPRN_MMCR0, sprs.mmcr0);
795 mtspr(SPRN_MMCR1, sprs.mmcr1);
796 mtspr(SPRN_MMCR2, sprs.mmcr2);
797
798 mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
799
800 if (!radix_enabled())
801 __slb_restore_bolted_realmode();
802
803out:
804 if (mmu_on)
805 mtmsr(MSR_KERNEL);
806
807 return srr1;
808}
809
810#ifdef CONFIG_HOTPLUG_CPU
811static unsigned long power9_offline_stop(unsigned long psscr)
812{
813 unsigned long srr1;
814
815#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
362 __ppc64_runlatch_off(); 816 __ppc64_runlatch_off();
363 srr1 = power9_idle_stop(psscr); 817 srr1 = power9_idle_stop(psscr, true);
364 __ppc64_runlatch_on(); 818 __ppc64_runlatch_on();
819#else
820 /*
821 * Tell KVM we're entering idle.
822 * This does not have to be done in real mode because the P9 MMU
823 * is independent per-thread. Some steppings share radix/hash mode
824 * between threads, but in that case KVM has a barrier sync in real
825 * mode before and after switching between radix and hash.
826 *
827 * kvm_start_guest must still be called in real mode though, hence
828 * the false argument.
829 */
830 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
365 831
366 fini_irq_for_idle_irqsoff(); 832 __ppc64_runlatch_off();
833 srr1 = power9_idle_stop(psscr, false);
834 __ppc64_runlatch_on();
835
836 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
837 /* Order setting hwthread_state vs. testing hwthread_req */
838 smp_mb();
839 if (local_paca->kvm_hstate.hwthread_req)
840 srr1 = idle_kvm_start_guest(srr1);
841 mtmsr(MSR_KERNEL);
842#endif
367 843
368 return srr1; 844 return srr1;
369} 845}
846#endif
370 847
371void power9_idle_type(unsigned long stop_psscr_val, 848void power9_idle_type(unsigned long stop_psscr_val,
372 unsigned long stop_psscr_mask) 849 unsigned long stop_psscr_mask)
373{ 850{
851 unsigned long psscr;
374 unsigned long srr1; 852 unsigned long srr1;
375 853
376 srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask); 854 if (!prep_irq_for_idle_irqsoff())
855 return;
856
857 psscr = mfspr(SPRN_PSSCR);
858 psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
859
860 __ppc64_runlatch_off();
861 srr1 = power9_idle_stop(psscr, true);
862 __ppc64_runlatch_on();
863
864 fini_irq_for_idle_irqsoff();
865
377 irq_set_pending_from_srr1(srr1); 866 irq_set_pending_from_srr1(srr1);
378} 867}
379 868
@@ -409,7 +898,7 @@ void pnv_power9_force_smt4_catch(void)
409 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); 898 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
410 } 899 }
411 /* order setting dont_stop vs testing requested_psscr */ 900 /* order setting dont_stop vs testing requested_psscr */
412 mb(); 901 smp_mb();
413 for (thr = 0; thr < threads_per_core; ++thr) { 902 for (thr = 0; thr < threads_per_core; ++thr) {
414 if (!paca_ptrs[cpu0+thr]->requested_psscr) 903 if (!paca_ptrs[cpu0+thr]->requested_psscr)
415 ++awake_threads; 904 ++awake_threads;
@@ -481,7 +970,6 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
481unsigned long pnv_cpu_offline(unsigned int cpu) 970unsigned long pnv_cpu_offline(unsigned int cpu)
482{ 971{
483 unsigned long srr1; 972 unsigned long srr1;
484 u32 idle_states = pnv_get_supported_cpuidle_states();
485 973
486 __ppc64_runlatch_off(); 974 __ppc64_runlatch_off();
487 975
@@ -492,15 +980,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
492 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 980 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
493 pnv_deepest_stop_psscr_val; 981 pnv_deepest_stop_psscr_val;
494 srr1 = power9_offline_stop(psscr); 982 srr1 = power9_offline_stop(psscr);
495 983 } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
496 } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && 984 srr1 = power7_offline();
497 (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
498 srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
499 } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
500 (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
501 srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
502 } else if (idle_states & OPAL_PM_NAP_ENABLED) {
503 srr1 = power7_idle_insn(PNV_THREAD_NAP);
504 } else { 985 } else {
505 /* This is the fallback method. We emulate snooze */ 986 /* This is the fallback method. We emulate snooze */
506 while (!generic_check_cpu_restart(cpu)) { 987 while (!generic_check_cpu_restart(cpu)) {
@@ -596,33 +1077,44 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
596 * @dt_idle_states: Number of idle state entries 1077 * @dt_idle_states: Number of idle state entries
597 * Returns 0 on success 1078 * Returns 0 on success
598 */ 1079 */
599static int __init pnv_power9_idle_init(void) 1080static void __init pnv_power9_idle_init(void)
600{ 1081{
601 u64 max_residency_ns = 0; 1082 u64 max_residency_ns = 0;
602 int i; 1083 int i;
603 1084
604 /* 1085 /*
605 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
606 * and the pnv_default_stop_{val,mask}.
607 *
608 * pnv_first_deep_stop_state should be set to the first stop
609 * level to cause hypervisor state loss.
610 *
611 * pnv_deepest_stop_{val,mask} should be set to values corresponding to 1086 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
612 * the deepest stop state. 1087 * the deepest stop state.
613 * 1088 *
614 * pnv_default_stop_{val,mask} should be set to values corresponding to 1089 * pnv_default_stop_{val,mask} should be set to values corresponding to
615 * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. 1090 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
616 */ 1091 */
617 pnv_first_deep_stop_state = MAX_STOP_STATE; 1092 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
1093 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
618 for (i = 0; i < nr_pnv_idle_states; i++) { 1094 for (i = 0; i < nr_pnv_idle_states; i++) {
619 int err; 1095 int err;
620 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1096 struct pnv_idle_states_t *state = &pnv_idle_states[i];
621 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1097 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
622 1098
1099 if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1100 (pnv_first_tb_loss_level > psscr_rl))
1101 pnv_first_tb_loss_level = psscr_rl;
1102
623 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1103 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
624 pnv_first_deep_stop_state > psscr_rl) 1104 (pnv_first_spr_loss_level > psscr_rl))
625 pnv_first_deep_stop_state = psscr_rl; 1105 pnv_first_spr_loss_level = psscr_rl;
1106
1107 /*
1108 * The idle code does not deal with TB loss occurring
1109 * in a shallower state than SPR loss, so force it to
1110 * behave like SPRs are lost if TB is lost. POWER9 would
1111 * never encounter this, but a POWER8 core would if it
1112 * implemented the stop instruction. So this is for forward
1113 * compatibility.
1114 */
1115 if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1116 (pnv_first_spr_loss_level > psscr_rl))
1117 pnv_first_spr_loss_level = psscr_rl;
626 1118
627 err = validate_psscr_val_mask(&state->psscr_val, 1119 err = validate_psscr_val_mask(&state->psscr_val,
628 &state->psscr_mask, 1120 &state->psscr_mask,
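
The loop above reduces the idle-state table to two thresholds: the lowest requested level (RL) at which SPRs may be lost, and likewise for the timebase. A sketch of that minimum scan over a hypothetical state table (flag values are illustrative):

#include <stdint.h>

#define FLAG_LOSE_FULL_CONTEXT	0x1	/* illustrative flag values */
#define FLAG_TIMEBASE_STOP	0x2
#define MAX_STOP_STATE		0xF

struct idle_state { uint64_t psscr_rl; unsigned int flags; };

/* Lowest RL whose state carries @flag; MAX_STOP_STATE + 1 means the
 * resource is never lost, matching the defaults in the hunk above. */
uint64_t first_loss_level(const struct idle_state *s, int n,
			  unsigned int flag)
{
	uint64_t level = MAX_STOP_STATE + 1;

	for (int i = 0; i < n; i++)
		if ((s[i].flags & flag) && s[i].psscr_rl < level)
			level = s[i].psscr_rl;
	return level;
}
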
@@ -647,6 +1139,7 @@ static int __init pnv_power9_idle_init(void)
647 pnv_default_stop_val = state->psscr_val; 1139 pnv_default_stop_val = state->psscr_val;
648 pnv_default_stop_mask = state->psscr_mask; 1140 pnv_default_stop_mask = state->psscr_mask;
649 default_stop_found = true; 1141 default_stop_found = true;
1142 WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
650 } 1143 }
651 } 1144 }
652 1145
@@ -666,10 +1159,40 @@ static int __init pnv_power9_idle_init(void)
666 pnv_deepest_stop_psscr_mask); 1159 pnv_deepest_stop_psscr_mask);
667 } 1160 }
668 1161
669 pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n", 1162 pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
670 pnv_first_deep_stop_state); 1163 pnv_first_spr_loss_level);
671 1164
672 return 0; 1165 pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
1166 pnv_first_tb_loss_level);
1167}
1168
1169static void __init pnv_disable_deep_states(void)
1170{
1171 /*
1172 * The stop-api is unable to restore hypervisor
1173 * resources on wakeup from platform idle states which
1174 * lose full context. So disable such states.
1175 */
1176 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
1177 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
1178 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
1179
1180 if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1181 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
1182 /*
1183 * Use the default stop state for CPU-Hotplug
1184 * if available.
1185 */
1186 if (default_stop_found) {
1187 pnv_deepest_stop_psscr_val = pnv_default_stop_val;
1188 pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
1189 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
1190 pnv_deepest_stop_psscr_val);
1191 } else { /* Fallback to snooze loop for CPU-Hotplug */
1192 deepest_stop_found = false;
1193 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
1194 }
1195 }
673} 1196}
674 1197
675/* 1198/*
@@ -684,10 +1207,8 @@ static void __init pnv_probe_idle_states(void)
684 return; 1207 return;
685 } 1208 }
686 1209
687 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1210 if (cpu_has_feature(CPU_FTR_ARCH_300))
688 if (pnv_power9_idle_init()) 1211 pnv_power9_idle_init();
689 return;
690 }
691 1212
692 for (i = 0; i < nr_pnv_idle_states; i++) 1213 for (i = 0; i < nr_pnv_idle_states; i++)
693 supported_cpuidle_states |= pnv_idle_states[i].flags; 1214 supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -807,11 +1328,33 @@ out:
807 1328
808static int __init pnv_init_idle_states(void) 1329static int __init pnv_init_idle_states(void)
809{ 1330{
1331 int cpu;
810 int rc = 0; 1332 int rc = 0;
811 supported_cpuidle_states = 0; 1333
1334 /* Set up PACA fields */
1335 for_each_present_cpu(cpu) {
1336 struct paca_struct *p = paca_ptrs[cpu];
1337
1338 p->idle_state = 0;
1339 if (cpu == cpu_first_thread_sibling(cpu))
1340 p->idle_state = (1 << threads_per_core) - 1;
1341
1342 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1343 /* P7/P8 nap */
1344 p->thread_idle_state = PNV_THREAD_RUNNING;
1345 } else {
1346 /* P9 stop */
1347#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1348 p->requested_psscr = 0;
1349 atomic_set(&p->dont_stop, 0);
1350#endif
1351 }
1352 }
812 1353
813 /* In case we error out nr_pnv_idle_states will be zero */ 1354 /* In case we error out nr_pnv_idle_states will be zero */
814 nr_pnv_idle_states = 0; 1355 nr_pnv_idle_states = 0;
1356 supported_cpuidle_states = 0;
1357
815 if (cpuidle_disable != IDLE_NO_OVERRIDE) 1358 if (cpuidle_disable != IDLE_NO_OVERRIDE)
816 goto out; 1359 goto out;
817 rc = pnv_parse_cpuidle_dt(); 1360 rc = pnv_parse_cpuidle_dt();
@@ -819,27 +1362,40 @@ static int __init pnv_init_idle_states(void)
819 return rc; 1362 return rc;
820 pnv_probe_idle_states(); 1363 pnv_probe_idle_states();
821 1364
822 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 1365 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
823 patch_instruction( 1366 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
824 (unsigned int *)pnv_fastsleep_workaround_at_entry, 1367 power7_fastsleep_workaround_entry = false;
825 PPC_INST_NOP); 1368 power7_fastsleep_workaround_exit = false;
826 patch_instruction( 1369 } else {
827 (unsigned int *)pnv_fastsleep_workaround_at_exit, 1370 /*
828 PPC_INST_NOP); 1371 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
829 } else { 1372 * workaround is needed to use fastsleep. Provide sysfs
830 /* 1373 * control to choose how this workaround has to be
831 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 1374 * applied.
832 * workaround is needed to use fastsleep. Provide sysfs 1375 */
833 * control to choose how this workaround has to be applied. 1376 device_create_file(cpu_subsys.dev_root,
834 */
835 device_create_file(cpu_subsys.dev_root,
836 &dev_attr_fastsleep_workaround_applyonce); 1377 &dev_attr_fastsleep_workaround_applyonce);
837 } 1378 }
1379
1380 update_subcore_sibling_mask();
1381
1382 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
1383 ppc_md.power_save = power7_idle;
1384 power7_offline_type = PNV_THREAD_NAP;
1385 }
838 1386
839 pnv_alloc_idle_core_states(); 1387 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
1388 (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
1389 power7_offline_type = PNV_THREAD_WINKLE;
1390 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
1391 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
1392 power7_offline_type = PNV_THREAD_SLEEP;
1393 }
840 1394
841 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) 1395 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
842 ppc_md.power_save = power7_idle; 1396 if (pnv_save_sprs_for_deep_states())
1397 pnv_disable_deep_states();
1398 }
843 1399
844out: 1400out:
845 return 0; 1401 return 0;
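
pnv_init_idle_states() now seeds one shared idle_state word per core, kept in the first sibling's paca: the low threads_per_core bits form a mask with a bit set while the corresponding thread is running. A sketch of the convention, assuming an SMT4 core:

#include <stdio.h>

#define THREADS_PER_CORE 4	/* assumed SMT width for the sketch */

int main(void)
{
	/* Bit n set: thread n of this core is not idle. */
	unsigned long idle_state = (1UL << THREADS_PER_CORE) - 1;

	printf("all running:   0x%lx\n", idle_state);	/* 0xf */
	idle_state &= ~(1UL << 2);	/* thread 2 enters idle */
	printf("thread 2 idle: 0x%lx\n", idle_state);	/* 0xb */
	return 0;
}
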
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index daad8c45c8e7..36c8fa3647a2 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -121,6 +121,8 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
121 121
122#define OPAL_CALL(name, opcode) \ 122#define OPAL_CALL(name, opcode) \
123int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ 123int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
124 int64_t a4, int64_t a5, int64_t a6, int64_t a7); \
125int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
124 int64_t a4, int64_t a5, int64_t a6, int64_t a7) \ 126 int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
125{ \ 127{ \
126 return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \ 128 return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
@@ -218,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
218OPAL_CALL(opal_get_param, OPAL_GET_PARAM); 220OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
219OPAL_CALL(opal_set_param, OPAL_SET_PARAM); 221OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
220OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); 222OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
223OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2);
221OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); 224OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
222OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); 225OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
223OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); 226OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
@@ -260,6 +263,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
260OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); 263OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
261OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); 264OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
262OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); 265OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
266OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE);
267OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
268OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
263OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); 269OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
264OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); 270OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
265OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); 271OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
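
The new forward declaration inside OPAL_CALL() gives every generated definition a preceding prototype, which quiets missing-prototype warnings without touching any call site. The shape of the pattern, reduced to a standalone sketch with a stand-in dispatcher:

#include <stdint.h>

static int64_t dispatch(int64_t a0, int64_t a1, int64_t opcode)
{
	return opcode + a0 + a1;	/* stand-in for the OPAL entry path */
}

/* Declaration first, then definition, so the definition always has a
 * visible prototype. */
#define OPAL_CALL(name, opcode)				\
int64_t name(int64_t a0, int64_t a1);			\
int64_t name(int64_t a0, int64_t a1)			\
{							\
	return dispatch(a0, a1, opcode);		\
}

OPAL_CALL(opal_demo_call, 42)	/* expands to prototype + body */
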
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 58a07948c76e..3e497b91d210 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
127 nr_chips)) 127 nr_chips))
128 goto error; 128 goto error;
129 129
130 pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), 130 pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info),
131 GFP_KERNEL); 131 GFP_KERNEL);
132 if (!pmu_ptr->mem_info) 132 if (!pmu_ptr->mem_info)
133 goto error; 133 goto error;
@@ -284,6 +284,9 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
284 case IMC_TYPE_THREAD: 284 case IMC_TYPE_THREAD:
285 domain = IMC_DOMAIN_THREAD; 285 domain = IMC_DOMAIN_THREAD;
286 break; 286 break;
287 case IMC_TYPE_TRACE:
288 domain = IMC_DOMAIN_TRACE;
289 break;
287 default: 290 default:
288 pr_warn("IMC Unknown Device type \n"); 291 pr_warn("IMC Unknown Device type \n");
289 domain = -1; 292 domain = -1;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 2b0eca104f86..f2b063b027f0 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -505,7 +505,7 @@ static int opal_recover_mce(struct pt_regs *regs,
505 recovered = 0; 505 recovered = 0;
506 } 506 }
507 507
508 if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) { 508 if (!recovered && evt->sync_error) {
509 /* 509 /*
510 * Try to kill processes if we get a synchronous machine check 510 * Try to kill processes if we get a synchronous machine check
511 * (e.g., one caused by execution of this instruction). This 511 * (e.g., one caused by execution of this instruction). This
@@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs)
614 return 0; 614 return 0;
615} 615}
616 616
617int opal_hmi_exception_early2(struct pt_regs *regs)
618{
619 s64 rc;
620 __be64 out_flags;
621
622 /*
623 * call opal hmi handler.
624 * Check 64-bit flag mask to find out if an event was generated,
625 * and whether TB is still valid or not etc.
626 */
627 rc = opal_handle_hmi2(&out_flags);
628 if (rc != OPAL_SUCCESS)
629 return 0;
630
631 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT)
632 local_paca->hmi_event_available = 1;
633 if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
634 tb_invalid = true;
635 return 1;
636}
637
617/* HMI exception handler called in virtual mode during check_irq_replay. */ 638/* HMI exception handler called in virtual mode during check_irq_replay. */
618int opal_handle_hmi_exception(struct pt_regs *regs) 639int opal_handle_hmi_exception(struct pt_regs *regs)
619{ 640{
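
opal_hmi_exception_early2() above receives a big-endian flag word from firmware and tests individual bits after one byte-swap. A self-contained sketch of that pattern; the flag values are illustrative, not OPAL's, and the swap assumes a little-endian host:

#include <stdbool.h>
#include <stdint.h>

#define FLAG_NEW_EVENT		(1ULL << 0)	/* illustrative values only */
#define FLAG_TOD_TB_FAIL	(1ULL << 1)

static bool hmi_event_available;
static bool tb_invalid;

/* Convert the firmware's big-endian word once (be64_to_cpu() on a
 * little-endian host is a byte swap), then test each bit. */
void handle_hmi_flags(uint64_t be_flags)
{
	uint64_t flags = __builtin_bswap64(be_flags);

	if (flags & FLAG_NEW_EVENT)
		hmi_event_available = true;
	if (flags & FLAG_TOD_TB_FAIL)
		tb_invalid = true;
}
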
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3ead4c237ed0..126602b4e399 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -847,11 +847,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
847 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, 847 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
848 pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); 848 pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
849 if (rc) 849 if (rc)
850 pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); 850 pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
851 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 851 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
852 bcomp, dcomp, fcomp, OPAL_UNMAP_PE); 852 bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
853 if (rc) 853 if (rc)
854 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 854 pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc);
855 855
856 pe->pbus = NULL; 856 pe->pbus = NULL;
857 pe->pdev = NULL; 857 pe->pdev = NULL;
@@ -1174,11 +1174,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
1174 pe->rid = bus->busn_res.start << 8; 1174 pe->rid = bus->busn_res.start << 8;
1175 1175
1176 if (all) 1176 if (all)
1177 pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n", 1177 pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n",
1178 bus->busn_res.start, bus->busn_res.end, pe->pe_number); 1178 &bus->busn_res.start, &bus->busn_res.end,
1179 pe->pe_number);
1179 else 1180 else
1180 pe_info(pe, "Secondary bus %d associated with PE#%x\n", 1181 pe_info(pe, "Secondary bus %pad associated with PE#%x\n",
1181 bus->busn_res.start, pe->pe_number); 1182 &bus->busn_res.start, pe->pe_number);
1182 1183
1183 if (pnv_ioda_configure_pe(phb, pe)) { 1184 if (pnv_ioda_configure_pe(phb, pe)) {
1184 /* XXX What do we do here ? */ 1185 /* XXX What do we do here ? */
@@ -1448,7 +1449,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
1448 tbl = pe->table_group.tables[0]; 1449 tbl = pe->table_group.tables[0];
1449 rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); 1450 rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
1450 if (rc) 1451 if (rc)
1451 pe_warn(pe, "OPAL error %ld release DMA window\n", rc); 1452 pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
1452 1453
1453 pnv_pci_ioda2_set_bypass(pe, false); 1454 pnv_pci_ioda2_set_bypass(pe, false);
1454 if (pe->table_group.group) { 1455 if (pe->table_group.group) {
@@ -1836,7 +1837,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
1836 struct pnv_ioda_pe *pe; 1837 struct pnv_ioda_pe *pe;
1837 1838
1838 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 1839 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
1839 return -ENODEV; 1840 return false;
1840 1841
1841 pe = &phb->ioda.pe_array[pdn->pe_number]; 1842 pe = &phb->ioda.pe_array[pdn->pe_number];
1842 if (pe->tce_bypass_enabled) { 1843 if (pe->tce_bypass_enabled) {
@@ -1859,7 +1860,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
1859 /* Configure the bypass mode */ 1860 /* Configure the bypass mode */
1860 s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe); 1861 s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
1861 if (rc) 1862 if (rc)
1862 return rc; 1863 return false;
1863 /* 4GB offset bypasses 32-bit space */ 1864 /* 4GB offset bypasses 32-bit space */
1864 pdev->dev.archdata.dma_offset = (1ULL << 32); 1865 pdev->dev.archdata.dma_offset = (1ULL << 32);
1865 return true; 1866 return true;
@@ -2286,8 +2287,8 @@ found:
2286 __pa(addr) + tce32_segsz * i, 2287 __pa(addr) + tce32_segsz * i,
2287 tce32_segsz, IOMMU_PAGE_SIZE_4K); 2288 tce32_segsz, IOMMU_PAGE_SIZE_4K);
2288 if (rc) { 2289 if (rc) {
2289 pe_err(pe, " Failed to configure 32-bit TCE table," 2290 pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n",
2290 " err %ld\n", rc); 2291 rc);
2291 goto fail; 2292 goto fail;
2292 } 2293 }
2293 } 2294 }
@@ -2332,9 +2333,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
2332 const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; 2333 const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
2333 const __u64 win_size = tbl->it_size << tbl->it_page_shift; 2334 const __u64 win_size = tbl->it_size << tbl->it_page_shift;
2334 2335
2335 pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num, 2336 pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n",
2336 start_addr, start_addr + win_size - 1, 2337 num, start_addr, start_addr + win_size - 1,
2337 IOMMU_PAGE_SIZE(tbl)); 2338 IOMMU_PAGE_SIZE(tbl));
2338 2339
2339 /* 2340 /*
2340 * Map TCE table through TVT. The TVE index is the PE number 2341 * Map TCE table through TVT. The TVE index is the PE number
@@ -2348,7 +2349,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
2348 size << 3, 2349 size << 3,
2349 IOMMU_PAGE_SIZE(tbl)); 2350 IOMMU_PAGE_SIZE(tbl));
2350 if (rc) { 2351 if (rc) {
2351 pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); 2352 pe_err(pe, "Failed to configure TCE table, err %lld\n", rc);
2352 return rc; 2353 return rc;
2353 } 2354 }
2354 2355
@@ -3450,7 +3451,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
3450#ifdef CONFIG_IOMMU_API 3451#ifdef CONFIG_IOMMU_API
3451 rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); 3452 rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
3452 if (rc) 3453 if (rc)
3453 pe_warn(pe, "OPAL error %ld release DMA window\n", rc); 3454 pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
3454#endif 3455#endif
3455 3456
3456 pnv_pci_ioda2_set_bypass(pe, false); 3457 pnv_pci_ioda2_set_bypass(pe, false);
@@ -3484,7 +3485,7 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
3484 phb->ioda.reserved_pe_idx, win, 0, idx); 3485 phb->ioda.reserved_pe_idx, win, 0, idx);
3485 3486
3486 if (rc != OPAL_SUCCESS) 3487 if (rc != OPAL_SUCCESS)
3487 pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n", 3488 pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
3488 rc, win, idx); 3489 rc, win, idx);
3489 3490
3490 map[idx] = IODA_INVALID_PE; 3491 map[idx] = IODA_INVALID_PE;
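
Most of the pci-ioda.c hunks are printk-format corrections: OPAL return codes are int64_t, which wants %lld rather than %ld, and resource_size_t fields go through the kernel's %pad specifier, which takes the value by pointer so the width is always right. A userspace analogue of the width problem, using the portable PRId64 macro:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	int64_t rc = -9;	/* like an OPAL return code */

	/* "%ld" mismatches int64_t where long is 32 bits; a fixed-width
	 * format (the analogue of the %lld fixes above) always agrees. */
	printf("OPAL error %" PRId64 " release DMA window\n", rc);
	return 0;
}
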
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8e36da379252..be26ab3d99e0 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -2,6 +2,7 @@
2#ifndef __POWERNV_PCI_H 2#ifndef __POWERNV_PCI_H
3#define __POWERNV_PCI_H 3#define __POWERNV_PCI_H
4 4
5#include <linux/compiler.h> /* for __printf */
5#include <linux/iommu.h> 6#include <linux/iommu.h>
6#include <asm/iommu.h> 7#include <asm/iommu.h>
7#include <asm/msi_bitmap.h> 8#include <asm/msi_bitmap.h>
@@ -204,6 +205,7 @@ extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
204 __u64 window_size, __u32 levels); 205 __u64 window_size, __u32 levels);
205extern int pnv_eeh_post_init(void); 206extern int pnv_eeh_post_init(void);
206 207
208__printf(3, 4)
207extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, 209extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
208 const char *fmt, ...); 210 const char *fmt, ...);
209#define pe_err(pe, fmt, ...) \ 211#define pe_err(pe, fmt, ...) \
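
The __printf(3, 4) annotation on pe_level_printk() expands to __attribute__((format(printf, 3, 4))), naming the format-string argument and the first variadic argument so the compiler can flag mismatches like the ones fixed above. A minimal sketch, with the indices shifted to (2, 3) because this variant has fewer fixed parameters:

#include <stdarg.h>
#include <stdio.h>

__attribute__((format(printf, 2, 3)))	/* fmt is arg 2, values start at 3 */
static void pe_log(const char *prefix, const char *fmt, ...)
{
	va_list ap;

	printf("%s: ", prefix);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}

int main(void)
{
	pe_log("PE#1", "error %lld\n", 5LL);	/* checked at compile time */
	/* pe_log("PE#1", "error %ld\n", 5LL);	   would now warn */
	return 0;
}
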
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 14befee4b3f1..3cf40f689aac 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void)
401 /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ 401 /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
402 ppc_md.machine_check_exception = opal_machine_check; 402 ppc_md.machine_check_exception = opal_machine_check;
403 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; 403 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
404 ppc_md.hmi_exception_early = opal_hmi_exception_early; 404 if (opal_check_token(OPAL_HANDLE_HMI2))
405 ppc_md.hmi_exception_early = opal_hmi_exception_early2;
406 else
407 ppc_md.hmi_exception_early = opal_hmi_exception_early;
405 ppc_md.handle_hmi_exception = opal_handle_hmi_exception; 408 ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
406} 409}
407 410
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 45563004feda..1d7a9fd30dd1 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -183,7 +183,7 @@ static void unsplit_core(void)
183 cpu = smp_processor_id(); 183 cpu = smp_processor_id();
184 if (cpu_thread_in_core(cpu) != 0) { 184 if (cpu_thread_in_core(cpu) != 0) {
185 while (mfspr(SPRN_HID0) & mask) 185 while (mfspr(SPRN_HID0) & mask)
186 power7_idle_insn(PNV_THREAD_NAP); 186 power7_idle_type(PNV_THREAD_NAP);
187 187
188 per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; 188 per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
189 return; 189 return;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index d291b618a559..47087832f8b2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
379static int dlpar_remove_lmb(struct drmem_lmb *lmb) 379static int dlpar_remove_lmb(struct drmem_lmb *lmb)
380{ 380{
381 unsigned long block_sz; 381 unsigned long block_sz;
382 int nid, rc; 382 int rc;
383 383
384 if (!lmb_is_removable(lmb)) 384 if (!lmb_is_removable(lmb))
385 return -EINVAL; 385 return -EINVAL;
@@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
389 return rc; 389 return rc;
390 390
391 block_sz = pseries_memory_block_size(); 391 block_sz = pseries_memory_block_size();
392 nid = memory_add_physaddr_to_nid(lmb->base_addr);
393 392
394 __remove_memory(nid, lmb->base_addr, block_sz); 393 __remove_memory(lmb->nid, lmb->base_addr, block_sz);
395 394
396 /* Update memory regions for memory remove */ 395 /* Update memory regions for memory remove */
397 memblock_remove(lmb->base_addr, block_sz); 396 memblock_remove(lmb->base_addr, block_sz);
398 397
399 invalidate_lmb_associativity_index(lmb); 398 invalidate_lmb_associativity_index(lmb);
399 lmb_clear_nid(lmb);
400 lmb->flags &= ~DRCONF_MEM_ASSIGNED; 400 lmb->flags &= ~DRCONF_MEM_ASSIGNED;
401 401
402 return 0; 402 return 0;
@@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
653static int dlpar_add_lmb(struct drmem_lmb *lmb) 653static int dlpar_add_lmb(struct drmem_lmb *lmb)
654{ 654{
655 unsigned long block_sz; 655 unsigned long block_sz;
656 int nid, rc; 656 int rc;
657 657
658 if (lmb->flags & DRCONF_MEM_ASSIGNED) 658 if (lmb->flags & DRCONF_MEM_ASSIGNED)
659 return -EINVAL; 659 return -EINVAL;
@@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
664 return rc; 664 return rc;
665 } 665 }
666 666
667 lmb_set_nid(lmb);
667 block_sz = memory_block_size_bytes(); 668 block_sz = memory_block_size_bytes();
668 669
669 /* Find the node id for this address */
670 nid = memory_add_physaddr_to_nid(lmb->base_addr);
671
672 /* Add the memory */ 670 /* Add the memory */
673 rc = __add_memory(nid, lmb->base_addr, block_sz); 671 rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
674 if (rc) { 672 if (rc) {
675 invalidate_lmb_associativity_index(lmb); 673 invalidate_lmb_associativity_index(lmb);
676 return rc; 674 return rc;
@@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
678 676
679 rc = dlpar_online_lmb(lmb); 677 rc = dlpar_online_lmb(lmb);
680 if (rc) { 678 if (rc) {
681 __remove_memory(nid, lmb->base_addr, block_sz); 679 __remove_memory(lmb->nid, lmb->base_addr, block_sz);
682 invalidate_lmb_associativity_index(lmb); 680 invalidate_lmb_associativity_index(lmb);
681 lmb_clear_nid(lmb);
683 } else { 682 } else {
684 lmb->flags |= DRCONF_MEM_ASSIGNED; 683 lmb->flags |= DRCONF_MEM_ASSIGNED;
685 } 684 }
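
The hotplug-memory.c change stops recomputing the node id from the physical address on every add/remove and instead uses a value cached in the drmem_lmb, set via lmb_set_nid() at add time and cleared on removal. A sketch of caching the derived value; the address-to-node mapping here is a toy:

#include <stdint.h>

struct lmb {
	uint64_t base_addr;
	int nid;	/* cached node id, -1 when unassigned */
};

static int phys_to_nid(uint64_t addr)
{
	return (addr >> 30) & 1;	/* toy mapping for the sketch */
}

/* Resolve once when the LMB is added ... */
void lmb_set_nid(struct lmb *lmb)
{
	lmb->nid = phys_to_nid(lmb->base_addr);
}

/* ... and invalidate when it is removed, mirroring the hunk above. */
void lmb_clear_nid(struct lmb *lmb)
{
	lmb->nid = -1;
}
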
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 36eb1ddbac69..03bbb299320e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -105,7 +105,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
105 unsigned long attrs) 105 unsigned long attrs)
106{ 106{
107 u64 proto_tce; 107 u64 proto_tce;
108 __be64 *tcep, *tces; 108 __be64 *tcep;
109 u64 rpn; 109 u64 rpn;
110 110
111 proto_tce = TCE_PCI_READ; // Read allowed 111 proto_tce = TCE_PCI_READ; // Read allowed
@@ -113,7 +113,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
113 if (direction != DMA_TO_DEVICE) 113 if (direction != DMA_TO_DEVICE)
114 proto_tce |= TCE_PCI_WRITE; 114 proto_tce |= TCE_PCI_WRITE;
115 115
116 tces = tcep = ((__be64 *)tbl->it_base) + index; 116 tcep = ((__be64 *)tbl->it_base) + index;
117 117
118 while (npages--) { 118 while (npages--) {
119 /* can't move this out since we might cross MEMBLOCK boundary */ 119 /* can't move this out since we might cross MEMBLOCK boundary */
@@ -129,9 +129,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
129 129
130static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) 130static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
131{ 131{
132 __be64 *tcep, *tces; 132 __be64 *tcep;
133 133
134 tces = tcep = ((__be64 *)tbl->it_base) + index; 134 tcep = ((__be64 *)tbl->it_base) + index;
135 135
136 while (npages--) 136 while (npages--)
137 *(tcep++) = 0; 137 *(tcep++) = 0;
@@ -945,7 +945,7 @@ static phys_addr_t ddw_memory_hotplug_max(void)
945 945
946 for_each_node_by_type(memory, "memory") { 946 for_each_node_by_type(memory, "memory") {
947 unsigned long start, size; 947 unsigned long start, size;
948 int ranges, n_mem_addr_cells, n_mem_size_cells, len; 948 int n_mem_addr_cells, n_mem_size_cells, len;
949 const __be32 *memcell_buf; 949 const __be32 *memcell_buf;
950 950
951 memcell_buf = of_get_property(memory, "reg", &len); 951 memcell_buf = of_get_property(memory, "reg", &len);
@@ -955,9 +955,6 @@ static phys_addr_t ddw_memory_hotplug_max(void)
955 n_mem_addr_cells = of_n_addr_cells(memory); 955 n_mem_addr_cells = of_n_addr_cells(memory);
956 n_mem_size_cells = of_n_size_cells(memory); 956 n_mem_size_cells = of_n_size_cells(memory);
957 957
958 /* ranges in cell */
959 ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
960
961 start = of_read_number(memcell_buf, n_mem_addr_cells); 958 start = of_read_number(memcell_buf, n_mem_addr_cells);
962 memcell_buf += n_mem_addr_cells; 959 memcell_buf += n_mem_addr_cells;
963 size = of_read_number(memcell_buf, n_mem_size_cells); 960 size = of_read_number(memcell_buf, n_mem_size_cells);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f2a9f0adc2d3..1034ef1fe2b4 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
901 break; 901 break;
902 902
903 case H_PARAMETER: 903 case H_PARAMETER:
904 pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
904 return -EINVAL; 905 return -EINVAL;
905 case H_RESOURCE: 906 case H_RESOURCE:
907 pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
906 return -EPERM; 908 return -EPERM;
907 default: 909 default:
908 pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); 910 pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
@@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
918 if (rc != 0) { 920 if (rc != 0) {
919 switch (state.commit_rc) { 921 switch (state.commit_rc) {
920 case H_PTEG_FULL: 922 case H_PTEG_FULL:
921 pr_warn("Hash collision while resizing HPT\n");
922 return -ENOSPC; 923 return -ENOSPC;
923 924
924 default: 925 default:
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index 27f0a915c8a9..f860a897a9e0 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -106,7 +106,7 @@ static ssize_t pmem_drc_remove_node(u32 drc_index)
106 106
107int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) 107int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
108{ 108{
109 u32 count, drc_index; 109 u32 drc_index;
110 int rc; 110 int rc;
111 111
112 /* slim chance, but we might get a hotplug event while booting */ 112 /* slim chance, but we might get a hotplug event while booting */
@@ -123,7 +123,6 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
123 return -EINVAL; 123 return -EINVAL;
124 } 124 }
125 125
126 count = hp_elog->_drc_u.drc_count;
127 drc_index = hp_elog->_drc_u.drc_index; 126 drc_index = hp_elog->_drc_u.drc_index;
128 127
129 lock_device_hotplug(); 128 lock_device_hotplug();
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 452dcfd7e5dd..c97d15352f9f 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -539,44 +539,44 @@ static void pseries_print_mce_info(struct pt_regs *regs,
539 int disposition = rtas_error_disposition(errp); 539 int disposition = rtas_error_disposition(errp);
540 540
541 static const char * const initiators[] = { 541 static const char * const initiators[] = {
542 "Unknown", 542 [0] = "Unknown",
543 "CPU", 543 [1] = "CPU",
544 "PCI", 544 [2] = "PCI",
545 "ISA", 545 [3] = "ISA",
546 "Memory", 546 [4] = "Memory",
547 "Power Mgmt", 547 [5] = "Power Mgmt",
548 }; 548 };
549 static const char * const mc_err_types[] = { 549 static const char * const mc_err_types[] = {
550 "UE", 550 [0] = "UE",
551 "SLB", 551 [1] = "SLB",
552 "ERAT", 552 [2] = "ERAT",
553 "Unknown", 553 [3] = "Unknown",
554 "TLB", 554 [4] = "TLB",
555 "D-Cache", 555 [5] = "D-Cache",
556 "Unknown", 556 [6] = "Unknown",
557 "I-Cache", 557 [7] = "I-Cache",
558 }; 558 };
559 static const char * const mc_ue_types[] = { 559 static const char * const mc_ue_types[] = {
560 "Indeterminate", 560 [0] = "Indeterminate",
561 "Instruction fetch", 561 [1] = "Instruction fetch",
562 "Page table walk ifetch", 562 [2] = "Page table walk ifetch",
563 "Load/Store", 563 [3] = "Load/Store",
564 "Page table walk Load/Store", 564 [4] = "Page table walk Load/Store",
565 }; 565 };
566 566
567 /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ 567 /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
568 static const char * const mc_slb_types[] = { 568 static const char * const mc_slb_types[] = {
569 "Parity", 569 [0] = "Parity",
570 "Multihit", 570 [1] = "Multihit",
571 "Indeterminate", 571 [2] = "Indeterminate",
572 }; 572 };
573 573
574 /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ 574 /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
575 static const char * const mc_soft_types[] = { 575 static const char * const mc_soft_types[] = {
576 "Unknown", 576 [0] = "Unknown",
577 "Parity", 577 [1] = "Parity",
578 "Multihit", 578 [2] = "Multihit",
579 "Indeterminate", 579 [3] = "Indeterminate",
580 }; 580 };
581 581
582 if (!rtas_error_extended(errp)) { 582 if (!rtas_error_extended(errp)) {
@@ -707,6 +707,87 @@ out:
707 return disposition; 707 return disposition;
708} 708}
709 709
710#ifdef CONFIG_MEMORY_FAILURE
711
712static DEFINE_PER_CPU(int, rtas_ue_count);
713static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
714
715#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
716#define UE_LOGICAL_ADDR_PROVIDED 0x20
717
718
719static void pseries_hwpoison_work_fn(struct work_struct *work)
720{
721 unsigned long paddr;
722 int index;
723
724 while (__this_cpu_read(rtas_ue_count) > 0) {
725 index = __this_cpu_read(rtas_ue_count) - 1;
726 paddr = __this_cpu_read(rtas_ue_paddr[index]);
727 memory_failure(paddr >> PAGE_SHIFT, 0);
728 __this_cpu_dec(rtas_ue_count);
729 }
730}
731
732static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
733
734static void queue_ue_paddr(unsigned long paddr)
735{
736 int index;
737
738 index = __this_cpu_inc_return(rtas_ue_count) - 1;
739 if (index >= MAX_MC_EVT) {
740 __this_cpu_dec(rtas_ue_count);
741 return;
742 }
743 this_cpu_write(rtas_ue_paddr[index], paddr);
744 schedule_work(&hwpoison_work);
745}
746
747static void pseries_do_memory_failure(struct pt_regs *regs,
748 struct pseries_mc_errorlog *mce_log)
749{
750 unsigned long paddr;
751
752 if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
753 paddr = be64_to_cpu(mce_log->logical_address);
754 } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
755 unsigned long pfn;
756
757 pfn = addr_to_pfn(regs,
758 be64_to_cpu(mce_log->effective_address));
759 if (pfn == ULONG_MAX)
760 return;
761 paddr = pfn << PAGE_SHIFT;
762 } else {
763 return;
764 }
765 queue_ue_paddr(paddr);
766}
767
768static void pseries_process_ue(struct pt_regs *regs,
769 struct rtas_error_log *errp)
770{
771 struct pseries_errorlog *pseries_log;
772 struct pseries_mc_errorlog *mce_log;
773
774 if (!rtas_error_extended(errp))
775 return;
776
777 pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
778 if (!pseries_log)
779 return;
780
781 mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
782
783 if (mce_log->error_type == MC_ERROR_TYPE_UE)
784 pseries_do_memory_failure(regs, mce_log);
785}
786#else
787static inline void pseries_process_ue(struct pt_regs *regs,
788 struct rtas_error_log *errp) { }
789#endif /* CONFIG_MEMORY_FAILURE */
790
710/* 791/*
711 * Process MCE rtas errlog event. 792 * Process MCE rtas errlog event.
712 */ 793 */
@@ -765,6 +846,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
765 recovered = 1; 846 recovered = 1;
766 } 847 }
767 848
849 pseries_process_ue(regs, err);
850
768 /* Queue irq work to log this rtas event later. */ 851 /* Queue irq work to log this rtas event later. */
769 irq_work_queue(&mce_errlog_process_work); 852 irq_work_queue(&mce_errlog_process_work);
770 853
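The indirection through a per-CPU array and a workqueue is the load-bearing part of this hunk: the machine check arrives in an NMI-like context where memory_failure(), which takes locks and can sleep, must not be called directly, so the handler only records the failing address and defers the actual poisoning. The resulting flow, using the names from the code above:

/*
 * recover_mce()                           machine-check context
 *   -> pseries_process_ue()               find the MCE section in the log
 *     -> pseries_do_memory_failure()      logical/effective address -> paddr
 *       -> queue_ue_paddr()               per-CPU stash + schedule_work()
 *
 * hwpoison_work                           workqueue, process context
 *   -> pseries_hwpoison_work_fn()         drain the per-CPU stash
 *     -> memory_failure(paddr >> PAGE_SHIFT, 0)
 */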
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index 4314ba5baf43..7c6d8b14f440 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -1,4 +1,7 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2
3KASAN_SANITIZE := n
4
2targets += trampoline.o purgatory.ro kexec-purgatory.c 5targets += trampoline.o purgatory.ro kexec-purgatory.c
3 6
4LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined 7LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1ca127d052a6..0c037e933e55 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -437,6 +437,12 @@ void xive_native_sync_source(u32 hw_irq)
437} 437}
438EXPORT_SYMBOL_GPL(xive_native_sync_source); 438EXPORT_SYMBOL_GPL(xive_native_sync_source);
439 439
440void xive_native_sync_queue(u32 hw_irq)
441{
442 opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq);
443}
444EXPORT_SYMBOL_GPL(xive_native_sync_queue);
445
440static const struct xive_ops xive_native_ops = { 446static const struct xive_ops xive_native_ops = {
441 .populate_irq_data = xive_native_populate_irq_data, 447 .populate_irq_data = xive_native_populate_irq_data,
442 .configure_irq = xive_native_configure_irq, 448 .configure_irq = xive_native_configure_irq,
@@ -711,3 +717,96 @@ bool xive_native_has_single_escalation(void)
711 return xive_has_single_esc; 717 return xive_has_single_esc;
712} 718}
713EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); 719EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
720
721int xive_native_get_queue_info(u32 vp_id, u32 prio,
722 u64 *out_qpage,
723 u64 *out_qsize,
724 u64 *out_qeoi_page,
725 u32 *out_escalate_irq,
726 u64 *out_qflags)
727{
728 __be64 qpage;
729 __be64 qsize;
730 __be64 qeoi_page;
731 __be32 escalate_irq;
732 __be64 qflags;
733 s64 rc;
734
735 rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize,
736 &qeoi_page, &escalate_irq, &qflags);
737 if (rc) {
738 pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n",
739 vp_id, prio, rc);
740 return -EIO;
741 }
742
743 if (out_qpage)
744 *out_qpage = be64_to_cpu(qpage);
745 if (out_qsize)
746 *out_qsize = be32_to_cpu(qsize);
747 if (out_qeoi_page)
748 *out_qeoi_page = be64_to_cpu(qeoi_page);
749 if (out_escalate_irq)
750 *out_escalate_irq = be32_to_cpu(escalate_irq);
751 if (out_qflags)
752 *out_qflags = be64_to_cpu(qflags);
753
754 return 0;
755}
756EXPORT_SYMBOL_GPL(xive_native_get_queue_info);
757
758int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex)
759{
760 __be32 opal_qtoggle;
761 __be32 opal_qindex;
762 s64 rc;
763
764 rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle,
765 &opal_qindex);
766 if (rc) {
767 pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n",
768 vp_id, prio, rc);
769 return -EIO;
770 }
771
772 if (qtoggle)
773 *qtoggle = be32_to_cpu(opal_qtoggle);
774 if (qindex)
775 *qindex = be32_to_cpu(opal_qindex);
776
777 return 0;
778}
779EXPORT_SYMBOL_GPL(xive_native_get_queue_state);
780
781int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
782{
783 s64 rc;
784
785 rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex);
786 if (rc) {
787 pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n",
788 vp_id, prio, rc);
789 return -EIO;
790 }
791
792 return 0;
793}
794EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
795
796int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
797{
798 __be64 state;
799 s64 rc;
800
801 rc = opal_xive_get_vp_state(vp_id, &state);
802 if (rc) {
803 pr_err("OPAL failed to get vp state for VCPU %d : %lld\n",
804 vp_id, rc);
805 return -EIO;
806 }
807
808 if (out_state)
809 *out_state = be64_to_cpu(state);
810 return 0;
811}
812EXPORT_SYMBOL_GPL(xive_native_get_vp_state);
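These new exports give a consumer, most plausibly KVM's XIVE device, enough state to checkpoint and restore a vCPU's event queues: the toggle bit and queue index that only OPAL can report. A hedged usage sketch (save_one_queue()/restore_one_queue() and the hw_irq plumbing are hypothetical; the xive_native_* signatures are those added above):

/* Hypothetical migration helper: checkpoint one priority queue of a vCPU. */
static int save_one_queue(u32 vp_id, u32 prio, u32 hw_irq,
			  u32 *toggle, u32 *index)
{
	/* Push any in-flight events out to the queue first. */
	xive_native_sync_queue(hw_irq);

	/* toggle and index together locate the next valid EQ entry. */
	return xive_native_get_queue_state(vp_id, prio, toggle, index);
}

static int restore_one_queue(u32 vp_id, u32 prio, u32 toggle, u32 index)
{
	return xive_native_set_queue_state(vp_id, prio, toggle, index);
}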
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 3050f9323254..f142570ad860 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -7,6 +7,7 @@ subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
7GCOV_PROFILE := n 7GCOV_PROFILE := n
8KCOV_INSTRUMENT := n 8KCOV_INSTRUMENT := n
9UBSAN_SANITIZE := n 9UBSAN_SANITIZE := n
10KASAN_SANITIZE := n
10 11
11# Disable ftrace for the entire directory 12# Disable ftrace for the entire directory
12ORIG_CFLAGS := $(KBUILD_CFLAGS) 13ORIG_CFLAGS := $(KBUILD_CFLAGS)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 13c6a47e6150..1b0149b2bb6c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -80,6 +80,7 @@ static int set_indicator_token = RTAS_UNKNOWN_SERVICE;
80#endif 80#endif
81static unsigned long in_xmon __read_mostly = 0; 81static unsigned long in_xmon __read_mostly = 0;
82static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT); 82static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT);
83static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE);
83 84
84static unsigned long adrs; 85static unsigned long adrs;
85static int size = 1; 86static int size = 1;
@@ -202,6 +203,8 @@ static void dump_tlb_book3e(void);
202#define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3]) 203#define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3])
203#endif 204#endif
204 205
206static const char *xmon_ro_msg = "Operation disabled: xmon in read-only mode\n";
207
205static char *help_string = "\ 208static char *help_string = "\
206Commands:\n\ 209Commands:\n\
207 b show breakpoints\n\ 210 b show breakpoints\n\
@@ -989,6 +992,10 @@ cmds(struct pt_regs *excp)
989 memlocate(); 992 memlocate();
990 break; 993 break;
991 case 'z': 994 case 'z':
995 if (xmon_is_ro) {
996 printf(xmon_ro_msg);
997 break;
998 }
992 memzcan(); 999 memzcan();
993 break; 1000 break;
994 case 'i': 1001 case 'i':
@@ -1042,6 +1049,10 @@ cmds(struct pt_regs *excp)
1042 set_lpp_cmd(); 1049 set_lpp_cmd();
1043 break; 1050 break;
1044 case 'b': 1051 case 'b':
1052 if (xmon_is_ro) {
1053 printf(xmon_ro_msg);
1054 break;
1055 }
1045 bpt_cmds(); 1056 bpt_cmds();
1046 break; 1057 break;
1047 case 'C': 1058 case 'C':
@@ -1055,6 +1066,10 @@ cmds(struct pt_regs *excp)
1055 bootcmds(); 1066 bootcmds();
1056 break; 1067 break;
1057 case 'p': 1068 case 'p':
1069 if (xmon_is_ro) {
1070 printf(xmon_ro_msg);
1071 break;
1072 }
1058 proccall(); 1073 proccall();
1059 break; 1074 break;
1060 case 'P': 1075 case 'P':
@@ -1777,6 +1792,11 @@ read_spr(int n, unsigned long *vp)
1777static void 1792static void
1778write_spr(int n, unsigned long val) 1793write_spr(int n, unsigned long val)
1779{ 1794{
1795 if (xmon_is_ro) {
1796 printf(xmon_ro_msg);
1797 return;
1798 }
1799
1780 if (setjmp(bus_error_jmp) == 0) { 1800 if (setjmp(bus_error_jmp) == 0) {
1781 catch_spr_faults = 1; 1801 catch_spr_faults = 1;
1782 sync(); 1802 sync();
@@ -2016,6 +2036,12 @@ mwrite(unsigned long adrs, void *buf, int size)
2016 char *p, *q; 2036 char *p, *q;
2017 2037
2018 n = 0; 2038 n = 0;
2039
2040 if (xmon_is_ro) {
2041 printf(xmon_ro_msg);
2042 return n;
2043 }
2044
2019 if (setjmp(bus_error_jmp) == 0) { 2045 if (setjmp(bus_error_jmp) == 0) {
2020 catch_memory_errors = 1; 2046 catch_memory_errors = 1;
2021 sync(); 2047 sync();
@@ -2434,7 +2460,6 @@ static void dump_one_paca(int cpu)
2434 DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x"); 2460 DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x");
2435#endif 2461#endif
2436 DUMP(p, irq_work_pending, "%#-*x"); 2462 DUMP(p, irq_work_pending, "%#-*x");
2437 DUMP(p, nap_state_lost, "%#-*x");
2438 DUMP(p, sprg_vdso, "%#-*llx"); 2463 DUMP(p, sprg_vdso, "%#-*llx");
2439 2464
2440#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2465#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -2442,19 +2467,16 @@ static void dump_one_paca(int cpu)
2442#endif 2467#endif
2443 2468
2444#ifdef CONFIG_PPC_POWERNV 2469#ifdef CONFIG_PPC_POWERNV
2445 DUMP(p, core_idle_state_ptr, "%-*px"); 2470 DUMP(p, idle_state, "%#-*lx");
2446 DUMP(p, thread_idle_state, "%#-*x"); 2471 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
2447 DUMP(p, thread_mask, "%#-*x"); 2472 DUMP(p, thread_idle_state, "%#-*x");
2448 DUMP(p, subcore_sibling_mask, "%#-*x"); 2473 DUMP(p, subcore_sibling_mask, "%#-*x");
2449 DUMP(p, requested_psscr, "%#-*llx"); 2474 } else {
2450 DUMP(p, stop_sprs.pid, "%#-*llx"); 2475#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
2451 DUMP(p, stop_sprs.ldbar, "%#-*llx"); 2476 DUMP(p, requested_psscr, "%#-*llx");
2452 DUMP(p, stop_sprs.fscr, "%#-*llx"); 2477 DUMP(p, dont_stop.counter, "%#-*x");
2453 DUMP(p, stop_sprs.hfscr, "%#-*llx"); 2478#endif
2454 DUMP(p, stop_sprs.mmcr1, "%#-*llx"); 2479 }
2455 DUMP(p, stop_sprs.mmcr2, "%#-*llx");
2456 DUMP(p, stop_sprs.mmcra, "%#-*llx");
2457 DUMP(p, dont_stop.counter, "%#-*x");
2458#endif 2480#endif
2459 2481
2460 DUMP(p, accounting.utime, "%#-*lx"); 2482 DUMP(p, accounting.utime, "%#-*lx");
@@ -2887,9 +2909,17 @@ memops(int cmd)
2887 scanhex((void *)&mcount); 2909 scanhex((void *)&mcount);
2888 switch( cmd ){ 2910 switch( cmd ){
2889 case 'm': 2911 case 'm':
2912 if (xmon_is_ro) {
2913 printf(xmon_ro_msg);
2914 break;
2915 }
2890 memmove((void *)mdest, (void *)msrc, mcount); 2916 memmove((void *)mdest, (void *)msrc, mcount);
2891 break; 2917 break;
2892 case 's': 2918 case 's':
2919 if (xmon_is_ro) {
2920 printf(xmon_ro_msg);
2921 break;
2922 }
2893 memset((void *)mdest, mval, mcount); 2923 memset((void *)mdest, mval, mcount);
2894 break; 2924 break;
2895 case 'd': 2925 case 'd':
@@ -3799,6 +3829,14 @@ static int __init early_parse_xmon(char *p)
3799 } else if (strncmp(p, "on", 2) == 0) { 3829 } else if (strncmp(p, "on", 2) == 0) {
3800 xmon_init(1); 3830 xmon_init(1);
3801 xmon_on = 1; 3831 xmon_on = 1;
3832 } else if (strncmp(p, "rw", 2) == 0) {
3833 xmon_init(1);
3834 xmon_on = 1;
3835 xmon_is_ro = false;
3836 } else if (strncmp(p, "ro", 2) == 0) {
3837 xmon_init(1);
3838 xmon_on = 1;
3839 xmon_is_ro = true;
3802 } else if (strncmp(p, "off", 3) == 0) 3840 } else if (strncmp(p, "off", 3) == 0)
3803 xmon_on = 0; 3841 xmon_on = 0;
3804 else 3842 else
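Together with xmon_is_ro being seeded from CONFIG_XMON_DEFAULT_RO_MODE earlier in the file, the parser above yields two new command-line spellings; a usage sketch of the boot options as implied by the strncmp() checks:

xmon=ro    # enter xmon on trap, but refuse memory writes, SPR writes,
           # breakpoints and proccall (read-only mode)
xmon=rw    # enter xmon with full read-write behaviour, overriding
           # CONFIG_XMON_DEFAULT_RO_MODE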
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index dc7b34174f85..a4d17a5a9763 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -168,7 +168,7 @@ int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar)
168 if (dsisr & CXL_PSL_DSISR_An_S) 168 if (dsisr & CXL_PSL_DSISR_An_S)
169 access |= _PAGE_WRITE; 169 access |= _PAGE_WRITE;
170 170
171 if (!mm && (REGION_ID(dar) != USER_REGION_ID)) 171 if (!mm && (get_region_id(dar) != USER_REGION_ID))
172 access |= _PAGE_PRIVILEGED; 172 access |= _PAGE_PRIVILEGED;
173 173
174 if (dsisr & DSISR_NOHPTE) 174 if (dsisr & DSISR_NOHPTE)
diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile
index 5229dcda8297..d07d1bb8e8d4 100644
--- a/drivers/misc/ocxl/Makefile
+++ b/drivers/misc/ocxl/Makefile
@@ -1,8 +1,9 @@
1# SPDX-License-Identifier: GPL-2.0+ 1# SPDX-License-Identifier: GPL-2.0+
2ccflags-$(CONFIG_PPC_WERROR) += -Werror 2ccflags-$(CONFIG_PPC_WERROR) += -Werror
3 3
4ocxl-y += main.o pci.o config.o file.o pasid.o 4ocxl-y += main.o pci.o config.o file.o pasid.o mmio.o
5ocxl-y += link.o context.o afu_irq.o sysfs.o trace.o 5ocxl-y += link.o context.o afu_irq.o sysfs.o trace.o
6ocxl-y += core.o
6obj-$(CONFIG_OCXL) += ocxl.o 7obj-$(CONFIG_OCXL) += ocxl.o
7 8
8# For tracepoints to include our trace.h from tracepoint infrastructure: 9# For tracepoints to include our trace.h from tracepoint infrastructure:
diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
index 11ab996657a2..70f8f1c3929d 100644
--- a/drivers/misc/ocxl/afu_irq.c
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -1,7 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0+ 1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp. 2// Copyright 2017 IBM Corp.
3#include <linux/interrupt.h> 3#include <linux/interrupt.h>
4#include <linux/eventfd.h> 4#include <asm/pnv-ocxl.h>
5#include "ocxl_internal.h" 5#include "ocxl_internal.h"
6#include "trace.h" 6#include "trace.h"
7 7
@@ -11,27 +11,59 @@ struct afu_irq {
11 unsigned int virq; 11 unsigned int virq;
12 char *name; 12 char *name;
13 u64 trigger_page; 13 u64 trigger_page;
14 struct eventfd_ctx *ev_ctx; 14 irqreturn_t (*handler)(void *private);
15 void (*free_private)(void *private);
16 void *private;
15}; 17};
16 18
17static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset) 19int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset)
18{ 20{
19 return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT; 21 return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT;
20} 22}
21 23
22static u64 irq_id_to_offset(struct ocxl_context *ctx, int id) 24u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int irq_id)
23{ 25{
24 return ctx->afu->irq_base_offset + (id << PAGE_SHIFT); 26 return ctx->afu->irq_base_offset + (irq_id << PAGE_SHIFT);
25} 27}
26 28
29int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id,
30 irqreturn_t (*handler)(void *private),
31 void (*free_private)(void *private),
32 void *private)
33{
34 struct afu_irq *irq;
35 int rc;
36
37 mutex_lock(&ctx->irq_lock);
38 irq = idr_find(&ctx->irq_idr, irq_id);
39 if (!irq) {
40 rc = -EINVAL;
41 goto unlock;
42 }
43
44 irq->handler = handler;
45 irq->private = private;
46 irq->free_private = free_private;
47
48 rc = 0;
49 // Fall through to unlock
50
51unlock:
52 mutex_unlock(&ctx->irq_lock);
53 return rc;
54}
55EXPORT_SYMBOL_GPL(ocxl_irq_set_handler);
56
27static irqreturn_t afu_irq_handler(int virq, void *data) 57static irqreturn_t afu_irq_handler(int virq, void *data)
28{ 58{
29 struct afu_irq *irq = (struct afu_irq *) data; 59 struct afu_irq *irq = (struct afu_irq *) data;
30 60
31 trace_ocxl_afu_irq_receive(virq); 61 trace_ocxl_afu_irq_receive(virq);
32 if (irq->ev_ctx) 62
33 eventfd_signal(irq->ev_ctx, 1); 63 if (irq->handler)
34 return IRQ_HANDLED; 64 return irq->handler(irq->private);
65
66 return IRQ_HANDLED; // Just drop it on the ground
35} 67}
36 68
37static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq) 69static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq)
@@ -69,7 +101,7 @@ static void release_afu_irq(struct afu_irq *irq)
69 kfree(irq->name); 101 kfree(irq->name);
70} 102}
71 103
72int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset) 104int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id)
73{ 105{
74 struct afu_irq *irq; 106 struct afu_irq *irq;
75 int rc; 107 int rc;
@@ -101,11 +133,11 @@ int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset)
101 if (rc) 133 if (rc)
102 goto err_alloc; 134 goto err_alloc;
103 135
104 *irq_offset = irq_id_to_offset(ctx, irq->id); 136 trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq);
105
106 trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq,
107 *irq_offset);
108 mutex_unlock(&ctx->irq_lock); 137 mutex_unlock(&ctx->irq_lock);
138
139 *irq_id = irq->id;
140
109 return 0; 141 return 0;
110 142
111err_alloc: 143err_alloc:
@@ -117,29 +149,29 @@ err_unlock:
117 kfree(irq); 149 kfree(irq);
118 return rc; 150 return rc;
119} 151}
152EXPORT_SYMBOL_GPL(ocxl_afu_irq_alloc);
120 153
121static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) 154static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx)
122{ 155{
123 trace_ocxl_afu_irq_free(ctx->pasid, irq->id); 156 trace_ocxl_afu_irq_free(ctx->pasid, irq->id);
124 if (ctx->mapping) 157 if (ctx->mapping)
125 unmap_mapping_range(ctx->mapping, 158 unmap_mapping_range(ctx->mapping,
126 irq_id_to_offset(ctx, irq->id), 159 ocxl_irq_id_to_offset(ctx, irq->id),
127 1 << PAGE_SHIFT, 1); 160 1 << PAGE_SHIFT, 1);
128 release_afu_irq(irq); 161 release_afu_irq(irq);
129 if (irq->ev_ctx) 162 if (irq->free_private)
130 eventfd_ctx_put(irq->ev_ctx); 163 irq->free_private(irq->private);
131 ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); 164 ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
132 kfree(irq); 165 kfree(irq);
133} 166}
134 167
135int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset) 168int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id)
136{ 169{
137 struct afu_irq *irq; 170 struct afu_irq *irq;
138 int id = irq_offset_to_id(ctx, irq_offset);
139 171
140 mutex_lock(&ctx->irq_lock); 172 mutex_lock(&ctx->irq_lock);
141 173
142 irq = idr_find(&ctx->irq_idr, id); 174 irq = idr_find(&ctx->irq_idr, irq_id);
143 if (!irq) { 175 if (!irq) {
144 mutex_unlock(&ctx->irq_lock); 176 mutex_unlock(&ctx->irq_lock);
145 return -EINVAL; 177 return -EINVAL;
@@ -149,6 +181,7 @@ int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset)
149 mutex_unlock(&ctx->irq_lock); 181 mutex_unlock(&ctx->irq_lock);
150 return 0; 182 return 0;
151} 183}
184EXPORT_SYMBOL_GPL(ocxl_afu_irq_free);
152 185
153void ocxl_afu_irq_free_all(struct ocxl_context *ctx) 186void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
154{ 187{
@@ -161,41 +194,16 @@ void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
161 mutex_unlock(&ctx->irq_lock); 194 mutex_unlock(&ctx->irq_lock);
162} 195}
163 196
164int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd) 197u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id)
165{
166 struct afu_irq *irq;
167 struct eventfd_ctx *ev_ctx;
168 int rc = 0, id = irq_offset_to_id(ctx, irq_offset);
169
170 mutex_lock(&ctx->irq_lock);
171 irq = idr_find(&ctx->irq_idr, id);
172 if (!irq) {
173 rc = -EINVAL;
174 goto unlock;
175 }
176
177 ev_ctx = eventfd_ctx_fdget(eventfd);
178 if (IS_ERR(ev_ctx)) {
179 rc = -EINVAL;
180 goto unlock;
181 }
182
183 irq->ev_ctx = ev_ctx;
184unlock:
185 mutex_unlock(&ctx->irq_lock);
186 return rc;
187}
188
189u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset)
190{ 198{
191 struct afu_irq *irq; 199 struct afu_irq *irq;
192 int id = irq_offset_to_id(ctx, irq_offset);
193 u64 addr = 0; 200 u64 addr = 0;
194 201
195 mutex_lock(&ctx->irq_lock); 202 mutex_lock(&ctx->irq_lock);
196 irq = idr_find(&ctx->irq_idr, id); 203 irq = idr_find(&ctx->irq_idr, irq_id);
197 if (irq) 204 if (irq)
198 addr = irq->trigger_page; 205 addr = irq->trigger_page;
199 mutex_unlock(&ctx->irq_lock); 206 mutex_unlock(&ctx->irq_lock);
200 return addr; 207 return addr;
201} 208}
209EXPORT_SYMBOL_GPL(ocxl_afu_irq_get_addr);
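With the eventfd plumbing moved out of afu_irq.c, an external kernel driver registers its own callback against an allocated IRQ id. A hedged sketch of the intended calling sequence (my_afu_irq()/my_setup_irq() and struct my_dev are hypothetical; the ocxl_* signatures are those exported above):

/* Hypothetical external driver routing an AFU interrupt to a completion. */
static irqreturn_t my_afu_irq(void *private)
{
	struct my_dev *dev = private;

	complete(&dev->cmd_done);
	return IRQ_HANDLED;
}

static int my_setup_irq(struct ocxl_context *ctx, struct my_dev *dev)
{
	int irq_id, rc;

	rc = ocxl_afu_irq_alloc(ctx, &irq_id);
	if (rc)
		return rc;

	/* No free_private callback: dev outlives the IRQ in this driver. */
	rc = ocxl_irq_set_handler(ctx, irq_id, my_afu_irq, NULL, dev);
	if (rc)
		ocxl_afu_irq_free(ctx, irq_id);
	return rc;
}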
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 8f2c5d8bd2ee..5e65acb8e134 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -2,8 +2,8 @@
2// Copyright 2017 IBM Corp. 2// Copyright 2017 IBM Corp.
3#include <linux/pci.h> 3#include <linux/pci.h>
4#include <asm/pnv-ocxl.h> 4#include <asm/pnv-ocxl.h>
5#include <misc/ocxl.h>
6#include <misc/ocxl-config.h> 5#include <misc/ocxl-config.h>
6#include "ocxl_internal.h"
7 7
8#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit))) 8#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
9#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s) 9#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
@@ -68,7 +68,7 @@ static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
68 return 0; 68 return 0;
69} 69}
70 70
71static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn) 71static void read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
72{ 72{
73 u16 val; 73 u16 val;
74 int pos; 74 int pos;
@@ -89,7 +89,6 @@ static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
89out: 89out:
90 dev_dbg(&dev->dev, "PASID capability:\n"); 90 dev_dbg(&dev->dev, "PASID capability:\n");
91 dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log); 91 dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log);
92 return 0;
93} 92}
94 93
95static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn) 94static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
@@ -205,11 +204,7 @@ int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
205{ 204{
206 int rc; 205 int rc;
207 206
208 rc = read_pasid(dev, fn); 207 read_pasid(dev, fn);
209 if (rc) {
210 dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
211 return -ENODEV;
212 }
213 208
214 rc = read_dvsec_tl(dev, fn); 209 rc = read_dvsec_tl(dev, fn);
215 if (rc) { 210 if (rc) {
@@ -304,7 +299,6 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
304 } 299 }
305 return 1; 300 return 1;
306} 301}
307EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index);
308 302
309static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn, 303static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
310 struct ocxl_afu_config *afu) 304 struct ocxl_afu_config *afu)
@@ -540,7 +534,6 @@ int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
540{ 534{
541 return pnv_ocxl_get_pasid_count(dev, count); 535 return pnv_ocxl_get_pasid_count(dev, count);
542} 536}
543EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info);
544 537
545void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, 538void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
546 u32 pasid_count_log) 539 u32 pasid_count_log)
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index c10a940e3b38..bab9c9364184 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -4,15 +4,17 @@
4#include "trace.h" 4#include "trace.h"
5#include "ocxl_internal.h" 5#include "ocxl_internal.h"
6 6
7struct ocxl_context *ocxl_context_alloc(void) 7int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
8{
9 return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
10}
11
12int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
13 struct address_space *mapping) 8 struct address_space *mapping)
14{ 9{
15 int pasid; 10 int pasid;
11 struct ocxl_context *ctx;
12
13 *context = kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
14 if (!*context)
15 return -ENOMEM;
16
17 ctx = *context;
16 18
17 ctx->afu = afu; 19 ctx->afu = afu;
18 mutex_lock(&afu->contexts_lock); 20 mutex_lock(&afu->contexts_lock);
@@ -43,6 +45,7 @@ int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
43 ocxl_afu_get(afu); 45 ocxl_afu_get(afu);
44 return 0; 46 return 0;
45} 47}
48EXPORT_SYMBOL_GPL(ocxl_context_alloc);
46 49
47/* 50/*
48 * Callback for when a translation fault triggers an error 51 * Callback for when a translation fault triggers an error
@@ -63,7 +66,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
63 wake_up_all(&ctx->events_wq); 66 wake_up_all(&ctx->events_wq);
64} 67}
65 68
66int ocxl_context_attach(struct ocxl_context *ctx, u64 amr) 69int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm)
67{ 70{
68 int rc; 71 int rc;
69 72
@@ -75,7 +78,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
75 } 78 }
76 79
77 rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, 80 rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
78 current->mm->context.id, ctx->tidr, amr, current->mm, 81 mm->context.id, ctx->tidr, amr, mm,
79 xsl_fault_error, ctx); 82 xsl_fault_error, ctx);
80 if (rc) 83 if (rc)
81 goto out; 84 goto out;
@@ -85,13 +88,15 @@ out:
85 mutex_unlock(&ctx->status_mutex); 88 mutex_unlock(&ctx->status_mutex);
86 return rc; 89 return rc;
87} 90}
91EXPORT_SYMBOL_GPL(ocxl_context_attach);
88 92
89static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address, 93static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address,
90 u64 offset, struct ocxl_context *ctx) 94 u64 offset, struct ocxl_context *ctx)
91{ 95{
92 u64 trigger_addr; 96 u64 trigger_addr;
97 int irq_id = ocxl_irq_offset_to_id(ctx, offset);
93 98
94 trigger_addr = ocxl_afu_irq_get_addr(ctx, offset); 99 trigger_addr = ocxl_afu_irq_get_addr(ctx, irq_id);
95 if (!trigger_addr) 100 if (!trigger_addr)
96 return VM_FAULT_SIGBUS; 101 return VM_FAULT_SIGBUS;
97 102
@@ -151,12 +156,14 @@ static const struct vm_operations_struct ocxl_vmops = {
151static int check_mmap_afu_irq(struct ocxl_context *ctx, 156static int check_mmap_afu_irq(struct ocxl_context *ctx,
152 struct vm_area_struct *vma) 157 struct vm_area_struct *vma)
153{ 158{
159 int irq_id = ocxl_irq_offset_to_id(ctx, vma->vm_pgoff << PAGE_SHIFT);
160
154 /* only one page */ 161 /* only one page */
155 if (vma_pages(vma) != 1) 162 if (vma_pages(vma) != 1)
156 return -EINVAL; 163 return -EINVAL;
157 164
 158 /* check offset validity */ 165 /* check offset validity */
159 if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT)) 166 if (!ocxl_afu_irq_get_addr(ctx, irq_id))
160 return -EINVAL; 167 return -EINVAL;
161 168
162 /* 169 /*
@@ -238,11 +245,12 @@ int ocxl_context_detach(struct ocxl_context *ctx)
238 } 245 }
239 rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid); 246 rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
240 if (rc) { 247 if (rc) {
241 dev_warn(&ctx->afu->dev, 248 dev_warn(&dev->dev,
242 "Couldn't remove PE entry cleanly: %d\n", rc); 249 "Couldn't remove PE entry cleanly: %d\n", rc);
243 } 250 }
244 return 0; 251 return 0;
245} 252}
253EXPORT_SYMBOL_GPL(ocxl_context_detach);
246 254
247void ocxl_context_detach_all(struct ocxl_afu *afu) 255void ocxl_context_detach_all(struct ocxl_afu *afu)
248{ 256{
@@ -280,3 +288,4 @@ void ocxl_context_free(struct ocxl_context *ctx)
280 ocxl_afu_put(ctx->afu); 288 ocxl_afu_put(ctx->afu);
281 kfree(ctx); 289 kfree(ctx);
282} 290}
291EXPORT_SYMBOL_GPL(ocxl_context_free);
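Folding the allocation into ocxl_context_alloc() and taking the mm as a parameter makes the context API usable from a kernel consumer that is not necessarily running as the target process. A hedged sketch of the driver-side sequence (my_attach() is hypothetical; passing a NULL mapping assumes the caller exposes no mmap interface):

static int my_attach(struct ocxl_afu *afu)
{
	struct ocxl_context *ctx;
	int rc;

	rc = ocxl_context_alloc(&ctx, afu, NULL);
	if (rc)
		return rc;

	/* amr = 0: no authority masking; bind to the calling process's mm. */
	rc = ocxl_context_attach(ctx, 0, current->mm);
	if (rc)
		ocxl_context_free(ctx);
	/* on success, ctx stays live until ocxl_context_detach()/_free() */
	return rc;
}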
diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
new file mode 100644
index 000000000000..b7a09b21ab36
--- /dev/null
+++ b/drivers/misc/ocxl/core.c
@@ -0,0 +1,574 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2019 IBM Corp.
3#include <linux/idr.h>
4#include "ocxl_internal.h"
5
6static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
7{
8 return (get_device(&fn->dev) == NULL) ? NULL : fn;
9}
10
11static void ocxl_fn_put(struct ocxl_fn *fn)
12{
13 put_device(&fn->dev);
14}
15
16static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
17{
18 struct ocxl_afu *afu;
19
20 afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
21 if (!afu)
22 return NULL;
23
24 kref_init(&afu->kref);
25 mutex_init(&afu->contexts_lock);
26 mutex_init(&afu->afu_control_lock);
27 idr_init(&afu->contexts_idr);
28 afu->fn = fn;
29 ocxl_fn_get(fn);
30 return afu;
31}
32
33static void free_afu(struct kref *kref)
34{
35 struct ocxl_afu *afu = container_of(kref, struct ocxl_afu, kref);
36
37 idr_destroy(&afu->contexts_idr);
38 ocxl_fn_put(afu->fn);
39 kfree(afu);
40}
41
42void ocxl_afu_get(struct ocxl_afu *afu)
43{
44 kref_get(&afu->kref);
45}
46EXPORT_SYMBOL_GPL(ocxl_afu_get);
47
48void ocxl_afu_put(struct ocxl_afu *afu)
49{
50 kref_put(&afu->kref, free_afu);
51}
52EXPORT_SYMBOL_GPL(ocxl_afu_put);
53
54static int assign_afu_actag(struct ocxl_afu *afu)
55{
56 struct ocxl_fn *fn = afu->fn;
57 int actag_count, actag_offset;
58 struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent);
59
60 /*
 61 * if the function did not get all the actags it requested,
 62 * each AFU scales down its own count accordingly
63 */
64 actag_count = afu->config.actag_supported *
65 fn->actag_enabled / fn->actag_supported;
66 actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
67 if (actag_offset < 0) {
68 dev_err(&pci_dev->dev, "Can't allocate %d actags for AFU: %d\n",
69 actag_count, actag_offset);
70 return actag_offset;
71 }
72 afu->actag_base = fn->actag_base + actag_offset;
73 afu->actag_enabled = actag_count;
74
75 ocxl_config_set_afu_actag(pci_dev, afu->config.dvsec_afu_control_pos,
76 afu->actag_base, afu->actag_enabled);
77 dev_dbg(&pci_dev->dev, "actag base=%d enabled=%d\n",
78 afu->actag_base, afu->actag_enabled);
79 return 0;
80}
81
82static void reclaim_afu_actag(struct ocxl_afu *afu)
83{
84 struct ocxl_fn *fn = afu->fn;
85 int start_offset, size;
86
87 start_offset = afu->actag_base - fn->actag_base;
88 size = afu->actag_enabled;
89 ocxl_actag_afu_free(afu->fn, start_offset, size);
90}
91
92static int assign_afu_pasid(struct ocxl_afu *afu)
93{
94 struct ocxl_fn *fn = afu->fn;
95 int pasid_count, pasid_offset;
96 struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent);
97
98 /*
99 * We only support the case where the function configuration
100 * requested enough PASIDs to cover all AFUs.
101 */
102 pasid_count = 1 << afu->config.pasid_supported_log;
103 pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
104 if (pasid_offset < 0) {
105 dev_err(&pci_dev->dev, "Can't allocate %d PASIDs for AFU: %d\n",
106 pasid_count, pasid_offset);
107 return pasid_offset;
108 }
109 afu->pasid_base = fn->pasid_base + pasid_offset;
110 afu->pasid_count = 0;
111 afu->pasid_max = pasid_count;
112
113 ocxl_config_set_afu_pasid(pci_dev, afu->config.dvsec_afu_control_pos,
114 afu->pasid_base,
115 afu->config.pasid_supported_log);
116 dev_dbg(&pci_dev->dev, "PASID base=%d, enabled=%d\n",
117 afu->pasid_base, pasid_count);
118 return 0;
119}
120
121static void reclaim_afu_pasid(struct ocxl_afu *afu)
122{
123 struct ocxl_fn *fn = afu->fn;
124 int start_offset, size;
125
126 start_offset = afu->pasid_base - fn->pasid_base;
127 size = 1 << afu->config.pasid_supported_log;
128 ocxl_pasid_afu_free(afu->fn, start_offset, size);
129}
130
131static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
132{
133 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
134 int rc, idx;
135
136 if (bar != 0 && bar != 2 && bar != 4)
137 return -EINVAL;
138
139 idx = bar >> 1;
140 if (fn->bar_used[idx]++ == 0) {
141 rc = pci_request_region(dev, bar, "ocxl");
142 if (rc)
143 return rc;
144 }
145 return 0;
146}
147
148static void release_fn_bar(struct ocxl_fn *fn, int bar)
149{
150 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
151 int idx;
152
153 if (bar != 0 && bar != 2 && bar != 4)
154 return;
155
156 idx = bar >> 1;
157 if (--fn->bar_used[idx] == 0)
158 pci_release_region(dev, bar);
159 WARN_ON(fn->bar_used[idx] < 0);
160}
161
162static int map_mmio_areas(struct ocxl_afu *afu)
163{
164 int rc;
165 struct pci_dev *pci_dev = to_pci_dev(afu->fn->dev.parent);
166
167 rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
168 if (rc)
169 return rc;
170
171 rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
172 if (rc) {
173 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
174 return rc;
175 }
176
177 afu->global_mmio_start =
178 pci_resource_start(pci_dev, afu->config.global_mmio_bar) +
179 afu->config.global_mmio_offset;
180 afu->pp_mmio_start =
181 pci_resource_start(pci_dev, afu->config.pp_mmio_bar) +
182 afu->config.pp_mmio_offset;
183
184 afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
185 afu->config.global_mmio_size);
186 if (!afu->global_mmio_ptr) {
187 release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
188 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
189 dev_err(&pci_dev->dev, "Error mapping global mmio area\n");
190 return -ENOMEM;
191 }
192
193 /*
194 * Leave an empty page between the per-process mmio area and
195 * the AFU interrupt mappings
196 */
197 afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
198 return 0;
199}
200
201static void unmap_mmio_areas(struct ocxl_afu *afu)
202{
203 if (afu->global_mmio_ptr) {
204 iounmap(afu->global_mmio_ptr);
205 afu->global_mmio_ptr = NULL;
206 }
207 afu->global_mmio_start = 0;
208 afu->pp_mmio_start = 0;
209 release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
210 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
211}
212
213static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
214{
215 int rc;
216
217 rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
218 if (rc)
219 return rc;
220
221 rc = assign_afu_actag(afu);
222 if (rc)
223 return rc;
224
225 rc = assign_afu_pasid(afu);
226 if (rc)
227 goto err_free_actag;
228
229 rc = map_mmio_areas(afu);
230 if (rc)
231 goto err_free_pasid;
232
233 return 0;
234
235err_free_pasid:
236 reclaim_afu_pasid(afu);
237err_free_actag:
238 reclaim_afu_actag(afu);
239 return rc;
240}
241
242static void deconfigure_afu(struct ocxl_afu *afu)
243{
244 unmap_mmio_areas(afu);
245 reclaim_afu_pasid(afu);
246 reclaim_afu_actag(afu);
247}
248
249static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
250{
251 ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
252
253 return 0;
254}
255
256static void deactivate_afu(struct ocxl_afu *afu)
257{
258 struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
259
260 ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
261}
262
263static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
264{
265 int rc;
266 struct ocxl_afu *afu;
267
268 afu = alloc_afu(fn);
269 if (!afu)
270 return -ENOMEM;
271
272 rc = configure_afu(afu, afu_idx, dev);
273 if (rc) {
274 ocxl_afu_put(afu);
275 return rc;
276 }
277
278 rc = activate_afu(dev, afu);
279 if (rc) {
280 deconfigure_afu(afu);
281 ocxl_afu_put(afu);
282 return rc;
283 }
284
285 list_add_tail(&afu->list, &fn->afu_list);
286
287 return 0;
288}
289
290static void remove_afu(struct ocxl_afu *afu)
291{
292 list_del(&afu->list);
293 ocxl_context_detach_all(afu);
294 deactivate_afu(afu);
295 deconfigure_afu(afu);
296 ocxl_afu_put(afu); // matches the implicit get in alloc_afu
297}
298
299static struct ocxl_fn *alloc_function(void)
300{
301 struct ocxl_fn *fn;
302
303 fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
304 if (!fn)
305 return NULL;
306
307 INIT_LIST_HEAD(&fn->afu_list);
308 INIT_LIST_HEAD(&fn->pasid_list);
309 INIT_LIST_HEAD(&fn->actag_list);
310
311 return fn;
312}
313
314static void free_function(struct ocxl_fn *fn)
315{
316 WARN_ON(!list_empty(&fn->afu_list));
317 WARN_ON(!list_empty(&fn->pasid_list));
318 kfree(fn);
319}
320
321static void free_function_dev(struct device *dev)
322{
323 struct ocxl_fn *fn = container_of(dev, struct ocxl_fn, dev);
324
325 free_function(fn);
326}
327
328static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
329{
330 int rc;
331
332 fn->dev.parent = &dev->dev;
333 fn->dev.release = free_function_dev;
334 rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
335 if (rc)
336 return rc;
337 return 0;
338}
339
340static int assign_function_actag(struct ocxl_fn *fn)
341{
342 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
343 u16 base, enabled, supported;
344 int rc;
345
346 rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
347 if (rc)
348 return rc;
349
350 fn->actag_base = base;
351 fn->actag_enabled = enabled;
352 fn->actag_supported = supported;
353
354 ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
355 fn->actag_base, fn->actag_enabled);
356 dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
357 fn->actag_base, fn->actag_enabled);
358 return 0;
359}
360
361static int set_function_pasid(struct ocxl_fn *fn)
362{
363 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
364 int rc, desired_count, max_count;
365
366 /* A function may not require any PASID */
367 if (fn->config.max_pasid_log < 0)
368 return 0;
369
370 rc = ocxl_config_get_pasid_info(dev, &max_count);
371 if (rc)
372 return rc;
373
374 desired_count = 1 << fn->config.max_pasid_log;
375
376 if (desired_count > max_count) {
377 dev_err(&fn->dev,
378 "Function requires more PASIDs than is available (%d vs. %d)\n",
379 desired_count, max_count);
380 return -ENOSPC;
381 }
382
383 fn->pasid_base = 0;
384 return 0;
385}
386
387static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
388{
389 int rc;
390
391 rc = pci_enable_device(dev);
392 if (rc) {
393 dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
394 return rc;
395 }
396
397 /*
398 * Once it has been confirmed to work on our hardware, we
399 * should reset the function, to force the adapter to restart
400 * from scratch.
401 * A function reset would also reset all its AFUs.
402 *
403 * Some hints for implementation:
404 *
 405 * - there's no status bit to know when the reset is done. We
406 * should try reading the config space to know when it's
407 * done.
408 * - probably something like:
409 * Reset
410 * wait 100ms
411 * issue config read
412 * allow device up to 1 sec to return success on config
413 * read before declaring it broken
414 *
415 * Some shared logic on the card (CFG, TLX) won't be reset, so
416 * there's no guarantee that it will be enough.
417 */
418 rc = ocxl_config_read_function(dev, &fn->config);
419 if (rc)
420 return rc;
421
422 rc = set_function_device(fn, dev);
423 if (rc)
424 return rc;
425
426 rc = assign_function_actag(fn);
427 if (rc)
428 return rc;
429
430 rc = set_function_pasid(fn);
431 if (rc)
432 return rc;
433
434 rc = ocxl_link_setup(dev, 0, &fn->link);
435 if (rc)
436 return rc;
437
438 rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
439 if (rc) {
440 ocxl_link_release(dev, fn->link);
441 return rc;
442 }
443 return 0;
444}
445
446static void deconfigure_function(struct ocxl_fn *fn)
447{
448 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
449
450 ocxl_link_release(dev, fn->link);
451 pci_disable_device(dev);
452}
453
454static struct ocxl_fn *init_function(struct pci_dev *dev)
455{
456 struct ocxl_fn *fn;
457 int rc;
458
459 fn = alloc_function();
460 if (!fn)
461 return ERR_PTR(-ENOMEM);
462
463 rc = configure_function(fn, dev);
464 if (rc) {
465 free_function(fn);
466 return ERR_PTR(rc);
467 }
468
469 rc = device_register(&fn->dev);
470 if (rc) {
471 deconfigure_function(fn);
472 put_device(&fn->dev);
473 return ERR_PTR(rc);
474 }
475 return fn;
476}
477
478// Device detection & initialisation
479
480struct ocxl_fn *ocxl_function_open(struct pci_dev *dev)
481{
482 int rc, afu_count = 0;
483 u8 afu;
484 struct ocxl_fn *fn;
485
486 if (!radix_enabled()) {
487 dev_err(&dev->dev, "Unsupported memory model (hash)\n");
488 return ERR_PTR(-ENODEV);
489 }
490
491 fn = init_function(dev);
492 if (IS_ERR(fn)) {
493 dev_err(&dev->dev, "function init failed: %li\n",
494 PTR_ERR(fn));
495 return fn;
496 }
497
498 for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
499 rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
500 if (rc > 0) {
501 rc = init_afu(dev, fn, afu);
502 if (rc) {
503 dev_err(&dev->dev,
504 "Can't initialize AFU index %d\n", afu);
505 continue;
506 }
507 afu_count++;
508 }
509 }
510 dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
511 return fn;
512}
513EXPORT_SYMBOL_GPL(ocxl_function_open);
514
515struct list_head *ocxl_function_afu_list(struct ocxl_fn *fn)
516{
517 return &fn->afu_list;
518}
519EXPORT_SYMBOL_GPL(ocxl_function_afu_list);
520
521struct ocxl_afu *ocxl_function_fetch_afu(struct ocxl_fn *fn, u8 afu_idx)
522{
523 struct ocxl_afu *afu;
524
525 list_for_each_entry(afu, &fn->afu_list, list) {
526 if (afu->config.idx == afu_idx)
527 return afu;
528 }
529
530 return NULL;
531}
532EXPORT_SYMBOL_GPL(ocxl_function_fetch_afu);
533
534const struct ocxl_fn_config *ocxl_function_config(struct ocxl_fn *fn)
535{
536 return &fn->config;
537}
538EXPORT_SYMBOL_GPL(ocxl_function_config);
539
540void ocxl_function_close(struct ocxl_fn *fn)
541{
542 struct ocxl_afu *afu, *tmp;
543
544 list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
545 remove_afu(afu);
546 }
547
548 deconfigure_function(fn);
549 device_unregister(&fn->dev);
550}
551EXPORT_SYMBOL_GPL(ocxl_function_close);
552
553// AFU Metadata
554
555struct ocxl_afu_config *ocxl_afu_config(struct ocxl_afu *afu)
556{
557 return &afu->config;
558}
559EXPORT_SYMBOL_GPL(ocxl_afu_config);
560
561void ocxl_afu_set_private(struct ocxl_afu *afu, void *private)
562{
563 afu->private = private;
564}
565EXPORT_SYMBOL_GPL(ocxl_afu_set_private);
566
567void *ocxl_afu_get_private(struct ocxl_afu *afu)
568{
569 if (afu)
570 return afu->private;
571
572 return NULL;
573}
574EXPORT_SYMBOL_GPL(ocxl_afu_get_private);
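core.c is the new home of everything a driver needs to bring up an OpenCAPI function without going through the character device. A hedged sketch of the probe/remove pair of an external driver built on these exports (my_probe()/my_remove() are hypothetical):

static int my_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	struct ocxl_fn *fn;
	struct ocxl_afu *afu;

	fn = ocxl_function_open(dev);
	if (IS_ERR(fn))
		return PTR_ERR(fn);

	/* Take the first AFU of the function; real drivers may iterate. */
	afu = ocxl_function_fetch_afu(fn, 0);
	if (!afu) {
		ocxl_function_close(fn);
		return -ENODEV;
	}

	dev_info(&dev->dev, "AFU: %s\n", ocxl_afu_config(afu)->name);
	pci_set_drvdata(dev, fn);
	return 0;
}

static void my_remove(struct pci_dev *dev)
{
	ocxl_function_close(pci_get_drvdata(dev));
}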
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index e6a607488f8a..2870c25da166 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -3,6 +3,7 @@
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/poll.h> 4#include <linux/poll.h>
5#include <linux/sched/signal.h> 5#include <linux/sched/signal.h>
6#include <linux/eventfd.h>
6#include <linux/uaccess.h> 7#include <linux/uaccess.h>
7#include <uapi/misc/ocxl.h> 8#include <uapi/misc/ocxl.h>
8#include <asm/reg.h> 9#include <asm/reg.h>
@@ -17,70 +18,56 @@ static struct class *ocxl_class;
17static struct mutex minors_idr_lock; 18static struct mutex minors_idr_lock;
18static struct idr minors_idr; 19static struct idr minors_idr;
19 20
20static struct ocxl_afu *find_and_get_afu(dev_t devno) 21static struct ocxl_file_info *find_file_info(dev_t devno)
21{ 22{
22 struct ocxl_afu *afu; 23 struct ocxl_file_info *info;
23 int afu_minor;
24 24
25 afu_minor = MINOR(devno);
26 /* 25 /*
27 * We don't declare an RCU critical section here, as our AFU 26 * We don't declare an RCU critical section here, as our AFU
28 * is protected by a reference counter on the device. By the time the 27 * is protected by a reference counter on the device. By the time the
29 * minor number of a device is removed from the idr, the ref count of 28 * info reference is removed from the idr, the ref count of
30 * the device is already at 0, so no user API will access that AFU and 29 * the device is already at 0, so no user API will access that AFU and
31 * this function can't return it. 30 * this function can't return it.
32 */ 31 */
33 afu = idr_find(&minors_idr, afu_minor); 32 info = idr_find(&minors_idr, MINOR(devno));
34 if (afu) 33 return info;
35 ocxl_afu_get(afu);
36 return afu;
37} 34}
38 35
39static int allocate_afu_minor(struct ocxl_afu *afu) 36static int allocate_minor(struct ocxl_file_info *info)
40{ 37{
41 int minor; 38 int minor;
42 39
43 mutex_lock(&minors_idr_lock); 40 mutex_lock(&minors_idr_lock);
44 minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL); 41 minor = idr_alloc(&minors_idr, info, 0, OCXL_NUM_MINORS, GFP_KERNEL);
45 mutex_unlock(&minors_idr_lock); 42 mutex_unlock(&minors_idr_lock);
46 return minor; 43 return minor;
47} 44}
48 45
49static void free_afu_minor(struct ocxl_afu *afu) 46static void free_minor(struct ocxl_file_info *info)
50{ 47{
51 mutex_lock(&minors_idr_lock); 48 mutex_lock(&minors_idr_lock);
52 idr_remove(&minors_idr, MINOR(afu->dev.devt)); 49 idr_remove(&minors_idr, MINOR(info->dev.devt));
53 mutex_unlock(&minors_idr_lock); 50 mutex_unlock(&minors_idr_lock);
54} 51}
55 52
56static int afu_open(struct inode *inode, struct file *file) 53static int afu_open(struct inode *inode, struct file *file)
57{ 54{
58 struct ocxl_afu *afu; 55 struct ocxl_file_info *info;
59 struct ocxl_context *ctx; 56 struct ocxl_context *ctx;
60 int rc; 57 int rc;
61 58
62 pr_debug("%s for device %x\n", __func__, inode->i_rdev); 59 pr_debug("%s for device %x\n", __func__, inode->i_rdev);
63 60
64 afu = find_and_get_afu(inode->i_rdev); 61 info = find_file_info(inode->i_rdev);
65 if (!afu) 62 if (!info)
66 return -ENODEV; 63 return -ENODEV;
67 64
68 ctx = ocxl_context_alloc(); 65 rc = ocxl_context_alloc(&ctx, info->afu, inode->i_mapping);
69 if (!ctx) {
70 rc = -ENOMEM;
71 goto put_afu;
72 }
73
74 rc = ocxl_context_init(ctx, afu, inode->i_mapping);
75 if (rc) 66 if (rc)
76 goto put_afu; 67 return rc;
68
77 file->private_data = ctx; 69 file->private_data = ctx;
78 ocxl_afu_put(afu);
79 return 0; 70 return 0;
80
81put_afu:
82 ocxl_afu_put(afu);
83 return rc;
84} 71}
85 72
86static long afu_ioctl_attach(struct ocxl_context *ctx, 73static long afu_ioctl_attach(struct ocxl_context *ctx,
@@ -100,7 +87,7 @@ static long afu_ioctl_attach(struct ocxl_context *ctx,
100 return -EINVAL; 87 return -EINVAL;
101 88
102 amr = arg.amr & mfspr(SPRN_UAMOR); 89 amr = arg.amr & mfspr(SPRN_UAMOR);
103 rc = ocxl_context_attach(ctx, amr); 90 rc = ocxl_context_attach(ctx, amr, current->mm);
104 return rc; 91 return rc;
105} 92}
106 93
@@ -151,10 +138,9 @@ static long afu_ioctl_enable_p9_wait(struct ocxl_context *ctx,
151 mutex_unlock(&ctx->status_mutex); 138 mutex_unlock(&ctx->status_mutex);
152 139
153 if (status == ATTACHED) { 140 if (status == ATTACHED) {
154 int rc; 141 int rc = ocxl_link_update_pe(ctx->afu->fn->link,
155 struct link *link = ctx->afu->fn->link; 142 ctx->pasid, ctx->tidr);
156 143
157 rc = ocxl_link_update_pe(link, ctx->pasid, ctx->tidr);
158 if (rc) 144 if (rc)
159 return rc; 145 return rc;
160 } 146 }
@@ -198,18 +184,40 @@ static long afu_ioctl_get_features(struct ocxl_context *ctx,
198 x == OCXL_IOCTL_GET_FEATURES ? "GET_FEATURES" : \ 184 x == OCXL_IOCTL_GET_FEATURES ? "GET_FEATURES" : \
199 "UNKNOWN") 185 "UNKNOWN")
200 186
187static irqreturn_t irq_handler(void *private)
188{
189 struct eventfd_ctx *ev_ctx = private;
190
191 eventfd_signal(ev_ctx, 1);
192 return IRQ_HANDLED;
193}
194
195static void irq_free(void *private)
196{
197 struct eventfd_ctx *ev_ctx = private;
198
199 eventfd_ctx_put(ev_ctx);
200}
201
201static long afu_ioctl(struct file *file, unsigned int cmd, 202static long afu_ioctl(struct file *file, unsigned int cmd,
202 unsigned long args) 203 unsigned long args)
203{ 204{
204 struct ocxl_context *ctx = file->private_data; 205 struct ocxl_context *ctx = file->private_data;
205 struct ocxl_ioctl_irq_fd irq_fd; 206 struct ocxl_ioctl_irq_fd irq_fd;
207 struct eventfd_ctx *ev_ctx;
208 int irq_id;
206 u64 irq_offset; 209 u64 irq_offset;
207 long rc; 210 long rc;
211 bool closed;
208 212
209 pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid, 213 pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
210 CMD_STR(cmd)); 214 CMD_STR(cmd));
211 215
212 if (ctx->status == CLOSED) 216 mutex_lock(&ctx->status_mutex);
217 closed = (ctx->status == CLOSED);
218 mutex_unlock(&ctx->status_mutex);
219
220 if (closed)
213 return -EIO; 221 return -EIO;
214 222
215 switch (cmd) { 223 switch (cmd) {
@@ -219,12 +227,13 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
219 break; 227 break;
220 228
221 case OCXL_IOCTL_IRQ_ALLOC: 229 case OCXL_IOCTL_IRQ_ALLOC:
222 rc = ocxl_afu_irq_alloc(ctx, &irq_offset); 230 rc = ocxl_afu_irq_alloc(ctx, &irq_id);
223 if (!rc) { 231 if (!rc) {
232 irq_offset = ocxl_irq_id_to_offset(ctx, irq_id);
224 rc = copy_to_user((u64 __user *) args, &irq_offset, 233 rc = copy_to_user((u64 __user *) args, &irq_offset,
225 sizeof(irq_offset)); 234 sizeof(irq_offset));
226 if (rc) { 235 if (rc) {
227 ocxl_afu_irq_free(ctx, irq_offset); 236 ocxl_afu_irq_free(ctx, irq_id);
228 return -EFAULT; 237 return -EFAULT;
229 } 238 }
230 } 239 }
@@ -235,7 +244,8 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
235 sizeof(irq_offset)); 244 sizeof(irq_offset));
236 if (rc) 245 if (rc)
237 return -EFAULT; 246 return -EFAULT;
238 rc = ocxl_afu_irq_free(ctx, irq_offset); 247 irq_id = ocxl_irq_offset_to_id(ctx, irq_offset);
248 rc = ocxl_afu_irq_free(ctx, irq_id);
239 break; 249 break;
240 250
241 case OCXL_IOCTL_IRQ_SET_FD: 251 case OCXL_IOCTL_IRQ_SET_FD:
@@ -245,8 +255,11 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
245 return -EFAULT; 255 return -EFAULT;
246 if (irq_fd.reserved) 256 if (irq_fd.reserved)
247 return -EINVAL; 257 return -EINVAL;
248 rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset, 258 irq_id = ocxl_irq_offset_to_id(ctx, irq_fd.irq_offset);
249 irq_fd.eventfd); 259 ev_ctx = eventfd_ctx_fdget(irq_fd.eventfd);
260 if (IS_ERR(ev_ctx))
261 return PTR_ERR(ev_ctx);
262 rc = ocxl_irq_set_handler(ctx, irq_id, irq_handler, irq_free, ev_ctx);
250 break; 263 break;
251 264
252 case OCXL_IOCTL_GET_METADATA: 265 case OCXL_IOCTL_GET_METADATA:
@@ -469,39 +482,102 @@ static const struct file_operations ocxl_afu_fops = {
469 .release = afu_release, 482 .release = afu_release,
470}; 483};
471 484
472int ocxl_create_cdev(struct ocxl_afu *afu) 485// Free the info struct
486static void info_release(struct device *dev)
487{
488 struct ocxl_file_info *info = container_of(dev, struct ocxl_file_info, dev);
489
490 free_minor(info);
491 ocxl_afu_put(info->afu);
492 kfree(info);
493}
494
495static int ocxl_file_make_visible(struct ocxl_file_info *info)
473{ 496{
474 int rc; 497 int rc;
475 498
476 cdev_init(&afu->cdev, &ocxl_afu_fops); 499 cdev_init(&info->cdev, &ocxl_afu_fops);
477 rc = cdev_add(&afu->cdev, afu->dev.devt, 1); 500 rc = cdev_add(&info->cdev, info->dev.devt, 1);
478 if (rc) { 501 if (rc) {
479 dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc); 502 dev_err(&info->dev, "Unable to add afu char device: %d\n", rc);
480 return rc; 503 return rc;
481 } 504 }
505
482 return 0; 506 return 0;
483} 507}
484 508
485void ocxl_destroy_cdev(struct ocxl_afu *afu) 509static void ocxl_file_make_invisible(struct ocxl_file_info *info)
486{ 510{
487 cdev_del(&afu->cdev); 511 cdev_del(&info->cdev);
488} 512}
489 513
490int ocxl_register_afu(struct ocxl_afu *afu) 514int ocxl_file_register_afu(struct ocxl_afu *afu)
491{ 515{
492 int minor; 516 int minor;
517 int rc;
518 struct ocxl_file_info *info;
519 struct ocxl_fn *fn = afu->fn;
520 struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent);
493 521
494 minor = allocate_afu_minor(afu); 522 info = kzalloc(sizeof(*info), GFP_KERNEL);
495 if (minor < 0) 523 if (info == NULL)
524 return -ENOMEM;
525
526 minor = allocate_minor(info);
527 if (minor < 0) {
528 kfree(info);
496 return minor; 529 return minor;
497 afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor); 530 }
498 afu->dev.class = ocxl_class; 531
499 return device_register(&afu->dev); 532 info->dev.parent = &fn->dev;
533 info->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
534 info->dev.class = ocxl_class;
535 info->dev.release = info_release;
536
537 info->afu = afu;
538 ocxl_afu_get(afu);
539
540 rc = dev_set_name(&info->dev, "%s.%s.%hhu",
541 afu->config.name, dev_name(&pci_dev->dev), afu->config.idx);
542 if (rc)
543 goto err_put;
544
545 rc = device_register(&info->dev);
546 if (rc)
547 goto err_put;
548
549 rc = ocxl_sysfs_register_afu(info);
550 if (rc)
551 goto err_unregister;
552
553 rc = ocxl_file_make_visible(info);
554 if (rc)
555 goto err_unregister;
556
557 ocxl_afu_set_private(afu, info);
558
559 return 0;
560
561err_unregister:
562 ocxl_sysfs_unregister_afu(info); // safe to call even if register failed
563 device_unregister(&info->dev);
564err_put:
565 ocxl_afu_put(afu);
566 free_minor(info);
567 kfree(info);
568 return rc;
500} 569}
501 570
502void ocxl_unregister_afu(struct ocxl_afu *afu) 571void ocxl_file_unregister_afu(struct ocxl_afu *afu)
503{ 572{
504 free_afu_minor(afu); 573 struct ocxl_file_info *info = ocxl_afu_get_private(afu);
574
575 if (!info)
576 return;
577
578 ocxl_file_make_invisible(info);
579 ocxl_sysfs_unregister_afu(info);
580 device_unregister(&info->dev);
505} 581}
506 582
507static char *ocxl_devnode(struct device *dev, umode_t *mode) 583static char *ocxl_devnode(struct device *dev, umode_t *mode)
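The ioctl path above now deals in IRQ IDs internally while user space keeps seeing trigger page offsets. A rough user-space sketch of the resulting flow, assuming the uapi definitions from include/uapi/misc/ocxl.h (device open and the OCXL_IOCTL_ATTACH step are omitted):

#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <misc/ocxl.h>

/* Returns an eventfd that becomes readable when the AFU raises the IRQ */
static int setup_afu_irq(int afu_fd, uint64_t *trigger_offset)
{
	struct ocxl_ioctl_irq_fd irq_fd = { 0 };	/* reserved must be zero */
	int efd;

	/* The driver hands back the IRQ's trigger page offset, not its ID */
	if (ioctl(afu_fd, OCXL_IOCTL_IRQ_ALLOC, trigger_offset))
		return -1;

	efd = eventfd(0, EFD_CLOEXEC);
	if (efd < 0)
		return -1;

	irq_fd.irq_offset = *trigger_offset;
	irq_fd.eventfd = efd;
	/* Wires the eventfd to the irq_handler()/irq_free() callbacks above */
	if (ioctl(afu_fd, OCXL_IOCTL_IRQ_SET_FD, &irq_fd))
		return -1;

	return efd;
}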
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index d50b861d7e57..cce5b0d64505 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -76,7 +76,7 @@ struct spa {
76 * limited number of opencapi slots on a system and lookup is only 76 * limited number of opencapi slots on a system and lookup is only
77 * done when the device is probed 77 * done when the device is probed
78 */ 78 */
79struct link { 79struct ocxl_link {
80 struct list_head list; 80 struct list_head list;
81 struct kref ref; 81 struct kref ref;
82 int domain; 82 int domain;
@@ -163,7 +163,7 @@ static void xsl_fault_handler_bh(struct work_struct *fault_work)
163 if (fault->dsisr & SPA_XSL_S) 163 if (fault->dsisr & SPA_XSL_S)
164 access |= _PAGE_WRITE; 164 access |= _PAGE_WRITE;
165 165
166 if (REGION_ID(fault->dar) != USER_REGION_ID) 166 if (get_region_id(fault->dar) != USER_REGION_ID)
167 access |= _PAGE_PRIVILEGED; 167 access |= _PAGE_PRIVILEGED;
168 168
169 local_irq_save(flags); 169 local_irq_save(flags);
@@ -179,12 +179,12 @@ ack:
179 179
180static irqreturn_t xsl_fault_handler(int irq, void *data) 180static irqreturn_t xsl_fault_handler(int irq, void *data)
181{ 181{
182 struct link *link = (struct link *) data; 182 struct ocxl_link *link = (struct ocxl_link *) data;
183 struct spa *spa = link->spa; 183 struct spa *spa = link->spa;
184 u64 dsisr, dar, pe_handle; 184 u64 dsisr, dar, pe_handle;
185 struct pe_data *pe_data; 185 struct pe_data *pe_data;
186 struct ocxl_process_element *pe; 186 struct ocxl_process_element *pe;
187 int lpid, pid, tid; 187 int pid;
188 bool schedule = false; 188 bool schedule = false;
189 189
190 read_irq(spa, &dsisr, &dar, &pe_handle); 190 read_irq(spa, &dsisr, &dar, &pe_handle);
@@ -192,9 +192,7 @@ static irqreturn_t xsl_fault_handler(int irq, void *data)
192 192
193 WARN_ON(pe_handle > SPA_PE_MASK); 193 WARN_ON(pe_handle > SPA_PE_MASK);
194 pe = spa->spa_mem + pe_handle; 194 pe = spa->spa_mem + pe_handle;
195 lpid = be32_to_cpu(pe->lpid);
196 pid = be32_to_cpu(pe->pid); 195 pid = be32_to_cpu(pe->pid);
197 tid = be32_to_cpu(pe->tid);
198 /* We could be reading all null values here if the PE is being 196 /* We could be reading all null values here if the PE is being
199 * removed while an interrupt kicks in. It's not supposed to 197 * removed while an interrupt kicks in. It's not supposed to
200 * happen if the driver notified the AFU to terminate the 198 * happen if the driver notified the AFU to terminate the
@@ -256,7 +254,7 @@ static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
256 &spa->reg_tfc, &spa->reg_pe_handle); 254 &spa->reg_tfc, &spa->reg_pe_handle);
257} 255}
258 256
259static int setup_xsl_irq(struct pci_dev *dev, struct link *link) 257static int setup_xsl_irq(struct pci_dev *dev, struct ocxl_link *link)
260{ 258{
261 struct spa *spa = link->spa; 259 struct spa *spa = link->spa;
262 int rc; 260 int rc;
@@ -311,7 +309,7 @@ err_xsl:
311 return rc; 309 return rc;
312} 310}
313 311
314static void release_xsl_irq(struct link *link) 312static void release_xsl_irq(struct ocxl_link *link)
315{ 313{
316 struct spa *spa = link->spa; 314 struct spa *spa = link->spa;
317 315
@@ -323,7 +321,7 @@ static void release_xsl_irq(struct link *link)
323 unmap_irq_registers(spa); 321 unmap_irq_registers(spa);
324} 322}
325 323
326static int alloc_spa(struct pci_dev *dev, struct link *link) 324static int alloc_spa(struct pci_dev *dev, struct ocxl_link *link)
327{ 325{
328 struct spa *spa; 326 struct spa *spa;
329 327
@@ -350,7 +348,7 @@ static int alloc_spa(struct pci_dev *dev, struct link *link)
350 return 0; 348 return 0;
351} 349}
352 350
353static void free_spa(struct link *link) 351static void free_spa(struct ocxl_link *link)
354{ 352{
355 struct spa *spa = link->spa; 353 struct spa *spa = link->spa;
356 354
@@ -364,12 +362,12 @@ static void free_spa(struct link *link)
364 } 362 }
365} 363}
366 364
367static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link) 365static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_link)
368{ 366{
369 struct link *link; 367 struct ocxl_link *link;
370 int rc; 368 int rc;
371 369
372 link = kzalloc(sizeof(struct link), GFP_KERNEL); 370 link = kzalloc(sizeof(struct ocxl_link), GFP_KERNEL);
373 if (!link) 371 if (!link)
374 return -ENOMEM; 372 return -ENOMEM;
375 373
@@ -405,7 +403,7 @@ err_free:
405 return rc; 403 return rc;
406} 404}
407 405
408static void free_link(struct link *link) 406static void free_link(struct ocxl_link *link)
409{ 407{
410 release_xsl_irq(link); 408 release_xsl_irq(link);
411 free_spa(link); 409 free_spa(link);
@@ -415,7 +413,7 @@ static void free_link(struct link *link)
415int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle) 413int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
416{ 414{
417 int rc = 0; 415 int rc = 0;
418 struct link *link; 416 struct ocxl_link *link;
419 417
420 mutex_lock(&links_list_lock); 418 mutex_lock(&links_list_lock);
421 list_for_each_entry(link, &links_list, list) { 419 list_for_each_entry(link, &links_list, list) {
@@ -442,7 +440,7 @@ EXPORT_SYMBOL_GPL(ocxl_link_setup);
442 440
443static void release_xsl(struct kref *ref) 441static void release_xsl(struct kref *ref)
444{ 442{
445 struct link *link = container_of(ref, struct link, ref); 443 struct ocxl_link *link = container_of(ref, struct ocxl_link, ref);
446 444
447 list_del(&link->list); 445 list_del(&link->list);
448 /* call platform code before releasing data */ 446 /* call platform code before releasing data */
@@ -452,7 +450,7 @@ static void release_xsl(struct kref *ref)
452 450
453void ocxl_link_release(struct pci_dev *dev, void *link_handle) 451void ocxl_link_release(struct pci_dev *dev, void *link_handle)
454{ 452{
455 struct link *link = (struct link *) link_handle; 453 struct ocxl_link *link = (struct ocxl_link *) link_handle;
456 454
457 mutex_lock(&links_list_lock); 455 mutex_lock(&links_list_lock);
458 kref_put(&link->ref, release_xsl); 456 kref_put(&link->ref, release_xsl);
@@ -488,7 +486,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
488 void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), 486 void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
489 void *xsl_err_data) 487 void *xsl_err_data)
490{ 488{
491 struct link *link = (struct link *) link_handle; 489 struct ocxl_link *link = (struct ocxl_link *) link_handle;
492 struct spa *spa = link->spa; 490 struct spa *spa = link->spa;
493 struct ocxl_process_element *pe; 491 struct ocxl_process_element *pe;
494 int pe_handle, rc = 0; 492 int pe_handle, rc = 0;
@@ -558,7 +556,7 @@ EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
558 556
559int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid) 557int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
560{ 558{
561 struct link *link = (struct link *) link_handle; 559 struct ocxl_link *link = (struct ocxl_link *) link_handle;
562 struct spa *spa = link->spa; 560 struct spa *spa = link->spa;
563 struct ocxl_process_element *pe; 561 struct ocxl_process_element *pe;
564 int pe_handle, rc; 562 int pe_handle, rc;
@@ -594,7 +592,7 @@ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
594 592
595int ocxl_link_remove_pe(void *link_handle, int pasid) 593int ocxl_link_remove_pe(void *link_handle, int pasid)
596{ 594{
597 struct link *link = (struct link *) link_handle; 595 struct ocxl_link *link = (struct ocxl_link *) link_handle;
598 struct spa *spa = link->spa; 596 struct spa *spa = link->spa;
599 struct ocxl_process_element *pe; 597 struct ocxl_process_element *pe;
600 struct pe_data *pe_data; 598 struct pe_data *pe_data;
@@ -666,7 +664,7 @@ EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
666 664
667int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) 665int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
668{ 666{
669 struct link *link = (struct link *) link_handle; 667 struct ocxl_link *link = (struct ocxl_link *) link_handle;
670 int rc, irq; 668 int rc, irq;
671 u64 addr; 669 u64 addr;
672 670
@@ -687,7 +685,7 @@ EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
687 685
688void ocxl_link_free_irq(void *link_handle, int hw_irq) 686void ocxl_link_free_irq(void *link_handle, int hw_irq)
689{ 687{
690 struct link *link = (struct link *) link_handle; 688 struct ocxl_link *link = (struct ocxl_link *) link_handle;
691 689
692 pnv_ocxl_free_xive_irq(hw_irq); 690 pnv_ocxl_free_xive_irq(hw_irq);
693 atomic_inc(&link->irq_available); 691 atomic_inc(&link->irq_available);
diff --git a/drivers/misc/ocxl/mmio.c b/drivers/misc/ocxl/mmio.c
new file mode 100644
index 000000000000..aae713db4ebe
--- /dev/null
+++ b/drivers/misc/ocxl/mmio.c
@@ -0,0 +1,234 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2019 IBM Corp.
3#include <linux/sched/mm.h>
4#include "trace.h"
5#include "ocxl_internal.h"
6
7int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset,
8 enum ocxl_endian endian, u32 *val)
9{
10 if (offset > afu->config.global_mmio_size - 4)
11 return -EINVAL;
12
13#ifdef __BIG_ENDIAN__
14 if (endian == OCXL_HOST_ENDIAN)
15 endian = OCXL_BIG_ENDIAN;
16#endif
17
18 switch (endian) {
19 case OCXL_BIG_ENDIAN:
20 *val = readl_be((char *)afu->global_mmio_ptr + offset);
21 break;
22
23 default:
24 *val = readl((char *)afu->global_mmio_ptr + offset);
25 break;
26 }
27
28 return 0;
29}
30EXPORT_SYMBOL_GPL(ocxl_global_mmio_read32);
31
32int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset,
33 enum ocxl_endian endian, u64 *val)
34{
35 if (offset > afu->config.global_mmio_size - 8)
36 return -EINVAL;
37
38#ifdef __BIG_ENDIAN__
39 if (endian == OCXL_HOST_ENDIAN)
40 endian = OCXL_BIG_ENDIAN;
41#endif
42
43 switch (endian) {
44 case OCXL_BIG_ENDIAN:
45 *val = readq_be((char *)afu->global_mmio_ptr + offset);
46 break;
47
48 default:
49 *val = readq((char *)afu->global_mmio_ptr + offset);
50 break;
51 }
52
53 return 0;
54}
55EXPORT_SYMBOL_GPL(ocxl_global_mmio_read64);
56
57int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset,
58 enum ocxl_endian endian, u32 val)
59{
60 if (offset > afu->config.global_mmio_size - 4)
61 return -EINVAL;
62
63#ifdef __BIG_ENDIAN__
64 if (endian == OCXL_HOST_ENDIAN)
65 endian = OCXL_BIG_ENDIAN;
66#endif
67
68 switch (endian) {
69 case OCXL_BIG_ENDIAN:
70 writel_be(val, (char *)afu->global_mmio_ptr + offset);
71 break;
72
73 default:
74 writel(val, (char *)afu->global_mmio_ptr + offset);
75 break;
76 }
77
78
79 return 0;
80}
81EXPORT_SYMBOL_GPL(ocxl_global_mmio_write32);
82
83int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset,
84 enum ocxl_endian endian, u64 val)
85{
86 if (offset > afu->config.global_mmio_size - 8)
87 return -EINVAL;
88
89#ifdef __BIG_ENDIAN__
90 if (endian == OCXL_HOST_ENDIAN)
91 endian = OCXL_BIG_ENDIAN;
92#endif
93
94 switch (endian) {
95 case OCXL_BIG_ENDIAN:
96 writeq_be(val, (char *)afu->global_mmio_ptr + offset);
97 break;
98
99 default:
100 writeq(val, (char *)afu->global_mmio_ptr + offset);
101 break;
102 }
103
104
105 return 0;
106}
107EXPORT_SYMBOL_GPL(ocxl_global_mmio_write64);
108
109int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset,
110 enum ocxl_endian endian, u32 mask)
111{
112 u32 tmp;
113
114 if (offset > afu->config.global_mmio_size - 4)
115 return -EINVAL;
116
117#ifdef __BIG_ENDIAN__
118 if (endian == OCXL_HOST_ENDIAN)
119 endian = OCXL_BIG_ENDIAN;
120#endif
121
122 switch (endian) {
123 case OCXL_BIG_ENDIAN:
124 tmp = readl_be((char *)afu->global_mmio_ptr + offset);
125 tmp |= mask;
126 writel_be(tmp, (char *)afu->global_mmio_ptr + offset);
127 break;
128
129 default:
130 tmp = readl((char *)afu->global_mmio_ptr + offset);
131 tmp |= mask;
132 writel(tmp, (char *)afu->global_mmio_ptr + offset);
133 break;
134 }
135
136 return 0;
137}
138EXPORT_SYMBOL_GPL(ocxl_global_mmio_set32);
139
140int ocxl_global_mmio_set64(struct ocxl_afu *afu, size_t offset,
141 enum ocxl_endian endian, u64 mask)
142{
143 u64 tmp;
144
145 if (offset > afu->config.global_mmio_size - 8)
146 return -EINVAL;
147
148#ifdef __BIG_ENDIAN__
149 if (endian == OCXL_HOST_ENDIAN)
150 endian = OCXL_BIG_ENDIAN;
151#endif
152
153 switch (endian) {
154 case OCXL_BIG_ENDIAN:
155 tmp = readq_be((char *)afu->global_mmio_ptr + offset);
156 tmp |= mask;
157 writeq_be(tmp, (char *)afu->global_mmio_ptr + offset);
158 break;
159
160 default:
161 tmp = readq((char *)afu->global_mmio_ptr + offset);
162 tmp |= mask;
163 writeq(tmp, (char *)afu->global_mmio_ptr + offset);
164 break;
165 }
166
167 return 0;
168}
169EXPORT_SYMBOL_GPL(ocxl_global_mmio_set64);
170
171int ocxl_global_mmio_clear32(struct ocxl_afu *afu, size_t offset,
172 enum ocxl_endian endian, u32 mask)
173{
174 u32 tmp;
175
176 if (offset > afu->config.global_mmio_size - 4)
177 return -EINVAL;
178
179#ifdef __BIG_ENDIAN__
180 if (endian == OCXL_HOST_ENDIAN)
181 endian = OCXL_BIG_ENDIAN;
182#endif
183
184 switch (endian) {
185 case OCXL_BIG_ENDIAN:
186 tmp = readl_be((char *)afu->global_mmio_ptr + offset);
187 tmp &= ~mask;
188 writel_be(tmp, (char *)afu->global_mmio_ptr + offset);
189 break;
190
191 default:
192 tmp = readl((char *)afu->global_mmio_ptr + offset);
193 tmp &= ~mask;
194 writel(tmp, (char *)afu->global_mmio_ptr + offset);
195 break;
196 }
197
198
199 return 0;
200}
201EXPORT_SYMBOL_GPL(ocxl_global_mmio_clear32);
202
203int ocxl_global_mmio_clear64(struct ocxl_afu *afu, size_t offset,
204 enum ocxl_endian endian, u64 mask)
205{
206 u64 tmp;
207
208 if (offset > afu->config.global_mmio_size - 8)
209 return -EINVAL;
210
211#ifdef __BIG_ENDIAN__
212 if (endian == OCXL_HOST_ENDIAN)
213 endian = OCXL_BIG_ENDIAN;
214#endif
215
216 switch (endian) {
217 case OCXL_BIG_ENDIAN:
218 tmp = readq_be((char *)afu->global_mmio_ptr + offset);
219 tmp &= ~mask;
220 writeq_be(tmp, (char *)afu->global_mmio_ptr + offset);
221 break;
222
223 default:
224 tmp = readq((char *)afu->global_mmio_ptr + offset);
225 tmp &= ~mask;
226 writeq(tmp, (char *)afu->global_mmio_ptr + offset);
227 break;
228 }
229
 230
231
232 return 0;
233}
234EXPORT_SYMBOL_GPL(ocxl_global_mmio_clear64);
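A hypothetical consumer of the accessors above, as an external driver might use them; the register offset and control bit are invented for illustration:

#include <misc/ocxl.h>

#define MY_AFU_CONTROL	0x0010		/* invented global MMIO offset */
#define MY_AFU_ENABLE	0x0001		/* invented control bit */

static int my_enable_afu(struct ocxl_afu *afu)
{
	u64 ctrl;
	int rc;

	rc = ocxl_global_mmio_read64(afu, MY_AFU_CONTROL,
				     OCXL_LITTLE_ENDIAN, &ctrl);
	if (rc)
		return rc;

	/* Set the enable bit only if the AFU is not already running */
	if (!(ctrl & MY_AFU_ENABLE))
		rc = ocxl_global_mmio_set64(afu, MY_AFU_CONTROL,
					    OCXL_LITTLE_ENDIAN, MY_AFU_ENABLE);
	return rc;
}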
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
index a32f2151029f..97415afd79f3 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -11,12 +11,8 @@
11#define MAX_IRQ_PER_LINK 2000 11#define MAX_IRQ_PER_LINK 2000
12#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK 12#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK
13 13
14#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
15#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)
16
17extern struct pci_driver ocxl_pci_driver; 14extern struct pci_driver ocxl_pci_driver;
18 15
19
20struct ocxl_fn { 16struct ocxl_fn {
21 struct device dev; 17 struct device dev;
22 int bar_used[3]; 18 int bar_used[3];
@@ -31,11 +27,17 @@ struct ocxl_fn {
31 void *link; 27 void *link;
32}; 28};
33 29
30struct ocxl_file_info {
31 struct ocxl_afu *afu;
32 struct device dev;
33 struct cdev cdev;
34 struct bin_attribute attr_global_mmio;
35};
36
34struct ocxl_afu { 37struct ocxl_afu {
38 struct kref kref;
35 struct ocxl_fn *fn; 39 struct ocxl_fn *fn;
36 struct list_head list; 40 struct list_head list;
37 struct device dev;
38 struct cdev cdev;
39 struct ocxl_afu_config config; 41 struct ocxl_afu_config config;
40 int pasid_base; 42 int pasid_base;
41 int pasid_count; /* opened contexts */ 43 int pasid_count; /* opened contexts */
@@ -49,7 +51,7 @@ struct ocxl_afu {
49 u64 irq_base_offset; 51 u64 irq_base_offset;
50 void __iomem *global_mmio_ptr; 52 void __iomem *global_mmio_ptr;
51 u64 pp_mmio_start; 53 u64 pp_mmio_start;
52 struct bin_attribute attr_global_mmio; 54 void *private;
53}; 55};
54 56
55enum ocxl_context_status { 57enum ocxl_context_status {
@@ -92,41 +94,51 @@ struct ocxl_process_element {
92 __be32 software_state; 94 __be32 software_state;
93}; 95};
94 96
97int ocxl_create_cdev(struct ocxl_afu *afu);
98void ocxl_destroy_cdev(struct ocxl_afu *afu);
99int ocxl_file_register_afu(struct ocxl_afu *afu);
100void ocxl_file_unregister_afu(struct ocxl_afu *afu);
101
102int ocxl_file_init(void);
103void ocxl_file_exit(void);
104
105int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
106void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
107int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
108void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
109
110/*
111 * Get the max PASID value that can be used by the function
112 */
113int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
114
115/*
116 * Check if an AFU index is valid for the given function.
117 *
118 * AFU indexes can be sparse, so a driver should check all indexes up
119 * to the maximum found in the function description
120 */
121int ocxl_config_check_afu_index(struct pci_dev *dev,
122 struct ocxl_fn_config *fn, int afu_idx);
123
124/**
125 * Update values within a Process Element
126 *
127 * link_handle: the link handle associated with the process element
128 * pasid: the PASID for the AFU context
129 * tid: the new thread id for the process element
130 */
131int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
132
133int ocxl_context_mmap(struct ocxl_context *ctx,
134 struct vm_area_struct *vma);
135void ocxl_context_detach_all(struct ocxl_afu *afu);
95 136
96extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu); 137int ocxl_sysfs_register_afu(struct ocxl_file_info *info);
97extern void ocxl_afu_put(struct ocxl_afu *afu); 138void ocxl_sysfs_unregister_afu(struct ocxl_file_info *info);
98
99extern int ocxl_create_cdev(struct ocxl_afu *afu);
100extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
101extern int ocxl_register_afu(struct ocxl_afu *afu);
102extern void ocxl_unregister_afu(struct ocxl_afu *afu);
103
104extern int ocxl_file_init(void);
105extern void ocxl_file_exit(void);
106
107extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
108extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
109extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
110extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
111 139
112extern struct ocxl_context *ocxl_context_alloc(void); 140int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset);
113extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, 141u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int irq_id);
114 struct address_space *mapping); 142void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
115extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
116extern int ocxl_context_mmap(struct ocxl_context *ctx,
117 struct vm_area_struct *vma);
118extern int ocxl_context_detach(struct ocxl_context *ctx);
119extern void ocxl_context_detach_all(struct ocxl_afu *afu);
120extern void ocxl_context_free(struct ocxl_context *ctx);
121
122extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
123extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
124
125extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
126extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
127extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
128extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
129 int eventfd);
130extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
131 143
132#endif /* _OCXL_INTERNAL_H_ */ 144#endif /* _OCXL_INTERNAL_H_ */
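With struct ocxl_afu losing its embedded device and cdev, frontends now hang their state off the new 'private' pointer via the ocxl_afu_set_private()/ocxl_afu_get_private() accessors, as the char device frontend does above with its ocxl_file_info. A minimal sketch of the pattern, with invented my_* names:

#include <linux/slab.h>
#include <misc/ocxl.h>

struct my_afu_state {			/* invented frontend state */
	struct ocxl_afu *afu;
	int users;
};

static int my_bind(struct ocxl_afu *afu)
{
	struct my_afu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return -ENOMEM;
	state->afu = afu;
	ocxl_afu_set_private(afu, state);
	return 0;
}

static void my_unbind(struct ocxl_afu *afu)
{
	kfree(ocxl_afu_get_private(afu));
	ocxl_afu_set_private(afu, NULL);
}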
diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c
index 21f425472a82..f2a3ef4b9bdd 100644
--- a/drivers/misc/ocxl/pci.c
+++ b/drivers/misc/ocxl/pci.c
@@ -1,9 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0+ 1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp. 2// Copyright 2019 IBM Corp.
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/pci.h>
5#include <linux/idr.h>
6#include <asm/pnv-ocxl.h>
7#include "ocxl_internal.h" 4#include "ocxl_internal.h"
8 5
9/* 6/*
@@ -17,563 +14,47 @@ static const struct pci_device_id ocxl_pci_tbl[] = {
17}; 14};
18MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl); 15MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);
19 16
20 17static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
21static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
22{
23 return (get_device(&fn->dev) == NULL) ? NULL : fn;
24}
25
26static void ocxl_fn_put(struct ocxl_fn *fn)
27{
28 put_device(&fn->dev);
29}
30
31struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
32{
33 return (get_device(&afu->dev) == NULL) ? NULL : afu;
34}
35
36void ocxl_afu_put(struct ocxl_afu *afu)
37{
38 put_device(&afu->dev);
39}
40
41static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
42{
43 struct ocxl_afu *afu;
44
45 afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
46 if (!afu)
47 return NULL;
48
49 mutex_init(&afu->contexts_lock);
50 mutex_init(&afu->afu_control_lock);
51 idr_init(&afu->contexts_idr);
52 afu->fn = fn;
53 ocxl_fn_get(fn);
54 return afu;
55}
56
57static void free_afu(struct ocxl_afu *afu)
58{
59 idr_destroy(&afu->contexts_idr);
60 ocxl_fn_put(afu->fn);
61 kfree(afu);
62}
63
64static void free_afu_dev(struct device *dev)
65{
66 struct ocxl_afu *afu = to_ocxl_afu(dev);
67
68 ocxl_unregister_afu(afu);
69 free_afu(afu);
70}
71
72static int set_afu_device(struct ocxl_afu *afu, const char *location)
73{
74 struct ocxl_fn *fn = afu->fn;
75 int rc;
76
77 afu->dev.parent = &fn->dev;
78 afu->dev.release = free_afu_dev;
79 rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
80 afu->config.idx);
81 return rc;
82}
83
84static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
85{
86 struct ocxl_fn *fn = afu->fn;
87 int actag_count, actag_offset;
88
89 /*
90 * if there were not enough actags for the function, each afu
91 * reduces its count as well
92 */
93 actag_count = afu->config.actag_supported *
94 fn->actag_enabled / fn->actag_supported;
95 actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
96 if (actag_offset < 0) {
97 dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
98 actag_count, actag_offset);
99 return actag_offset;
100 }
101 afu->actag_base = fn->actag_base + actag_offset;
102 afu->actag_enabled = actag_count;
103
104 ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
105 afu->actag_base, afu->actag_enabled);
106 dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
107 afu->actag_base, afu->actag_enabled);
108 return 0;
109}
110
111static void reclaim_afu_actag(struct ocxl_afu *afu)
112{
113 struct ocxl_fn *fn = afu->fn;
114 int start_offset, size;
115
116 start_offset = afu->actag_base - fn->actag_base;
117 size = afu->actag_enabled;
118 ocxl_actag_afu_free(afu->fn, start_offset, size);
119}
120
121static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
122{
123 struct ocxl_fn *fn = afu->fn;
124 int pasid_count, pasid_offset;
125
126 /*
127 * We only support the case where the function configuration
128 * requested enough PASIDs to cover all AFUs.
129 */
130 pasid_count = 1 << afu->config.pasid_supported_log;
131 pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
132 if (pasid_offset < 0) {
133 dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
134 pasid_count, pasid_offset);
135 return pasid_offset;
136 }
137 afu->pasid_base = fn->pasid_base + pasid_offset;
138 afu->pasid_count = 0;
139 afu->pasid_max = pasid_count;
140
141 ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
142 afu->pasid_base,
143 afu->config.pasid_supported_log);
144 dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
145 afu->pasid_base, pasid_count);
146 return 0;
147}
148
149static void reclaim_afu_pasid(struct ocxl_afu *afu)
150{
151 struct ocxl_fn *fn = afu->fn;
152 int start_offset, size;
153
154 start_offset = afu->pasid_base - fn->pasid_base;
155 size = 1 << afu->config.pasid_supported_log;
156 ocxl_pasid_afu_free(afu->fn, start_offset, size);
157}
158
159static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
160{
161 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
162 int rc, idx;
163
164 if (bar != 0 && bar != 2 && bar != 4)
165 return -EINVAL;
166
167 idx = bar >> 1;
168 if (fn->bar_used[idx]++ == 0) {
169 rc = pci_request_region(dev, bar, "ocxl");
170 if (rc)
171 return rc;
172 }
173 return 0;
174}
175
176static void release_fn_bar(struct ocxl_fn *fn, int bar)
177{
178 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
179 int idx;
180
181 if (bar != 0 && bar != 2 && bar != 4)
182 return;
183
184 idx = bar >> 1;
185 if (--fn->bar_used[idx] == 0)
186 pci_release_region(dev, bar);
187 WARN_ON(fn->bar_used[idx] < 0);
188}
189
190static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
191{
192 int rc;
193
194 rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
195 if (rc)
196 return rc;
197
198 rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
199 if (rc) {
200 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
201 return rc;
202 }
203
204 afu->global_mmio_start =
205 pci_resource_start(dev, afu->config.global_mmio_bar) +
206 afu->config.global_mmio_offset;
207 afu->pp_mmio_start =
208 pci_resource_start(dev, afu->config.pp_mmio_bar) +
209 afu->config.pp_mmio_offset;
210
211 afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
212 afu->config.global_mmio_size);
213 if (!afu->global_mmio_ptr) {
214 release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
215 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
216 dev_err(&dev->dev, "Error mapping global mmio area\n");
217 return -ENOMEM;
218 }
219
220 /*
221 * Leave an empty page between the per-process mmio area and
222 * the AFU interrupt mappings
223 */
224 afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
225 return 0;
226}
227
228static void unmap_mmio_areas(struct ocxl_afu *afu)
229{
230 if (afu->global_mmio_ptr) {
231 iounmap(afu->global_mmio_ptr);
232 afu->global_mmio_ptr = NULL;
233 }
234 afu->global_mmio_start = 0;
235 afu->pp_mmio_start = 0;
236 release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
237 release_fn_bar(afu->fn, afu->config.global_mmio_bar);
238}
239
240static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
241{
242 int rc;
243
244 rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
245 if (rc)
246 return rc;
247
248 rc = set_afu_device(afu, dev_name(&dev->dev));
249 if (rc)
250 return rc;
251
252 rc = assign_afu_actag(afu, dev);
253 if (rc)
254 return rc;
255
256 rc = assign_afu_pasid(afu, dev);
257 if (rc) {
258 reclaim_afu_actag(afu);
259 return rc;
260 }
261
262 rc = map_mmio_areas(afu, dev);
263 if (rc) {
264 reclaim_afu_pasid(afu);
265 reclaim_afu_actag(afu);
266 return rc;
267 }
268 return 0;
269}
270
271static void deconfigure_afu(struct ocxl_afu *afu)
272{
273 unmap_mmio_areas(afu);
274 reclaim_afu_pasid(afu);
275 reclaim_afu_actag(afu);
276}
277
278static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
279{
280 int rc;
281
282 ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
283 /*
284 * Char device creation is the last step, as processes can
285 * call our driver immediately, so all our inits must be finished.
286 */
287 rc = ocxl_create_cdev(afu);
288 if (rc)
289 return rc;
290 return 0;
291}
292
293static void deactivate_afu(struct ocxl_afu *afu)
294{
295 struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
296
297 ocxl_destroy_cdev(afu);
298 ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
299}
300
301static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
302{ 18{
303 int rc; 19 int rc;
304 struct ocxl_afu *afu; 20 struct ocxl_afu *afu, *tmp;
305
306 afu = alloc_afu(fn);
307 if (!afu)
308 return -ENOMEM;
309
310 rc = configure_afu(afu, afu_idx, dev);
311 if (rc) {
312 free_afu(afu);
313 return rc;
314 }
315
316 rc = ocxl_register_afu(afu);
317 if (rc)
318 goto err;
319
320 rc = ocxl_sysfs_add_afu(afu);
321 if (rc)
322 goto err;
323
324 rc = activate_afu(dev, afu);
325 if (rc)
326 goto err_sys;
327
328 list_add_tail(&afu->list, &fn->afu_list);
329 return 0;
330
331err_sys:
332 ocxl_sysfs_remove_afu(afu);
333err:
334 deconfigure_afu(afu);
335 device_unregister(&afu->dev);
336 return rc;
337}
338
339static void remove_afu(struct ocxl_afu *afu)
340{
341 list_del(&afu->list);
342 ocxl_context_detach_all(afu);
343 deactivate_afu(afu);
344 ocxl_sysfs_remove_afu(afu);
345 deconfigure_afu(afu);
346 device_unregister(&afu->dev);
347}
348
349static struct ocxl_fn *alloc_function(struct pci_dev *dev)
350{
351 struct ocxl_fn *fn; 21 struct ocxl_fn *fn;
22 struct list_head *afu_list;
352 23
353 fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL); 24 fn = ocxl_function_open(dev);
354 if (!fn) 25 if (IS_ERR(fn))
355 return NULL; 26 return PTR_ERR(fn);
356
357 INIT_LIST_HEAD(&fn->afu_list);
358 INIT_LIST_HEAD(&fn->pasid_list);
359 INIT_LIST_HEAD(&fn->actag_list);
360 return fn;
361}
362
363static void free_function(struct ocxl_fn *fn)
364{
365 WARN_ON(!list_empty(&fn->afu_list));
366 WARN_ON(!list_empty(&fn->pasid_list));
367 kfree(fn);
368}
369
370static void free_function_dev(struct device *dev)
371{
372 struct ocxl_fn *fn = to_ocxl_function(dev);
373
374 free_function(fn);
375}
376
377static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
378{
379 int rc;
380 27
381 fn->dev.parent = &dev->dev;
382 fn->dev.release = free_function_dev;
383 rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
384 if (rc)
385 return rc;
386 pci_set_drvdata(dev, fn); 28 pci_set_drvdata(dev, fn);
387 return 0;
388}
389
390static int assign_function_actag(struct ocxl_fn *fn)
391{
392 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
393 u16 base, enabled, supported;
394 int rc;
395
396 rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
397 if (rc)
398 return rc;
399
400 fn->actag_base = base;
401 fn->actag_enabled = enabled;
402 fn->actag_supported = supported;
403
404 ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
405 fn->actag_base, fn->actag_enabled);
406 dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
407 fn->actag_base, fn->actag_enabled);
408 return 0;
409}
410 29
411static int set_function_pasid(struct ocxl_fn *fn) 30 afu_list = ocxl_function_afu_list(fn);
412{
413 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
414 int rc, desired_count, max_count;
415
416 /* A function may not require any PASID */
417 if (fn->config.max_pasid_log < 0)
418 return 0;
419
420 rc = ocxl_config_get_pasid_info(dev, &max_count);
421 if (rc)
422 return rc;
423
424 desired_count = 1 << fn->config.max_pasid_log;
425 31
426 if (desired_count > max_count) { 32 list_for_each_entry_safe(afu, tmp, afu_list, list) {
427 dev_err(&fn->dev, 33 // Cleanup handled within ocxl_file_register_afu()
428 "Function requires more PASIDs than is available (%d vs. %d)\n", 34 rc = ocxl_file_register_afu(afu);
429 desired_count, max_count); 35 if (rc) {
 430 return -ENOSPC; 36 dev_err(&dev->dev, "Failed to register AFU '%s' index %d\n",
37 afu->config.name, afu->config.idx);
38 }
431 } 39 }
432 40
433 fn->pasid_base = 0;
434 return 0; 41 return 0;
435} 42}
436 43
 437static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev) 44static void ocxl_remove(struct pci_dev *dev)
438{
439 int rc;
440
441 rc = pci_enable_device(dev);
442 if (rc) {
443 dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
444 return rc;
445 }
446
447 /*
448 * Once it has been confirmed to work on our hardware, we
449 * should reset the function, to force the adapter to restart
450 * from scratch.
451 * A function reset would also reset all its AFUs.
452 *
453 * Some hints for implementation:
454 *
455 * - there's not status bit to know when the reset is done. We
456 * should try reading the config space to know when it's
457 * done.
458 * - probably something like:
459 * Reset
460 * wait 100ms
461 * issue config read
462 * allow device up to 1 sec to return success on config
463 * read before declaring it broken
464 *
465 * Some shared logic on the card (CFG, TLX) won't be reset, so
466 * there's no guarantee that it will be enough.
467 */
468 rc = ocxl_config_read_function(dev, &fn->config);
469 if (rc)
470 return rc;
471
472 rc = set_function_device(fn, dev);
473 if (rc)
474 return rc;
475
476 rc = assign_function_actag(fn);
477 if (rc)
478 return rc;
479
480 rc = set_function_pasid(fn);
481 if (rc)
482 return rc;
483
484 rc = ocxl_link_setup(dev, 0, &fn->link);
485 if (rc)
486 return rc;
487
488 rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
489 if (rc) {
490 ocxl_link_release(dev, fn->link);
491 return rc;
492 }
493 return 0;
494}
495
496static void deconfigure_function(struct ocxl_fn *fn)
497{
498 struct pci_dev *dev = to_pci_dev(fn->dev.parent);
499
500 ocxl_link_release(dev, fn->link);
501 pci_disable_device(dev);
502}
503
504static struct ocxl_fn *init_function(struct pci_dev *dev)
505{ 45{
506 struct ocxl_fn *fn; 46 struct ocxl_fn *fn;
507 int rc; 47 struct ocxl_afu *afu;
508 48 struct list_head *afu_list;
509 fn = alloc_function(dev);
510 if (!fn)
511 return ERR_PTR(-ENOMEM);
512
513 rc = configure_function(fn, dev);
514 if (rc) {
515 free_function(fn);
516 return ERR_PTR(rc);
517 }
518
519 rc = device_register(&fn->dev);
520 if (rc) {
521 deconfigure_function(fn);
522 put_device(&fn->dev);
523 return ERR_PTR(rc);
524 }
525 return fn;
526}
527
528static void remove_function(struct ocxl_fn *fn)
529{
530 deconfigure_function(fn);
531 device_unregister(&fn->dev);
532}
533
534static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
535{
536 int rc, afu_count = 0;
537 u8 afu;
538 struct ocxl_fn *fn;
539
540 if (!radix_enabled()) {
541 dev_err(&dev->dev, "Unsupported memory model (hash)\n");
542 return -ENODEV;
543 }
544 49
545 fn = init_function(dev); 50 fn = pci_get_drvdata(dev);
546 if (IS_ERR(fn)) { 51 afu_list = ocxl_function_afu_list(fn);
547 dev_err(&dev->dev, "function init failed: %li\n",
548 PTR_ERR(fn));
549 return PTR_ERR(fn);
550 }
551 52
552 for (afu = 0; afu <= fn->config.max_afu_index; afu++) { 53 list_for_each_entry(afu, afu_list, list) {
553 rc = ocxl_config_check_afu_index(dev, &fn->config, afu); 54 ocxl_file_unregister_afu(afu);
554 if (rc > 0) {
555 rc = init_afu(dev, fn, afu);
556 if (rc) {
557 dev_err(&dev->dev,
558 "Can't initialize AFU index %d\n", afu);
559 continue;
560 }
561 afu_count++;
562 }
563 } 55 }
564 dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
565 return 0;
566}
567
568static void ocxl_remove(struct pci_dev *dev)
569{
570 struct ocxl_afu *afu, *tmp;
571 struct ocxl_fn *fn = pci_get_drvdata(dev);
572 56
573 list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) { 57 ocxl_function_close(fn);
574 remove_afu(afu);
575 }
576 remove_function(fn);
577} 58}
578 59
579struct pci_driver ocxl_pci_driver = { 60struct pci_driver ocxl_pci_driver = {
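An external driver (the cxlflash use case this split targets) can reuse the same open/close pairing while fetching a specific AFU index instead of walking the list. A sketch under that assumption, with an invented probe:

#include <linux/pci.h>
#include <misc/ocxl.h>

static int my_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	struct ocxl_fn *fn;
	struct ocxl_afu *afu;

	fn = ocxl_function_open(dev);
	if (IS_ERR(fn))
		return PTR_ERR(fn);
	pci_set_drvdata(dev, fn);

	/* Takes a reference; drop it with ocxl_afu_put() on teardown */
	afu = ocxl_function_fetch_afu(fn, 0);
	if (!afu) {
		ocxl_function_close(fn);
		return -ENODEV;
	}

	dev_info(&dev->dev, "bound AFU %s\n", ocxl_afu_config(afu)->name);
	return 0;
}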
diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
index 0ab1fd1b2682..58f1ba264206 100644
--- a/drivers/misc/ocxl/sysfs.c
+++ b/drivers/misc/ocxl/sysfs.c
@@ -3,11 +3,18 @@
3#include <linux/sysfs.h> 3#include <linux/sysfs.h>
4#include "ocxl_internal.h" 4#include "ocxl_internal.h"
5 5
6static inline struct ocxl_afu *to_afu(struct device *device)
7{
8 struct ocxl_file_info *info = container_of(device, struct ocxl_file_info, dev);
9
10 return info->afu;
11}
12
6static ssize_t global_mmio_size_show(struct device *device, 13static ssize_t global_mmio_size_show(struct device *device,
7 struct device_attribute *attr, 14 struct device_attribute *attr,
8 char *buf) 15 char *buf)
9{ 16{
10 struct ocxl_afu *afu = to_ocxl_afu(device); 17 struct ocxl_afu *afu = to_afu(device);
11 18
12 return scnprintf(buf, PAGE_SIZE, "%d\n", 19 return scnprintf(buf, PAGE_SIZE, "%d\n",
13 afu->config.global_mmio_size); 20 afu->config.global_mmio_size);
@@ -17,7 +24,7 @@ static ssize_t pp_mmio_size_show(struct device *device,
17 struct device_attribute *attr, 24 struct device_attribute *attr,
18 char *buf) 25 char *buf)
19{ 26{
20 struct ocxl_afu *afu = to_ocxl_afu(device); 27 struct ocxl_afu *afu = to_afu(device);
21 28
22 return scnprintf(buf, PAGE_SIZE, "%d\n", 29 return scnprintf(buf, PAGE_SIZE, "%d\n",
23 afu->config.pp_mmio_stride); 30 afu->config.pp_mmio_stride);
@@ -27,7 +34,7 @@ static ssize_t afu_version_show(struct device *device,
27 struct device_attribute *attr, 34 struct device_attribute *attr,
28 char *buf) 35 char *buf)
29{ 36{
30 struct ocxl_afu *afu = to_ocxl_afu(device); 37 struct ocxl_afu *afu = to_afu(device);
31 38
32 return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n", 39 return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
33 afu->config.version_major, 40 afu->config.version_major,
@@ -38,7 +45,7 @@ static ssize_t contexts_show(struct device *device,
38 struct device_attribute *attr, 45 struct device_attribute *attr,
39 char *buf) 46 char *buf)
40{ 47{
41 struct ocxl_afu *afu = to_ocxl_afu(device); 48 struct ocxl_afu *afu = to_afu(device);
42 49
43 return scnprintf(buf, PAGE_SIZE, "%d/%d\n", 50 return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
44 afu->pasid_count, afu->pasid_max); 51 afu->pasid_count, afu->pasid_max);
@@ -55,7 +62,7 @@ static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
55 struct bin_attribute *bin_attr, char *buf, 62 struct bin_attribute *bin_attr, char *buf,
56 loff_t off, size_t count) 63 loff_t off, size_t count)
57{ 64{
58 struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); 65 struct ocxl_afu *afu = to_afu(kobj_to_dev(kobj));
59 66
60 if (count == 0 || off < 0 || 67 if (count == 0 || off < 0 ||
61 off >= afu->config.global_mmio_size) 68 off >= afu->config.global_mmio_size)
@@ -86,7 +93,7 @@ static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
86 struct bin_attribute *bin_attr, 93 struct bin_attribute *bin_attr,
87 struct vm_area_struct *vma) 94 struct vm_area_struct *vma)
88{ 95{
89 struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); 96 struct ocxl_afu *afu = to_afu(kobj_to_dev(kobj));
90 97
91 if ((vma_pages(vma) + vma->vm_pgoff) > 98 if ((vma_pages(vma) + vma->vm_pgoff) >
92 (afu->config.global_mmio_size >> PAGE_SHIFT)) 99 (afu->config.global_mmio_size >> PAGE_SHIFT))
@@ -99,27 +106,25 @@ static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
99 return 0; 106 return 0;
100} 107}
101 108
102int ocxl_sysfs_add_afu(struct ocxl_afu *afu) 109int ocxl_sysfs_register_afu(struct ocxl_file_info *info)
103{ 110{
104 int i, rc; 111 int i, rc;
105 112
106 for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { 113 for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
107 rc = device_create_file(&afu->dev, &afu_attrs[i]); 114 rc = device_create_file(&info->dev, &afu_attrs[i]);
108 if (rc) 115 if (rc)
109 goto err; 116 goto err;
110 } 117 }
111 118
112 sysfs_attr_init(&afu->attr_global_mmio.attr); 119 sysfs_attr_init(&info->attr_global_mmio.attr);
113 afu->attr_global_mmio.attr.name = "global_mmio_area"; 120 info->attr_global_mmio.attr.name = "global_mmio_area";
114 afu->attr_global_mmio.attr.mode = 0600; 121 info->attr_global_mmio.attr.mode = 0600;
115 afu->attr_global_mmio.size = afu->config.global_mmio_size; 122 info->attr_global_mmio.size = info->afu->config.global_mmio_size;
116 afu->attr_global_mmio.read = global_mmio_read; 123 info->attr_global_mmio.read = global_mmio_read;
117 afu->attr_global_mmio.mmap = global_mmio_mmap; 124 info->attr_global_mmio.mmap = global_mmio_mmap;
118 rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio); 125 rc = device_create_bin_file(&info->dev, &info->attr_global_mmio);
119 if (rc) { 126 if (rc) {
120 dev_err(&afu->dev, 127 dev_err(&info->dev, "Unable to create global mmio attr for afu: %d\n", rc);
121 "Unable to create global mmio attr for afu: %d\n",
122 rc);
123 goto err; 128 goto err;
124 } 129 }
125 130
@@ -127,15 +132,20 @@ int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
127 132
128err: 133err:
129 for (i--; i >= 0; i--) 134 for (i--; i >= 0; i--)
130 device_remove_file(&afu->dev, &afu_attrs[i]); 135 device_remove_file(&info->dev, &afu_attrs[i]);
136
131 return rc; 137 return rc;
132} 138}
133 139
134void ocxl_sysfs_remove_afu(struct ocxl_afu *afu) 140void ocxl_sysfs_unregister_afu(struct ocxl_file_info *info)
135{ 141{
136 int i; 142 int i;
137 143
144 /*
 145 * device_remove_bin_file() is safe to call even if the file was never
 146 * added: files are removed by name, and removal exits early if none is found
147 */
138 for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) 148 for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
139 device_remove_file(&afu->dev, &afu_attrs[i]); 149 device_remove_file(&info->dev, &afu_attrs[i]);
140 device_remove_bin_file(&afu->dev, &afu->attr_global_mmio); 150 device_remove_bin_file(&info->dev, &info->attr_global_mmio);
141} 151}
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
index bcb7ff330c1e..024f417e7e01 100644
--- a/drivers/misc/ocxl/trace.h
+++ b/drivers/misc/ocxl/trace.h
@@ -107,16 +107,14 @@ DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack,
107); 107);
108 108
109TRACE_EVENT(ocxl_afu_irq_alloc, 109TRACE_EVENT(ocxl_afu_irq_alloc,
110 TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq, 110 TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq),
111 u64 irq_offset), 111 TP_ARGS(pasid, irq_id, virq, hw_irq),
112 TP_ARGS(pasid, irq_id, virq, hw_irq, irq_offset),
113 112
114 TP_STRUCT__entry( 113 TP_STRUCT__entry(
115 __field(int, pasid) 114 __field(int, pasid)
116 __field(int, irq_id) 115 __field(int, irq_id)
117 __field(unsigned int, virq) 116 __field(unsigned int, virq)
118 __field(int, hw_irq) 117 __field(int, hw_irq)
119 __field(u64, irq_offset)
120 ), 118 ),
121 119
122 TP_fast_assign( 120 TP_fast_assign(
@@ -124,15 +122,13 @@ TRACE_EVENT(ocxl_afu_irq_alloc,
124 __entry->irq_id = irq_id; 122 __entry->irq_id = irq_id;
125 __entry->virq = virq; 123 __entry->virq = virq;
126 __entry->hw_irq = hw_irq; 124 __entry->hw_irq = hw_irq;
127 __entry->irq_offset = irq_offset;
128 ), 125 ),
129 126
130 TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d irq_offset=0x%llx", 127 TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d",
131 __entry->pasid, 128 __entry->pasid,
132 __entry->irq_id, 129 __entry->irq_id,
133 __entry->virq, 130 __entry->virq,
134 __entry->hw_irq, 131 __entry->hw_irq
135 __entry->irq_offset
136 ) 132 )
137); 133);
138 134
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index dbfdd0fadbef..6a381594608c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -171,6 +171,7 @@ enum cpuhp_state {
171 CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, 171 CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
172 CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, 172 CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
173 CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, 173 CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
174 CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
174 CPUHP_AP_WATCHDOG_ONLINE, 175 CPUHP_AP_WATCHDOG_ONLINE,
175 CPUHP_AP_WORKQUEUE_ONLINE, 176 CPUHP_AP_WORKQUEUE_ONLINE,
176 CPUHP_AP_RCUTREE_ONLINE, 177 CPUHP_AP_RCUTREE_ONLINE,
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 9ff6ddc28e22..5c4b4916e6be 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -16,11 +16,7 @@
16 16
17#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */ 17#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */
18 18
19/* 19
20 * The following 2 structures are a fairly generic way of representing
21 * the configuration data for a function and AFU, as read from the
22 * configuration space.
23 */
24struct ocxl_afu_config { 20struct ocxl_afu_config {
25 u8 idx; 21 u8 idx;
26 int dvsec_afu_control_pos; /* offset of AFU control DVSEC */ 22 int dvsec_afu_control_pos; /* offset of AFU control DVSEC */
@@ -49,37 +45,314 @@ struct ocxl_fn_config {
49 s8 max_afu_index; 45 s8 max_afu_index;
50}; 46};
51 47
52/* 48enum ocxl_endian {
53 * Read the configuration space of a function and fill in a 49 OCXL_BIG_ENDIAN = 0, /**< AFU data is big-endian */
54 * ocxl_fn_config structure with all the function details 50 OCXL_LITTLE_ENDIAN = 1, /**< AFU data is little-endian */
51 OCXL_HOST_ENDIAN = 2, /**< AFU data is the same endianness as the host */
52};
53
54// These are opaque outside the ocxl driver
55struct ocxl_afu;
56struct ocxl_fn;
57struct ocxl_context;
58
59// Device detection & initialisation
60
61/**
62 * Open an OpenCAPI function on an OpenCAPI device
63 *
64 * @dev: The PCI device that contains the function
65 *
66 * Returns an opaque pointer to the function, or an error pointer (check with IS_ERR)
55 */ 67 */
56extern int ocxl_config_read_function(struct pci_dev *dev, 68struct ocxl_fn *ocxl_function_open(struct pci_dev *dev);
57 struct ocxl_fn_config *fn);
58 69
59/* 70/**
60 * Check if an AFU index is valid for the given function. 71 * Get the list of AFUs associated with a PCI function device
72 *
73 * Returns a list of struct ocxl_afu *
74 *
75 * @fn: The OpenCAPI function containing the AFUs
76 */
77struct list_head *ocxl_function_afu_list(struct ocxl_fn *fn);
78
79/**
80 * Fetch an AFU instance from an OpenCAPI function
81 *
82 * @fn: The OpenCAPI function to get the AFU from
83 * @afu_idx: The index of the AFU to get
84 *
85 * If successful, the AFU should be released with ocxl_afu_put()
86 *
87 * Returns a pointer to the AFU, or NULL on error
88 */
89struct ocxl_afu *ocxl_function_fetch_afu(struct ocxl_fn *fn, u8 afu_idx);
90
91/**
92 * Take a reference to an AFU
93 *
94 * @afu: The AFU to increment the reference count on
95 */
96void ocxl_afu_get(struct ocxl_afu *afu);
97
98/**
99 * Release a reference to an AFU
100 *
101 * @afu: The AFU to decrement the reference count on
102 */
103void ocxl_afu_put(struct ocxl_afu *afu);
104
105
106/**
107 * Get the configuration information for an OpenCAPI function
108 *
109 * @fn: The OpenCAPI function to get the config for
110 *
111 * Returns the function config, or NULL on error
112 */
113const struct ocxl_fn_config *ocxl_function_config(struct ocxl_fn *fn);
114
115/**
116 * Close an OpenCAPI function
117 *
118 * This will free any AFUs previously retrieved from the function, and
 119 * detach any associated contexts. The contexts must be freed by the caller.
120 *
121 * @fn: The OpenCAPI function to close
122 *
123 */
124void ocxl_function_close(struct ocxl_fn *fn);
125
126// Context allocation
127
128/**
129 * Allocate an OpenCAPI context
130 *
131 * @context: The OpenCAPI context to allocate, must be freed with ocxl_context_free
132 * @afu: The AFU the context belongs to
133 * @mapping: The mapping to unmap when the context is closed (may be NULL)
134 */
135int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
136 struct address_space *mapping);
137
138/**
139 * Free an OpenCAPI context
140 *
141 * @ctx: The OpenCAPI context to free
142 */
143void ocxl_context_free(struct ocxl_context *ctx);
144
145/**
 146 * Grant an OpenCAPI context access to an MM
147 * @ctx: The OpenCAPI context to attach
148 * @amr: The value of the AMR register to restrict access
149 * @mm: The mm to attach to the context
150 *
151 * Returns 0 on success, negative on failure
152 */
153int ocxl_context_attach(struct ocxl_context *ctx, u64 amr,
154 struct mm_struct *mm);
155
156/**
157 * Detach an MM from an OpenCAPI context
 158 * @ctx: The OpenCAPI context to detach
159 *
160 * Returns 0 on success, negative on failure
161 */
162int ocxl_context_detach(struct ocxl_context *ctx);
163
164// AFU IRQs
165
166/**
167 * Allocate an IRQ associated with an AFU context
168 * @ctx: the AFU context
169 * @irq_id: out, the IRQ ID
170 *
171 * Returns 0 on success, negative on failure
172 */
173extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id);
174
175/**
176 * Frees an IRQ associated with an AFU context
177 * @ctx: the AFU context
178 * @irq_id: the IRQ ID
179 *
180 * Returns 0 on success, negative on failure
181 */
182extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id);
183
184/**
185 * Gets the address of the trigger page for an IRQ
186 * This can then be provided to an AFU which will write to that
187 * page to trigger the IRQ.
188 * @ctx: The AFU context that the IRQ is associated with
189 * @irq_id: The IRQ ID
61 * 190 *
62 * AFU indexes can be sparse, so a driver should check all indexes up 191 * returns the trigger page address, or 0 if the IRQ is not valid
63 * to the maximum found in the function description
64 */ 192 */
65extern int ocxl_config_check_afu_index(struct pci_dev *dev, 193extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id);
66 struct ocxl_fn_config *fn, int afu_idx); 194
195/**
196 * Provide a callback to be called when an IRQ is triggered
197 * @ctx: The AFU context that the IRQ is associated with
198 * @irq_id: The IRQ ID
199 * @handler: the callback to be called when the IRQ is triggered
200 * @free_private: the callback to be called when the IRQ is freed (may be NULL)
201 * @private: Private data to be passed to the callbacks
202 *
203 * Returns 0 on success, negative on failure
204 */
205int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id,
206 irqreturn_t (*handler)(void *private),
207 void (*free_private)(void *private),
208 void *private);
209
210// AFU Metadata
211
212/**
213 * Get a pointer to the config for an AFU
214 *
215 * @afu: a pointer to the AFU to get the config for
216 *
217 * Returns a pointer to the AFU config
218 */
219struct ocxl_afu_config *ocxl_afu_config(struct ocxl_afu *afu);
220
221/**
222 * Assign opaque hardware specific information to an OpenCAPI AFU.
223 *
 224 * @afu: The AFU to assign the private data to
 225 * @private: the opaque hardware specific information to assign to the AFU
226 */
227void ocxl_afu_set_private(struct ocxl_afu *afu, void *private);
228
229/**
 230 * Fetch the hardware specific information associated with an OpenCAPI AFU.
 231 * This may be consumed by an external OpenCAPI driver.
232 *
233 * @afu: The AFU
234 *
235 * Returns the opaque pointer associated with the device, or NULL if not set
236 */
 237void *ocxl_afu_get_private(struct ocxl_afu *afu);
238
239// Global MMIO
240/**
241 * Read a 32 bit value from global MMIO
242 *
243 * @afu: The AFU
244 * @offset: The Offset from the start of MMIO
245 * @endian: the endianness that the MMIO data is in
246 * @val: returns the value
247 *
248 * Returns 0 for success, negative on error
249 */
250int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset,
251 enum ocxl_endian endian, u32 *val);
252
253/**
254 * Read a 64 bit value from global MMIO
255 *
256 * @afu: The AFU
257 * @offset: The Offset from the start of MMIO
258 * @endian: the endianness that the MMIO data is in
259 * @val: returns the value
260 *
261 * Returns 0 for success, negative on error
262 */
263int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset,
264 enum ocxl_endian endian, u64 *val);
265
266/**
267 * Write a 32 bit value to global MMIO
268 *
269 * @afu: The AFU
270 * @offset: The Offset from the start of MMIO
271 * @endian: the endianness that the MMIO data is in
272 * @val: The value to write
273 *
274 * Returns 0 for success, negative on error
275 */
276int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset,
277 enum ocxl_endian endian, u32 val);
278
279/**
280 * Write a 64 bit value to global MMIO
281 *
282 * @afu: The AFU
283 * @offset: The Offset from the start of MMIO
284 * @endian: the endianness that the MMIO data is in
285 * @val: The value to write
286 *
287 * Returns 0 for success, negative on error
288 */
289int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset,
290 enum ocxl_endian endian, u64 val);
291
292/**
293 * Set bits in a 32 bit global MMIO register
294 *
295 * @afu: The AFU
296 * @offset: The Offset from the start of MMIO
297 * @endian: the endianness that the MMIO data is in
298 * @mask: a mask of the bits to set
299 *
300 * Returns 0 for success, negative on error
301 */
302int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset,
303 enum ocxl_endian endian, u32 mask);
304
305/**
306 * Set bits in a 64 bit global MMIO register
307 *
308 * @afu: The AFU
309 * @offset: The Offset from the start of MMIO
310 * @endian: the endianness that the MMIO data is in
311 * @mask: a mask of the bits to set
312 *
313 * Returns 0 for success, negative on error
314 */
315int ocxl_global_mmio_set64(struct ocxl_afu *afu, size_t offset,
316 enum ocxl_endian endian, u64 mask);
317
318/**
 319 * Clear bits in a 32 bit global MMIO register
320 *
321 * @afu: The AFU
322 * @offset: The Offset from the start of MMIO
323 * @endian: the endianness that the MMIO data is in
 324 * @mask: a mask of the bits to clear
325 *
326 * Returns 0 for success, negative on error
327 */
328int ocxl_global_mmio_clear32(struct ocxl_afu *afu, size_t offset,
329 enum ocxl_endian endian, u32 mask);
330
331/**
 332 * Clear bits in a 64 bit global MMIO register
333 *
334 * @afu: The AFU
335 * @offset: The Offset from the start of MMIO
336 * @endian: the endianness that the MMIO data is in
 337 * @mask: a mask of the bits to clear
338 *
339 * Returns 0 for success, negative on error
340 */
341int ocxl_global_mmio_clear64(struct ocxl_afu *afu, size_t offset,
342 enum ocxl_endian endian, u64 mask);
343
344// Functions left here are for compatibility with the cxlflash driver
67 345
68/* 346/*
69 * Read the configuration space of a function for the AFU specified by 347 * Read the configuration space of a function for the AFU specified by
70 * the index 'afu_idx'. Fills in a ocxl_afu_config structure 348 * the index 'afu_idx'. Fills in a ocxl_afu_config structure
71 */ 349 */
72extern int ocxl_config_read_afu(struct pci_dev *dev, 350int ocxl_config_read_afu(struct pci_dev *dev,
73 struct ocxl_fn_config *fn, 351 struct ocxl_fn_config *fn,
74 struct ocxl_afu_config *afu, 352 struct ocxl_afu_config *afu,
75 u8 afu_idx); 353 u8 afu_idx);
76 354
77/* 355/*
78 * Get the max PASID value that can be used by the function
79 */
80extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
81
82/*
83 * Tell an AFU, by writing in the configuration space, the PASIDs that 356 * Tell an AFU, by writing in the configuration space, the PASIDs that
84 * it can use. Range starts at 'pasid_base' and its size is a multiple 357 * it can use. Range starts at 'pasid_base' and its size is a multiple
85 * of 2 358 * of 2
@@ -87,7 +360,7 @@ extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
87 * 'afu_control_offset' is the offset of the AFU control DVSEC which 360 * 'afu_control_offset' is the offset of the AFU control DVSEC which
88 * can be found in the function configuration 361 * can be found in the function configuration
89 */ 362 */
90extern void ocxl_config_set_afu_pasid(struct pci_dev *dev, 363void ocxl_config_set_afu_pasid(struct pci_dev *dev,
91 int afu_control_offset, 364 int afu_control_offset,
92 int pasid_base, u32 pasid_count_log); 365 int pasid_base, u32 pasid_count_log);
93 366
@@ -98,7 +371,7 @@ extern void ocxl_config_set_afu_pasid(struct pci_dev *dev,
98 * 'supported' is the total number of actags desired by all the AFUs 371 * 'supported' is the total number of actags desired by all the AFUs
99 * of the function. 372 * of the function.
100 */ 373 */
101extern int ocxl_config_get_actag_info(struct pci_dev *dev, 374int ocxl_config_get_actag_info(struct pci_dev *dev,
102 u16 *base, u16 *enabled, u16 *supported); 375 u16 *base, u16 *enabled, u16 *supported);
103 376
104/* 377/*
@@ -108,7 +381,7 @@ extern int ocxl_config_get_actag_info(struct pci_dev *dev,
108 * 'func_offset' is the offset of the Function DVSEC that can found in 381 * 'func_offset' is the offset of the Function DVSEC that can found in
109 * the function configuration 382 * the function configuration
110 */ 383 */
111extern void ocxl_config_set_actag(struct pci_dev *dev, int func_offset, 384void ocxl_config_set_actag(struct pci_dev *dev, int func_offset,
112 u32 actag_base, u32 actag_count); 385 u32 actag_base, u32 actag_count);
113 386
114/* 387/*
@@ -118,7 +391,7 @@ extern void ocxl_config_set_actag(struct pci_dev *dev, int func_offset,
118 * 'afu_control_offset' is the offset of the AFU control DVSEC for the 391 * 'afu_control_offset' is the offset of the AFU control DVSEC for the
119 * desired AFU. It can be found in the AFU configuration 392 * desired AFU. It can be found in the AFU configuration
120 */ 393 */
121extern void ocxl_config_set_afu_actag(struct pci_dev *dev, 394void ocxl_config_set_afu_actag(struct pci_dev *dev,
122 int afu_control_offset, 395 int afu_control_offset,
123 int actag_base, int actag_count); 396 int actag_base, int actag_count);
124 397
@@ -128,7 +401,7 @@ extern void ocxl_config_set_afu_actag(struct pci_dev *dev,
128 * 'afu_control_offset' is the offset of the AFU control DVSEC for the 401 * 'afu_control_offset' is the offset of the AFU control DVSEC for the
129 * desired AFU. It can be found in the AFU configuration 402 * desired AFU. It can be found in the AFU configuration
130 */ 403 */
131extern void ocxl_config_set_afu_state(struct pci_dev *dev, 404void ocxl_config_set_afu_state(struct pci_dev *dev,
132 int afu_control_offset, int enable); 405 int afu_control_offset, int enable);
133 406
134/* 407/*
@@ -139,7 +412,7 @@ extern void ocxl_config_set_afu_state(struct pci_dev *dev,
139 * between the host and device, and set the Transaction Layer on both 412 * between the host and device, and set the Transaction Layer on both
140 * accordingly. 413 * accordingly.
141 */ 414 */
142extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec); 415int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
143 416
144/* 417/*
145 * Request an AFU to terminate a PASID. 418 * Request an AFU to terminate a PASID.
@@ -152,10 +425,17 @@ extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
152 * 'afu_control_offset' is the offset of the AFU control DVSEC for the 425 * 'afu_control_offset' is the offset of the AFU control DVSEC for the
153 * desired AFU. It can be found in the AFU configuration 426 * desired AFU. It can be found in the AFU configuration
154 */ 427 */
155extern int ocxl_config_terminate_pasid(struct pci_dev *dev, 428int ocxl_config_terminate_pasid(struct pci_dev *dev,
156 int afu_control_offset, int pasid); 429 int afu_control_offset, int pasid);
157 430
158/* 431/*
432 * Read the configuration space of a function and fill in an
433 * ocxl_fn_config structure with all the function details
434 */
435int ocxl_config_read_function(struct pci_dev *dev,
436 struct ocxl_fn_config *fn);
437
438/*
159 * Set up the opencapi link for the function. 439 * Set up the opencapi link for the function.
160 * 440 *
161 * When called for the first time for a link, it sets up the Shared 441 * When called for the first time for a link, it sets up the Shared
@@ -165,13 +445,13 @@ extern int ocxl_config_terminate_pasid(struct pci_dev *dev,
165 * Returns a 'link handle' that should be used for further calls for 445 * Returns a 'link handle' that should be used for further calls for
166 * the link 446 * the link
167 */ 447 */
168extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask, 448int ocxl_link_setup(struct pci_dev *dev, int PE_mask,
169 void **link_handle); 449 void **link_handle);
170 450
171/* 451/*
172 * Remove the association between the function and its link. 452 * Remove the association between the function and its link.
173 */ 453 */
174extern void ocxl_link_release(struct pci_dev *dev, void *link_handle); 454void ocxl_link_release(struct pci_dev *dev, void *link_handle);
175 455
176/* 456/*
177 * Add a Process Element to the Shared Process Area for a link. 457 * Add a Process Element to the Shared Process Area for a link.
@@ -183,24 +463,15 @@ extern void ocxl_link_release(struct pci_dev *dev, void *link_handle);
183 * 'xsl_err_data' is an argument passed to the above callback, if 463 * 'xsl_err_data' is an argument passed to the above callback, if
184 * defined 464 * defined
185 */ 465 */
186extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, 466int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
187 u64 amr, struct mm_struct *mm, 467 u64 amr, struct mm_struct *mm,
188 void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), 468 void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
189 void *xsl_err_data); 469 void *xsl_err_data);
190 470
191/**
192 * Update values within a Process Element
193 *
194 * link_handle: the link handle associated with the process element
195 * pasid: the PASID for the AFU context
196 * tid: the new thread id for the process element
197 */
198extern int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
199
200/* 471/*
201 * Remove a Process Element from the Shared Process Area for a link 472 * Remove a Process Element from the Shared Process Area for a link
202 */ 473 */
203extern int ocxl_link_remove_pe(void *link_handle, int pasid); 474int ocxl_link_remove_pe(void *link_handle, int pasid);
204 475
205/* 476/*
206 * Allocate an AFU interrupt associated to the link. 477 * Allocate an AFU interrupt associated to the link.
@@ -212,12 +483,12 @@ extern int ocxl_link_remove_pe(void *link_handle, int pasid);
212 * interrupt. It is an MMIO address which needs to be remapped (one 483 * interrupt. It is an MMIO address which needs to be remapped (one
213 * page). 484 * page).
214 */ 485 */
215extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, 486int ocxl_link_irq_alloc(void *link_handle, int *hw_irq,
216 u64 *obj_handle); 487 u64 *obj_handle);
217 488
218/* 489/*
219 * Free a previously allocated AFU interrupt 490 * Free a previously allocated AFU interrupt
220 */ 491 */
221extern void ocxl_link_free_irq(void *link_handle, int hw_irq); 492void ocxl_link_free_irq(void *link_handle, int hw_irq);
222 493
223#endif /* _MISC_OCXL_H_ */ 494#endif /* _MISC_OCXL_H_ */
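
For context, here is a minimal sketch of how an external driver (such as cxlflash, which these accessors were exported for) might use the new global MMIO helpers declared above. Only the accessor signatures come from the header; the register offsets, the enable bit and the OCXL_LITTLE_ENDIAN enumerator are assumptions for illustration.

#include <misc/ocxl.h>

/* Hypothetical register layout, purely for illustration */
#define MY_AFU_STATUS_REG	0x08	/* 64 bit status word */
#define MY_AFU_CTRL_REG		0x10	/* 32 bit control word */
#define MY_AFU_CTRL_ENABLE	0x1	/* hypothetical enable bit */

static int my_afu_enable(struct ocxl_afu *afu)
{
	u64 status;
	int rc;

	/* Read a 64 bit status word from global MMIO */
	rc = ocxl_global_mmio_read64(afu, MY_AFU_STATUS_REG,
				     OCXL_LITTLE_ENDIAN, &status);
	if (rc)
		return rc;

	/* Set the enable bit without disturbing the other bits */
	return ocxl_global_mmio_set32(afu, MY_AFU_CTRL_REG,
				      OCXL_LITTLE_ENDIAN, MY_AFU_CTRL_ENABLE);
}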
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h
index 0bab35f6777a..05c1663c89b0 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/export.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/export.h
@@ -1,2 +1,3 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: GPL-2.0 */
2#define EXPORT_SYMBOL(x) 2#define EXPORT_SYMBOL(x)
3#define EXPORT_SYMBOL_KASAN(x)
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/kasan.h b/tools/testing/selftests/powerpc/copyloops/asm/kasan.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/kasan.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
index 0605df807593..58c1cef3e399 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -25,6 +25,7 @@
25 25
26#define _GLOBAL(A) FUNC_START(test_ ## A) 26#define _GLOBAL(A) FUNC_START(test_ ## A)
27#define _GLOBAL_TOC(A) _GLOBAL(A) 27#define _GLOBAL_TOC(A) _GLOBAL(A)
28#define _GLOBAL_TOC_KASAN(A) _GLOBAL(A)
28 29
29#define PPC_MTOCRF(A, B) mtocrf A, B 30#define PPC_MTOCRF(A, B) mtocrf A, B
30 31
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 9d7166dfad1e..ba89353abfcc 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -21,6 +21,7 @@
21 21
22#define KILL_TIMEOUT 5 22#define KILL_TIMEOUT 5
23 23
24/* Setting timeout to -1 disables the alarm */
24static uint64_t timeout = 120; 25static uint64_t timeout = 120;
25 26
26int run_test(int (test_function)(void), char *name) 27int run_test(int (test_function)(void), char *name)
@@ -43,8 +44,9 @@ int run_test(int (test_function)(void), char *name)
43 44
44 setpgid(pid, pid); 45 setpgid(pid, pid);
45 46
46 /* Wake us up in timeout seconds */ 47 if (timeout != -1)
47 alarm(timeout); 48 /* Wake us up in timeout seconds */
49 alarm(timeout);
48 terminated = false; 50 terminated = false;
49 51
50wait: 52wait:
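
The guard above lets a test opt out of the watchdog entirely. A minimal sketch of that pattern, assuming the test_harness() and test_harness_set_timeout() declarations from the selftests' utils.h (the sigfuz test below uses them the same way for its run-forever mode):

#include "utils.h"

static int soak_test(void)
{
	/* long-running or interactive test body elided */
	return 0;
}

int main(void)
{
	/* -1 lands in 'timeout' above, so run_test() never calls alarm() */
	test_harness_set_timeout(-1);
	return test_harness(soak_test, "soak_test");
}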
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 96043b9b9829..1e797ae396ee 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -79,11 +79,13 @@
79 79
80/* MSR register bits */ 80/* MSR register bits */
81#define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ 81#define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */
82#define MSR_TS_T_LG 34 /* Trans Mem state: Active */
82 83
83#define __MASK(X) (1UL<<(X)) 84#define __MASK(X) (1UL<<(X))
84 85
85/* macro to check TM MSR bits */ 86/* macro to check TM MSR bits */
86#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */ 87#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */
88#define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */
87 89
88/* Vector Instructions */ 90/* Vector Instructions */
89#define VSX_XX1(xs, ra, rb) (((xs) & 0x1f) << 21 | ((ra) << 16) | \ 91#define VSX_XX1(xs, ra, rb) (((xs) & 0x1f) << 21 | ((ra) << 16) | \
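
The new MSR_TS_T mask complements MSR_TS_S, so tests can name every transactional-memory state in the MSR. A small sketch of how a test might classify an MSR value with these masks; note that setting both bits at once is an invalid combination, which the sigfuz test below generates deliberately:

static const char *tm_state(unsigned long msr)
{
	if ((msr & MSR_TS_T) && (msr & MSR_TS_S))
		return "invalid (T and S both set)";
	if (msr & MSR_TS_T)
		return "transactional";
	if (msr & MSR_TS_S)
		return "suspended";
	return "non-transactional";
}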
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index 1b89224a8aab..dca5852a1546 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -1,2 +1,3 @@
1signal 1signal
2signal_tm 2signal_tm
3sigfuz
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 209a958dca12..113838fbbe7f 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,8 +1,9 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2TEST_GEN_PROGS := signal signal_tm 2TEST_GEN_PROGS := signal signal_tm sigfuz
3 3
4CFLAGS += -maltivec 4CFLAGS += -maltivec
5$(OUTPUT)/signal_tm: CFLAGS += -mhtm 5$(OUTPUT)/signal_tm: CFLAGS += -mhtm
6$(OUTPUT)/sigfuz: CFLAGS += -pthread -m64
6 7
7top_srcdir = ../../../../.. 8top_srcdir = ../../../../..
8include ../../lib.mk 9include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c
new file mode 100644
index 000000000000..dade00c698c2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigfuz.c
@@ -0,0 +1,325 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2018, Breno Leitao, IBM Corp.
4 * Licensed under GPLv2.
5 *
6 * Sigfuz(tm): A PowerPC TM-aware signal fuzzer.
7 *
8 * This is a new selftest that raises SIGUSR1 signals and handles them in a
9 * set of different ways, trying to create different scenarios for testing
10 * purposes.
11 *
12 * This test works by raising a signal and calling sigreturn interleaved
13 * with TM operations, such as starting, suspending and terminating a
14 * transaction. The test depends on random numbers and, based on them, sets
15 * different TM states.
16 *
17 * In addition, the test fills the user context struct that is passed to
18 * the sigreturn system call with random data, in order to make sure that
19 * the sigreturn syscall can handle different and invalid states
20 * properly.
21 *
22 * This selftest has command line parameters to control what kind of tests
23 * the user wants to run, for example, whether a transaction should be
24 * started prior to the signal being raised, or after the signal is raised
25 * and before the sigreturn. If no parameter is given, all options are enabled.
26 *
27 * This test does not check whether the user context is read and set
28 * properly by the kernel. Its purpose, at this time, is simply to
29 * guarantee that the kernel does not crash on invalid scenarios.
30 */
31
32#include <stdio.h>
33#include <limits.h>
34#include <sys/wait.h>
35#include <unistd.h>
36#include <stdlib.h>
37#include <signal.h>
38#include <string.h>
39#include <ucontext.h>
40#include <sys/mman.h>
41#include <pthread.h>
42#include "utils.h"
43
44/* Selftest defaults */
45#define COUNT_MAX 4000 /* Number of iterations */
46#define THREADS 16 /* Number of threads */
47
48/* Arguments options */
49#define ARG_MESS_WITH_TM_AT 0x1
50#define ARG_MESS_WITH_TM_BEFORE 0x2
51#define ARG_MESS_WITH_MSR_AT 0x4
52#define ARG_FOREVER 0x10
53#define ARG_COMPLETE (ARG_MESS_WITH_TM_AT | \
54 ARG_MESS_WITH_TM_BEFORE | \
55 ARG_MESS_WITH_MSR_AT)
56
57static int args;
58static int nthread = THREADS;
59static int count_max = COUNT_MAX;
60
61/* checkpoint context */
62static ucontext_t *tmp_uc;
63
64/* Return true with 1/x probability */
65static int one_in_chance(int x)
66{
67 return rand() % x == 0;
68}
69
70/* Change TM states */
71static void mess_with_tm(void)
72{
73 /* Starts a transaction 33% of the time */
74 if (one_in_chance(3)) {
75 asm ("tbegin. ;"
76 "beq 8 ;");
77
78 /* And suspend half of them */
79 if (one_in_chance(2))
80 asm("tsuspend. ;");
81 }
82
83 /* Call 'tend' in 5% of the runs */
84 if (one_in_chance(20))
85 asm("tend. ;");
86}
87
88/* Signal handler that will be invoked with raise() */
89static void trap_signal_handler(int signo, siginfo_t *si, void *uc)
90{
91 ucontext_t *ucp = uc;
92
93 ucp->uc_link = tmp_uc;
94
95 /*
96 * Set uc_link in three possible ways:
97 * - Filling the whole chunk with a single random byte
98 * - Cloning ucp into uc_link
99 * - Allocating a new memory chunk
100 */
101 if (one_in_chance(3)) {
102 memset(ucp->uc_link, rand(), sizeof(ucontext_t));
103 } else if (one_in_chance(2)) {
104 memcpy(ucp->uc_link, uc, sizeof(ucontext_t));
105 } else if (one_in_chance(2)) {
106 if (tmp_uc) {
107 free(tmp_uc);
108 tmp_uc = NULL;
109 }
110 tmp_uc = malloc(sizeof(ucontext_t));
111 ucp->uc_link = tmp_uc;
112 /* Trying to cause a major page fault at kernel level */
113 madvise(ucp->uc_link, sizeof(ucontext_t), MADV_DONTNEED);
114 }
115
116 if (args & ARG_MESS_WITH_MSR_AT) {
117 /* Changing the checkpointed registers */
118 if (one_in_chance(4)) {
119 ucp->uc_link->uc_mcontext.gp_regs[PT_MSR] |= MSR_TS_S;
120 } else {
121 if (one_in_chance(2)) {
122 ucp->uc_link->uc_mcontext.gp_regs[PT_MSR] |=
123 MSR_TS_T;
124 } else if (one_in_chance(2)) {
125 ucp->uc_link->uc_mcontext.gp_regs[PT_MSR] |=
126 MSR_TS_T | MSR_TS_S;
127 }
128 }
129
130 /* Changing the current register context */
131 if (one_in_chance(2)) {
132 ucp->uc_mcontext.gp_regs[PT_MSR] |= MSR_TS_S;
133 } else if (one_in_chance(2)) {
134 if (one_in_chance(2))
135 ucp->uc_mcontext.gp_regs[PT_MSR] |=
136 MSR_TS_T;
137 else if (one_in_chance(2))
138 ucp->uc_mcontext.gp_regs[PT_MSR] |=
139 MSR_TS_T | MSR_TS_S;
140 }
141 }
142
143 if (one_in_chance(20)) {
144 /* Nested transaction start */
145 if (one_in_chance(5))
146 mess_with_tm();
147
148 /* Return without changing any other context info */
149 return;
150 }
151
152 if (one_in_chance(10))
153 ucp->uc_mcontext.gp_regs[PT_MSR] = random();
154 if (one_in_chance(10))
155 ucp->uc_mcontext.gp_regs[PT_NIP] = random();
156 if (one_in_chance(10))
157 ucp->uc_link->uc_mcontext.gp_regs[PT_MSR] = random();
158 if (one_in_chance(10))
159 ucp->uc_link->uc_mcontext.gp_regs[PT_NIP] = random();
160
161 ucp->uc_mcontext.gp_regs[PT_TRAP] = random();
162 ucp->uc_mcontext.gp_regs[PT_DSISR] = random();
163 ucp->uc_mcontext.gp_regs[PT_DAR] = random();
164 ucp->uc_mcontext.gp_regs[PT_ORIG_R3] = random();
165 ucp->uc_mcontext.gp_regs[PT_XER] = random();
166 ucp->uc_mcontext.gp_regs[PT_RESULT] = random();
167 ucp->uc_mcontext.gp_regs[PT_SOFTE] = random();
168 ucp->uc_mcontext.gp_regs[PT_DSCR] = random();
169 ucp->uc_mcontext.gp_regs[PT_CTR] = random();
170 ucp->uc_mcontext.gp_regs[PT_LNK] = random();
171 ucp->uc_mcontext.gp_regs[PT_CCR] = random();
172 ucp->uc_mcontext.gp_regs[PT_REGS_COUNT] = random();
173
174 ucp->uc_link->uc_mcontext.gp_regs[PT_TRAP] = random();
175 ucp->uc_link->uc_mcontext.gp_regs[PT_DSISR] = random();
176 ucp->uc_link->uc_mcontext.gp_regs[PT_DAR] = random();
177 ucp->uc_link->uc_mcontext.gp_regs[PT_ORIG_R3] = random();
178 ucp->uc_link->uc_mcontext.gp_regs[PT_XER] = random();
179 ucp->uc_link->uc_mcontext.gp_regs[PT_RESULT] = random();
180 ucp->uc_link->uc_mcontext.gp_regs[PT_SOFTE] = random();
181 ucp->uc_link->uc_mcontext.gp_regs[PT_DSCR] = random();
182 ucp->uc_link->uc_mcontext.gp_regs[PT_CTR] = random();
183 ucp->uc_link->uc_mcontext.gp_regs[PT_LNK] = random();
184 ucp->uc_link->uc_mcontext.gp_regs[PT_CCR] = random();
185 ucp->uc_link->uc_mcontext.gp_regs[PT_REGS_COUNT] = random();
186
187 if (args & ARG_MESS_WITH_TM_BEFORE) {
188 if (one_in_chance(2))
189 mess_with_tm();
190 }
191}
192
193static void seg_signal_handler(int signo, siginfo_t *si, void *uc)
194{
195 /* Clean exit for processes that segfault */
196 exit(0);
197}
198
199static void *sigfuz_test(void *thrid)
200{
201 struct sigaction trap_sa, seg_sa;
202 int ret, i = 0;
203 pid_t t;
204
205 tmp_uc = malloc(sizeof(ucontext_t));
206
207 /* Main signal handler */
208 trap_sa.sa_flags = SA_SIGINFO;
209 trap_sa.sa_sigaction = trap_signal_handler;
210
211 /* SIGSEGV signal handler */
212 seg_sa.sa_flags = SA_SIGINFO;
213 seg_sa.sa_sigaction = seg_signal_handler;
214
215 /* The signal handler will enable MSR_TS */
216 sigaction(SIGUSR1, &trap_sa, NULL);
217
218 /* If it does not crash, it will segfault; handle it so we can keep testing */
219 sigaction(SIGSEGV, &seg_sa, NULL);
220
221 while (i < count_max) {
222 t = fork();
223
224 if (t == 0) {
225 /* One seed per process */
226 srand(time(NULL) + getpid());
227 if (args & ARG_MESS_WITH_TM_AT) {
228 if (one_in_chance(2))
229 mess_with_tm();
230 }
231 raise(SIGUSR1);
232 exit(0);
233 } else {
234 waitpid(t, &ret, 0);
235 }
236 if (!(args & ARG_FOREVER))
237 i++;
238 }
239
240 /* If not freed already, free now */
241 if (tmp_uc) {
242 free(tmp_uc);
243 tmp_uc = NULL;
244 }
245
246 return NULL;
247}
248
249static int signal_fuzzer(void)
250{
251 int t, rc;
252 pthread_t *threads;
253
254 threads = malloc(nthread * sizeof(pthread_t));
255
256 for (t = 0; t < nthread; t++) {
257 rc = pthread_create(&threads[t], NULL, sigfuz_test,
258 (void *)&t);
259 if (rc)
260 perror("Thread creation error\n");
261 }
262
263 for (t = 0; t < nthread; t++) {
264 rc = pthread_join(threads[t], NULL);
265 if (rc)
266 perror("Thread join error\n");
267 }
268
269 free(threads);
270
271 return EXIT_SUCCESS;
272}
273
274static void show_help(char *name)
275{
276 printf("%s: Sigfuzzer for powerpc\n", name);
277 printf("Usage:\n");
278 printf("\t-b\t Mess with TM before raising a SIGUSR1 signal\n");
279 printf("\t-a\t Mess with TM after raising a SIGUSR1 signal\n");
280 printf("\t-m\t Mess with MSR[TS] bits at mcontext\n");
281 printf("\t-x\t Mess with everything above\n");
282 printf("\t-f\t Run forever (Press ^C to Quit)\n");
283 printf("\t-i\t Amount of interactions. (Default = %d)\n", COUNT_MAX);
284 printf("\t-t\t Amount of threads. (Default = %d)\n", THREADS);
285 exit(-1);
286}
287
288int main(int argc, char **argv)
289{
290 int opt;
291
292 while ((opt = getopt(argc, argv, "bamxt:fi:h")) != -1) {
293 if (opt == 'b') {
294 printf("Mess with TM before signal\n");
295 args |= ARG_MESS_WITH_TM_BEFORE;
296 } else if (opt == 'a') {
297 printf("Mess with TM at signal handler\n");
298 args |= ARG_MESS_WITH_TM_AT;
299 } else if (opt == 'm') {
300 printf("Mess with MSR[TS] bits in mcontext\n");
301 args |= ARG_MESS_WITH_MSR_AT;
302 } else if (opt == 'x') {
303 printf("Running with all options enabled\n");
304 args |= ARG_COMPLETE;
305 } else if (opt == 't') {
306 nthread = atoi(optarg);
307 printf("Threads = %d\n", nthread);
308 } else if (opt == 'f') {
309 args |= ARG_FOREVER;
310 printf("Press ^C to stop\n");
311 test_harness_set_timeout(-1);
312 } else if (opt == 'i') {
313 count_max = atoi(optarg);
314 printf("Running for %d interactions\n", count_max);
315 } else if (opt == 'h') {
316 show_help(argv[0]);
317 }
318 }
319
320 /* Default test suite */
321 if (!args)
322 args = ARG_COMPLETE;
323
324 test_harness(signal_fuzzer, "signal_fuzzer");
325}
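
Usage note, inferred from the option parsing above: run with no arguments and sigfuz enables every scenario (ARG_COMPLETE) across 16 threads for 4000 iterations each; an invocation like './sigfuz -m -t 4 -i 10000' restricts it to the MSR[TS] scenario, while '-f' runs until interrupted and disables the harness watchdog via test_harness_set_timeout(-1), the case the harness.c change above caters for.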
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.c b/tools/testing/selftests/powerpc/vphn/vphn.c
index 186b906e66d5..1d1f5f2be3b2 120000
--- a/tools/testing/selftests/powerpc/vphn/vphn.c
+++ b/tools/testing/selftests/powerpc/vphn/vphn.c
@@ -1 +1 @@
../../../../../arch/powerpc/mm/vphn.c \ No newline at end of file ../../../../../arch/powerpc/mm/book3s64/vphn.c \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.h b/tools/testing/selftests/powerpc/vphn/vphn.h
index 7131efe38c65..45fe160f8288 120000
--- a/tools/testing/selftests/powerpc/vphn/vphn.h
+++ b/tools/testing/selftests/powerpc/vphn/vphn.h
@@ -1 +1 @@
../../../../../arch/powerpc/mm/vphn.h \ No newline at end of file ../../../../../arch/powerpc/mm/book3s64/vphn.h \ No newline at end of file