Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kbuild16
-rw-r--r--arch/powerpc/Kconfig21
-rw-r--r--arch/powerpc/Kconfig.debug6
-rw-r--r--arch/powerpc/Makefile94
-rw-r--r--arch/powerpc/boot/.gitignore1
-rw-r--r--arch/powerpc/boot/Makefile66
-rw-r--r--arch/powerpc/boot/crt0.S4
-rw-r--r--arch/powerpc/boot/dts/Makefile6
-rw-r--r--arch/powerpc/boot/dts/fsl/Makefile4
-rw-r--r--arch/powerpc/boot/libfdt_env.h2
-rw-r--r--arch/powerpc/boot/opal.c8
-rw-r--r--arch/powerpc/boot/serial.c1
-rw-r--r--arch/powerpc/configs/g5_defconfig1
-rw-r--r--arch/powerpc/configs/maple_defconfig1
-rw-r--r--arch/powerpc/configs/powernv_defconfig4
-rw-r--r--arch/powerpc/configs/ppc64_defconfig4
-rw-r--r--arch/powerpc/configs/ps3_defconfig1
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/configs/skiroot_defconfig154
-rw-r--r--arch/powerpc/include/asm/accounting.h4
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h24
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h158
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h107
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-64k.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h186
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h1
-rw-r--r--arch/powerpc/include/asm/bug.h2
-rw-r--r--arch/powerpc/include/asm/compat.h24
-rw-r--r--arch/powerpc/include/asm/cputhreads.h2
-rw-r--r--arch/powerpc/include/asm/cputime.h1
-rw-r--r--arch/powerpc/include/asm/drmem.h5
-rw-r--r--arch/powerpc/include/asm/eeh.h24
-rw-r--r--arch/powerpc/include/asm/error-injection.h13
-rw-r--r--arch/powerpc/include/asm/exception-64s.h17
-rw-r--r--arch/powerpc/include/asm/firmware.h5
-rw-r--r--arch/powerpc/include/asm/fixmap.h2
-rw-r--r--arch/powerpc/include/asm/hugetlb.h43
-rw-r--r--arch/powerpc/include/asm/hvcall.h52
-rw-r--r--arch/powerpc/include/asm/io.h33
-rw-r--r--arch/powerpc/include/asm/iommu.h2
-rw-r--r--arch/powerpc/include/asm/kgdb.h5
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h45
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h118
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h3
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h4
-rw-r--r--arch/powerpc/include/asm/kvm_host.h16
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h8
-rw-r--r--arch/powerpc/include/asm/machdep.h3
-rw-r--r--arch/powerpc/include/asm/mce.h3
-rw-r--r--arch/powerpc/include/asm/mmu.h15
-rw-r--r--arch/powerpc/include/asm/mmu_context.h2
-rw-r--r--arch/powerpc/include/asm/mpic.h7
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h75
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-40x.h43
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-44x.h30
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-8xx.h87
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h33
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h46
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h100
-rw-r--r--arch/powerpc/include/asm/nohash/pte-book3e.h41
-rw-r--r--arch/powerpc/include/asm/opal-api.h1
-rw-r--r--arch/powerpc/include/asm/paca.h18
-rw-r--r--arch/powerpc/include/asm/pgtable.h29
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h2
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h1
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h1
-rw-r--r--arch/powerpc/include/asm/processor.h13
-rw-r--r--arch/powerpc/include/asm/pte-common.h219
-rw-r--r--arch/powerpc/include/asm/ptrace.h38
-rw-r--r--arch/powerpc/include/asm/reg.h9
-rw-r--r--arch/powerpc/include/asm/rtas.h15
-rw-r--r--arch/powerpc/include/asm/slice.h1
-rw-r--r--arch/powerpc/include/asm/smp.h11
-rw-r--r--arch/powerpc/include/asm/sparsemem.h11
-rw-r--r--arch/powerpc/include/asm/stackprotector.h38
-rw-r--r--arch/powerpc/include/asm/thread_info.h17
-rw-r--r--arch/powerpc/include/asm/trace.h15
-rw-r--r--arch/powerpc/include/asm/uaccess.h6
-rw-r--r--arch/powerpc/include/asm/unistd.h3
-rw-r--r--arch/powerpc/include/asm/user.h2
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild1
-rw-r--r--arch/powerpc/include/uapi/asm/ioctls.h2
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h1
-rw-r--r--arch/powerpc/include/uapi/asm/ptrace.h11
-rw-r--r--arch/powerpc/include/uapi/asm/sigcontext.h6
-rw-r--r--arch/powerpc/include/uapi/asm/siginfo.h18
-rw-r--r--arch/powerpc/kernel/Makefile13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c32
-rw-r--r--arch/powerpc/kernel/btext.c2
-rw-r--r--arch/powerpc/kernel/cacheinfo.c37
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S4
-rw-r--r--arch/powerpc/kernel/crash_dump.c2
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c4
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c4
-rw-r--r--arch/powerpc/kernel/eeh.c42
-rw-r--r--arch/powerpc/kernel/eeh_dev.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c237
-rw-r--r--arch/powerpc/kernel/eeh_pe.c160
-rw-r--r--arch/powerpc/kernel/entry_32.S4
-rw-r--r--arch/powerpc/kernel/entry_64.S33
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S244
-rw-r--r--arch/powerpc/kernel/fadump.c4
-rw-r--r--arch/powerpc/kernel/head_8xx.S6
-rw-r--r--arch/powerpc/kernel/io-workarounds.c4
-rw-r--r--arch/powerpc/kernel/iommu.c2
-rw-r--r--arch/powerpc/kernel/isa-bridge.c6
-rw-r--r--arch/powerpc/kernel/kgdb.c43
-rw-r--r--arch/powerpc/kernel/mce.c9
-rw-r--r--arch/powerpc/kernel/mce_power.c9
-rw-r--r--arch/powerpc/kernel/module.c8
-rw-r--r--arch/powerpc/kernel/module_64.c14
-rw-r--r--arch/powerpc/kernel/paca.c2
-rw-r--r--arch/powerpc/kernel/pci_32.c6
-rw-r--r--arch/powerpc/kernel/pci_64.c2
-rw-r--r--arch/powerpc/kernel/process.c99
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/prom_init.c223
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh16
-rw-r--r--arch/powerpc/kernel/ptrace.c68
-rw-r--r--arch/powerpc/kernel/rtas.c13
-rw-r--r--arch/powerpc/kernel/rtasd.c25
-rw-r--r--arch/powerpc/kernel/setup-common.c5
-rw-r--r--arch/powerpc/kernel/setup_32.c10
-rw-r--r--arch/powerpc/kernel/setup_64.c29
-rw-r--r--arch/powerpc/kernel/smp.c245
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S2
-rw-r--r--arch/powerpc/kernel/time.c104
-rw-r--r--arch/powerpc/kernel/tm.S75
-rw-r--r--arch/powerpc/kernel/trace/Makefile4
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c261
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64.S12
-rw-r--r--arch/powerpc/kernel/traps.c157
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S1
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S1
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S1
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S1
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S18
-rw-r--r--arch/powerpc/kvm/Makefile5
-rw-r--r--arch/powerpc/kvm/book3s.c46
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c7
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c718
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c94
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c87
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c13
-rw-r--r--arch/powerpc/kvm/book3s_hv.c873
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c92
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S95
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c1291
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c10
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c13
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S809
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm_builtin.c5
-rw-r--r--arch/powerpc/kvm/book3s_pr.c5
-rw-r--r--arch/powerpc/kvm/book3s_xics.c14
-rw-r--r--arch/powerpc/kvm/book3s_xive.c63
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c8
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S8
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c1
-rw-r--r--arch/powerpc/kvm/powerpc.c15
-rw-r--r--arch/powerpc/kvm/tm.S250
-rw-r--r--arch/powerpc/kvm/trace_book3s.h1
-rw-r--r--arch/powerpc/lib/Makefile4
-rw-r--r--arch/powerpc/lib/alloc.c4
-rw-r--r--arch/powerpc/lib/code-patching.c3
-rw-r--r--arch/powerpc/lib/error-inject.c16
-rw-r--r--arch/powerpc/lib/mem_64.S4
-rw-r--r--arch/powerpc/mm/8xx_mmu.c5
-rw-r--r--arch/powerpc/mm/Makefile13
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c2
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables-8xx.c82
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables-book3s64.c120
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables-generic.c82
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c167
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.h19
-rw-r--r--arch/powerpc/mm/fault.c55
-rw-r--r--arch/powerpc/mm/hash_native_64.c4
-rw-r--r--arch/powerpc/mm/hash_utils_64.c13
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c4
-rw-r--r--arch/powerpc/mm/hugetlbpage.c14
-rw-r--r--arch/powerpc/mm/mem.c18
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c9
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c9
-rw-r--r--arch/powerpc/mm/mmu_decl.h6
-rw-r--r--arch/powerpc/mm/numa.c11
-rw-r--r--arch/powerpc/mm/pgtable-book3e.c9
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c11
-rw-r--r--arch/powerpc/mm/pgtable-hash64.c7
-rw-r--r--arch/powerpc/mm/pgtable-radix.c65
-rw-r--r--arch/powerpc/mm/pgtable.c32
-rw-r--r--arch/powerpc/mm/pgtable_32.c72
-rw-r--r--arch/powerpc/mm/pgtable_64.c57
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c4
-rw-r--r--arch/powerpc/mm/slb.c784
-rw-r--r--arch/powerpc/mm/slb_low.S335
-rw-r--r--arch/powerpc/mm/slice.c38
-rw-r--r--arch/powerpc/mm/tlb-radix.c11
-rw-r--r--arch/powerpc/mm/tlb_nohash.c3
-rw-r--r--arch/powerpc/oprofile/Makefile1
-rw-r--r--arch/powerpc/oprofile/backtrace.c2
-rw-r--r--arch/powerpc/perf/Makefile1
-rw-r--r--arch/powerpc/perf/imc-pmu.c2
-rw-r--r--arch/powerpc/perf/power7-pmu.c1
-rw-r--r--arch/powerpc/platforms/40x/Kconfig9
-rw-r--r--arch/powerpc/platforms/44x/Kconfig22
-rw-r--r--arch/powerpc/platforms/44x/fsp2.c8
-rw-r--r--arch/powerpc/platforms/4xx/ocm.c7
-rw-r--r--arch/powerpc/platforms/4xx/soc.c2
-rw-r--r--arch/powerpc/platforms/82xx/Kconfig1
-rw-r--r--arch/powerpc/platforms/85xx/smp.c4
-rw-r--r--arch/powerpc/platforms/8xx/m8xx_setup.c5
-rw-r--r--arch/powerpc/platforms/8xx/machine_check.c4
-rw-r--r--arch/powerpc/platforms/Kconfig21
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype5
-rw-r--r--arch/powerpc/platforms/Makefile2
-rw-r--r--arch/powerpc/platforms/cell/Kconfig3
-rw-r--r--arch/powerpc/platforms/cell/cpufreq_spudemand.c2
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c4
-rw-r--r--arch/powerpc/platforms/cell/spu_manage.c25
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c26
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c9
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c2
-rw-r--r--arch/powerpc/platforms/maple/Kconfig1
-rw-r--r--arch/powerpc/platforms/pasemi/Kconfig1
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c2
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c2
-rw-r--r--arch/powerpc/platforms/powermac/Makefile3
-rw-r--r--arch/powerpc/platforms/powermac/feature.c51
-rw-r--r--arch/powerpc/platforms/powermac/nvram.c4
-rw-r--r--arch/powerpc/platforms/powermac/setup.c15
-rw-r--r--arch/powerpc/platforms/powermac/time.c126
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig6
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c62
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c25
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c198
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c3
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c4
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c9
-rw-r--r--arch/powerpc/platforms/powernv/setup.c47
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig2
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c2
-rw-r--r--arch/powerpc/platforms/ps3/setup.c4
-rw-r--r--arch/powerpc/platforms/ps3/spu.c3
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig9
-rw-r--r--arch/powerpc/platforms/pseries/Makefile3
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c41
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c4
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c66
-rw-r--r--arch/powerpc/platforms/pseries/event_sources.c40
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c28
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c124
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c2
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c295
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c5
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c23
-rw-r--r--arch/powerpc/platforms/pseries/msi.c3
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c345
-rw-r--r--arch/powerpc/platforms/pseries/pci.c1
-rw-r--r--arch/powerpc/platforms/pseries/pmem.c164
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h11
-rw-r--r--arch/powerpc/platforms/pseries/ras.c308
-rw-r--r--arch/powerpc/platforms/pseries/setup.c14
-rw-r--r--arch/powerpc/platforms/pseries/vio.c27
-rw-r--r--arch/powerpc/sysdev/Kconfig5
-rw-r--r--arch/powerpc/sysdev/Makefile3
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c2
-rw-r--r--arch/powerpc/sysdev/fsl_85xx_cache_sram.c8
-rw-r--r--arch/powerpc/sysdev/ipic.c2
-rw-r--r--arch/powerpc/sysdev/msi_bitmap.c4
-rw-r--r--arch/powerpc/sysdev/xics/Makefile1
-rw-r--r--arch/powerpc/sysdev/xive/Kconfig3
-rw-r--r--arch/powerpc/sysdev/xive/Makefile1
-rw-r--r--arch/powerpc/sysdev/xive/common.c7
-rw-r--r--arch/powerpc/sysdev/xive/native.c11
-rw-r--r--arch/powerpc/xmon/Makefile5
-rw-r--r--arch/powerpc/xmon/xmon.c56
285 files changed, 9355 insertions, 4243 deletions
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild
new file mode 100644
index 000000000000..1625a06802ca
--- /dev/null
+++ b/arch/powerpc/Kbuild
@@ -0,0 +1,16 @@
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y += kernel/
+obj-y += mm/
+obj-y += lib/
+obj-y += sysdev/
+obj-y += platforms/
+obj-y += math-emu/
+obj-y += crypto/
+obj-y += net/
+
+obj-$(CONFIG_XMON) += xmon/
+obj-$(CONFIG_KVM) += kvm/
+
+obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_FILE) += purgatory/
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a80669209155..2d51b2bd4aa1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,7 +137,7 @@ config PPC
 	select ARCH_HAS_PMEM_API if PPC64
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_MEMBARRIER_CALLBACKS
-	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
+	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -180,6 +180,8 @@ config PPC
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_CBPF_JIT if !PPC64
+	select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
+	select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
 	select HAVE_CONTEXT_TRACKING if PPC64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
@@ -188,6 +190,7 @@ config PPC
 	select HAVE_EBPF_JIT if PPC64
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
@@ -203,7 +206,6 @@ config PPC
 	select HAVE_KRETPROBES
 	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
 	select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
@@ -228,7 +230,6 @@ config PPC
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
 	select NEED_SG_DMA_LENGTH
-	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
@@ -285,12 +286,10 @@ config ARCH_MAY_HAVE_PC_FDC

 config PPC_UDBG_16550
 	bool
-	default n

 config GENERIC_TBSYNC
 	bool
 	default y if PPC32 && SMP
-	default n

 config AUDIT_ARCH
 	bool
@@ -309,13 +308,11 @@ config EPAPR_BOOT
 	bool
 	help
 	  Used to allow a board to specify it wants an ePAPR compliant wrapper.
-	default n

 config DEFAULT_UIMAGE
 	bool
 	help
 	  Used to allow a board to specify it wants a uImage built by default
-	default n

 config ARCH_HIBERNATION_POSSIBLE
 	bool
@@ -329,11 +326,9 @@ config ARCH_SUSPEND_POSSIBLE

 config PPC_DCR_NATIVE
 	bool
-	default n

 config PPC_DCR_MMIO
 	bool
-	default n

 config PPC_DCR
 	bool
@@ -344,7 +339,6 @@ config PPC_OF_PLATFORM_PCI
 	bool
 	depends on PCI
 	depends on PPC64 # not supported on 32 bits yet
-	default n

 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	depends on PPC32 || PPC_BOOK3S_64
@@ -447,14 +441,12 @@ config PPC_TRANSACTIONAL_MEM
 	depends on SMP
 	select ALTIVEC
 	select VSX
-	default n
 	---help---
 	  Support user-mode Transactional Memory on POWERPC.

 config LD_HEAD_STUB_CATCH
 	bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
 	depends on PPC64
-	default n
 	help
 	  Very large kernels can cause linker branch stubs to be generated by
 	  code in head_64.S, which moves the head text sections out of their
@@ -557,7 +549,6 @@ config RELOCATABLE
 config RELOCATABLE_TEST
 	bool "Test relocatable kernel"
 	depends on (PPC64 && RELOCATABLE)
-	default n
 	help
 	  This runs the relocatable kernel at the address it was initially
 	  loaded at, which tends to be non-zero and therefore test the
@@ -769,7 +760,6 @@ config PPC_SUBPAGE_PROT

 config PPC_COPRO_BASE
 	bool
-	default n

 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
@@ -892,7 +882,6 @@ config PPC_INDIRECT_PCI
 	bool
 	depends on PCI
 	default y if 40x || 44x
-	default n

 config EISA
 	bool
@@ -989,7 +978,6 @@ source "drivers/pcmcia/Kconfig"

 config HAS_RAPIDIO
 	bool
-	default n

 config RAPIDIO
 	tristate "RapidIO support"
@@ -1012,7 +1000,6 @@ endmenu

 config NONSTATIC_KERNEL
 	bool
-	default n

 menu "Advanced setup"
 	depends on PPC32
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index fd63cd914a74..f4961fbcb48d 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -2,7 +2,6 @@

 config PPC_DISABLE_WERROR
 	bool "Don't build arch/powerpc code with -Werror"
-	default n
 	help
 	  This option tells the compiler NOT to build the code under
 	  arch/powerpc with the -Werror flag (which means warnings
@@ -56,7 +55,6 @@ config PPC_EMULATED_STATS
 config CODE_PATCHING_SELFTEST
 	bool "Run self-tests of the code-patching code"
 	depends on DEBUG_KERNEL
-	default n

 config JUMP_LABEL_FEATURE_CHECKS
 	bool "Enable use of jump label for cpu/mmu_has_feature()"
@@ -70,7 +68,6 @@ config JUMP_LABEL_FEATURE_CHECKS
 config JUMP_LABEL_FEATURE_CHECK_DEBUG
 	bool "Do extra check on feature fixup calls"
 	depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS
-	default n
 	help
 	  This tries to catch incorrect usage of cpu_has_feature() and
 	  mmu_has_feature() in the code.
@@ -80,16 +77,13 @@ config JUMP_LABEL_FEATURE_CHECK_DEBUG
 config FTR_FIXUP_SELFTEST
 	bool "Run self-tests of the feature-fixup code"
 	depends on DEBUG_KERNEL
-	default n

 config MSI_BITMAP_SELFTEST
 	bool "Run self-tests of the MSI bitmap code"
 	depends on DEBUG_KERNEL
-	default n

 config PPC_IRQ_SOFT_MASK_DEBUG
 	bool "Include extra checks for powerpc irq soft masking"
-	default n

 config XMON
 	bool "Include xmon kernel debugger"
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 11a1acba164a..17be664dafa2 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -112,6 +112,13 @@ KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION)
 KBUILD_ARFLAGS	+= --target=elf$(BITS)-$(GNUTARGET)
 endif

+cflags-$(CONFIG_STACKPROTECTOR)	+= -mstack-protector-guard=tls
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r13
+else
+cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r2
+endif
+
 LDFLAGS_vmlinux-y := -Bstatic
 LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
 LDFLAGS_vmlinux	:= $(LDFLAGS_vmlinux-y)
@@ -160,8 +167,17 @@ else
 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
 endif

+ifdef CONFIG_FUNCTION_TRACER
+CC_FLAGS_FTRACE := -pg
 ifdef CONFIG_MPROFILE_KERNEL
-	CC_FLAGS_FTRACE := -pg -mprofile-kernel
+CC_FLAGS_FTRACE += -mprofile-kernel
+endif
+# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
+ifneq ($(cc-name),clang)
+CC_FLAGS_FTRACE	+= $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
+endif
 endif

 CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU))
@@ -229,16 +245,15 @@ ifdef CONFIG_6xx
 KBUILD_CFLAGS += -mcpu=powerpc
 endif

-# Work around a gcc code-gen bug with -fno-omit-frame-pointer.
-ifdef CONFIG_FUNCTION_TRACER
-KBUILD_CFLAGS += -mno-sched-epilog
-endif
-
 cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
 cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec)
 cpu-as-$(CONFIG_E200)		+= -Wa,-me200
 cpu-as-$(CONFIG_E500)		+= -Wa,-me500
-cpu-as-$(CONFIG_PPC_BOOK3S_64)	+= -Wa,-mpower4
+
+# When using '-many -mpower4' gas will first try and find a matching power4
+# mnemonic and failing that it will allow any valid mnemonic that GAS knows
+# about. GCC will pass -many to GAS when assembling, clang does not.
+cpu-as-$(CONFIG_PPC_BOOK3S_64)	+= -Wa,-mpower4 -Wa,-many
 cpu-as-$(CONFIG_PPC_E500MC)	+= $(call as-option,-Wa$(comma)-me500mc)

 KBUILD_AFLAGS += $(cpu-as-y)
@@ -258,18 +273,8 @@ head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o
 head-$(CONFIG_ALTIVEC)		+= arch/powerpc/kernel/vector.o
 head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)	+= arch/powerpc/kernel/prom_init.o

-core-y				+= arch/powerpc/kernel/ \
-				   arch/powerpc/mm/ \
-				   arch/powerpc/lib/ \
-				   arch/powerpc/sysdev/ \
-				   arch/powerpc/platforms/ \
-				   arch/powerpc/math-emu/ \
-				   arch/powerpc/crypto/ \
-				   arch/powerpc/net/
-core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
-core-$(CONFIG_KVM)		+= arch/powerpc/kvm/
-core-$(CONFIG_PERF_EVENTS)	+= arch/powerpc/perf/
-core-$(CONFIG_KEXEC_FILE)	+= arch/powerpc/purgatory/
+# See arch/powerpc/Kbuild for content of core part of the kernel
+core-y += arch/powerpc/

 drivers-$(CONFIG_OPROFILE)	+= arch/powerpc/oprofile/

@@ -293,9 +298,6 @@ $(BOOT_TARGETS2): vmlinux
 bootwrapper_install:
 	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)

-%.dtb: scripts
-	$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
-
 # Used to create 'merged defconfigs'
 # To use it $(call) it with the first argument as the base defconfig
 # and the second argument as a space separated list of .config files to merge,
@@ -400,40 +402,20 @@ archclean:

 archprepare: checkbin

-# Use the file '.tmp_gas_check' for binutils tests, as gas won't output
-# to stdout and these checks are run even on install targets.
-TOUT	:= .tmp_gas_check
+ifdef CONFIG_STACKPROTECTOR
+prepare: stack_protector_prepare
+
+stack_protector_prepare: prepare0
+ifdef CONFIG_PPC64
+	$(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h))
+else
+	$(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h))
+endif
+endif

-# Check gcc and binutils versions:
-# - gcc-3.4 and binutils-2.14 are a fatal combination
-# - Require gcc 4.0 or above on 64-bit
-# - gcc-4.2.0 has issues compiling modules on 64-bit
+# Check toolchain versions:
+# - gcc-4.6 is the minimum kernel-wide version so nothing required.
 checkbin:
-	@if test "$(cc-name)" != "clang" \
-	    && test "$(cc-version)" = "0304" ; then \
-		if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
-			echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
-			echo 'correctly with gcc-3.4 and your version of binutils.'; \
-			echo '*** Please upgrade your binutils or downgrade your gcc'; \
-			false; \
-		fi ; \
-	fi
-	@if test "$(cc-name)" != "clang" \
-	    && test "$(cc-version)" -lt "0400" \
-	    && test "x${CONFIG_PPC64}" = "xy" ; then \
-		echo -n "Sorry, GCC v4.0 or above is required to build " ; \
-		echo "the 64-bit powerpc kernel." ; \
-		false ; \
-	fi
-	@if test "$(cc-name)" != "clang" \
-	    && test "$(cc-fullversion)" = "040200" \
-	    && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
-		echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
-		echo 'kernel with modules enabled.' ; \
-		echo -n '*** Please use a different GCC version or ' ; \
-		echo 'disable kernel modules' ; \
-		false ; \
-	fi
 	@if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \
 	    && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \
 		echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
@@ -441,7 +423,3 @@ checkbin:
 		echo -n '*** Please use a different binutils version.' ; \
 		false ; \
 	fi
-
-
-CLEAN_FILES += $(TOUT)
-
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index f92d0530ceb1..32034a0cc554 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -44,4 +44,5 @@ fdt_sw.c
 fdt_wip.c
 libfdt.h
 libfdt_internal.h
+autoconf.h

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 0fb96c26136f..39354365f54a 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -32,8 +32,8 @@ else
 endif

 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-		 -fno-strict-aliasing -Os -msoft-float -pipe \
-		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
+		 -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
+		 -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
 		 -D$(compress-y)

 ifdef CONFIG_PPC64_BOOT_WRAPPER
@@ -197,9 +197,14 @@ $(obj)/empty.c:
 $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
 	$(Q)cp $< $@

+$(obj)/serial.c: $(obj)/autoconf.h
+
+$(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/%
+	$(Q)cp $< $@
+
 clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
 		$(zlib-decomp-) $(libfdt) $(libfdtheader) \
-		empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
+		autoconf.h empty.c zImage.coff.lds zImage.ps3.lds zImage.lds

 quiet_cmd_bootcc = BOOTCC $@
       cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
@@ -304,9 +309,9 @@ image-$(CONFIG_PPC_ADDER875) += cuImage.adder875-uboot \
 				   dtbImage.adder875-redboot

 # Board ports in arch/powerpc/platform/52xx/Kconfig
-image-$(CONFIG_PPC_LITE5200)	+= cuImage.lite5200 lite5200.dtb
-image-$(CONFIG_PPC_LITE5200)	+= cuImage.lite5200b lite5200b.dtb
-image-$(CONFIG_PPC_MEDIA5200)	+= cuImage.media5200 media5200.dtb
+image-$(CONFIG_PPC_LITE5200)	+= cuImage.lite5200
+image-$(CONFIG_PPC_LITE5200)	+= cuImage.lite5200b
+image-$(CONFIG_PPC_MEDIA5200)	+= cuImage.media5200

 # Board ports in arch/powerpc/platform/82xx/Kconfig
 image-$(CONFIG_MPC8272_ADS)	+= cuImage.mpc8272ads
@@ -381,11 +386,11 @@ $(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperb
 	$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))

 # dtbImage% - a dtbImage is a zImage with an embedded device tree blob
-$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-	$(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+	$(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-	$(call if_changed,wrap,$*,,$(obj)/$*.dtb)
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+	$(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb)

 # This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
 # prefix
@@ -395,36 +400,33 @@ $(obj)/vmlinux.strip: vmlinux
 $(obj)/uImage: vmlinux $(wrapperbits) FORCE
 	$(call if_changed,wrap,uboot)

-$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-
-$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb)
+$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb)

-$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb)

-$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb)
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb)

-$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-	$(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb)
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)

-# Rule to build device tree blobs
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
-	$(call if_changed_dep,dtc)
+$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+	$(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb)

-$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE
-	$(call if_changed_dep,dtc)
+# Needed for the above targets to work with dts/fsl/ files
+$(obj)/dts/%.dtb: $(obj)/dts/fsl/%.dtb
+	@cp $< $@

 # If there isn't a platform selected then just strip the vmlinux.
 ifeq (,$(image-y))
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index dcf2f15e6797..32dfe6d083f3 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -47,8 +47,10 @@ p_end: .long _end
 p_pstack:	.long	_platform_stack_top
 #endif

-	.weak	_zimage_start
 	.globl	_zimage_start
+	/* Clang appears to require the .weak directive to be after the symbol
+	 * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */
+	.weak	_zimage_start
 _zimage_start:
 	.globl	_zimage_start_lib
 _zimage_start_lib:
diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile
new file mode 100644
index 000000000000..fb335d05aae8
--- /dev/null
+++ b/arch/powerpc/boot/dts/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+subdir-y += fsl
+
+dtstree := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/dts/fsl/Makefile b/arch/powerpc/boot/dts/fsl/Makefile
new file mode 100644
index 000000000000..3bae982641e9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+dtstree := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index 2a0c8b1bf147..2abc8e83b95e 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -5,6 +5,8 @@
 #include <types.h>
 #include <string.h>

+#define INT_MAX ((int)(~0U>>1))
+
 #include "of.h"

 typedef unsigned long uintptr_t;
diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c
index 0272570d02de..dfb199ef5b94 100644
--- a/arch/powerpc/boot/opal.c
+++ b/arch/powerpc/boot/opal.c
@@ -13,8 +13,6 @@
 #include <libfdt.h>
 #include "../include/asm/opal-api.h"

-#ifdef CONFIG_PPC64_BOOT_WRAPPER
-
 /* Global OPAL struct used by opal-call.S */
 struct opal {
 	u64 base;
@@ -101,9 +99,3 @@ int opal_console_init(void *devp, struct serial_console_data *scdp)

 	return 0;
 }
-#else
-int opal_console_init(void *devp, struct serial_console_data *scdp)
-{
-	return -1;
-}
-#endif /* __powerpc64__ */
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index 48e3743faedf..f045f8494bf9 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -18,6 +18,7 @@
 #include "stdio.h"
 #include "io.h"
 #include "ops.h"
+#include "autoconf.h"

 static int serial_open(void)
 {
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 67c39f4acede..f686cc1eac0b 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -262,3 +262,4 @@ CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index 59e47ec85336..f71eddafb02f 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -112,3 +112,4 @@ CONFIG_PPC_EARLY_DEBUG=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
 # CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 6ab34e60495f..ef2ef98d3f28 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -44,6 +44,9 @@ CONFIG_PPC_MEMTRACE=y
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
 CONFIG_CPU_IDLE=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
@@ -350,3 +353,4 @@ CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=m
 CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 5033e630afea..f2515674a1e2 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -40,6 +40,9 @@ CONFIG_PS3_LPM=m
 CONFIG_PPC_IBM_CELL_BLADE=y
 CONFIG_RTAS_FLASH=m
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
 CONFIG_CPU_FREQ_PMAC64=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
@@ -365,3 +368,4 @@ CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=m
 CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 187e2f7c12c8..cf8d55f67272 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -171,3 +171,4 @@ CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_LZO=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 0dd5cf7b566d..5e09a40cbcbf 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -325,3 +325,4 @@ CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=m
 CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 6bd5e7261335..cfdd08897a06 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -3,20 +3,17 @@ CONFIG_ALTIVEC=y
 CONFIG_VSX=y
 CONFIG_NR_CPUS=2048
 CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_KERNEL_XZ=y
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
+# CONFIG_CPU_ISOLATION is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=20
-CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_RD_GZIP is not set
 # CONFIG_RD_BZIP2 is not set
@@ -24,8 +21,14 @@ CONFIG_BLK_DEV_INITRD=y
 # CONFIG_RD_LZO is not set
 # CONFIG_RD_LZ4 is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SGETMASK_SYSCALL is not set
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_SHMEM is not set
+# CONFIG_AIO is not set
 CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB_FREELIST_HARDENED=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STRICT_KERNEL_RWX=y
 CONFIG_MODULES=y
@@ -35,7 +38,9 @@ CONFIG_MODULE_SIG_FORCE=y
 CONFIG_MODULE_SIG_SHA512=y
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_PPC_VAS is not set
 # CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_HZ_100=y
@@ -48,8 +53,9 @@ CONFIG_NUMA=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_SCHED_SMT=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=tty0 console=hvc0 powersave=off"
+CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
 # CONFIG_SECCOMP is not set
+# CONFIG_PPC_MEM_KEYS is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -60,7 +66,6 @@ CONFIG_SYN_COOKIES=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_IPV6 is not set
 CONFIG_DNS_RESOLVER=y
 # CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -73,8 +78,10 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
 CONFIG_VIRTIO_BLK=m
 CONFIG_BLK_DEV_NVME=m
-CONFIG_EEPROM_AT24=y
+CONFIG_NVME_MULTIPATH=y
+CONFIG_EEPROM_AT24=m
 # CONFIG_CXL is not set
+# CONFIG_OCXL is not set
 CONFIG_BLK_DEV_SD=m
 CONFIG_BLK_DEV_SR=m
 CONFIG_BLK_DEV_SR_VENDOR=y
@@ -85,7 +92,6 @@ CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_CXGB3_ISCSI=m
 CONFIG_SCSI_CXGB4_ISCSI=m
 CONFIG_SCSI_BNX2_ISCSI=m
-CONFIG_BE2ISCSI=m
 CONFIG_SCSI_AACRAID=m
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
@@ -102,7 +108,7 @@ CONFIG_SCSI_VIRTIO=m
 CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_ALUA=m
 CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI=m
 # CONFIG_ATA_SFF is not set
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=m
@@ -119,25 +125,72 @@ CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_ZERO=m
 CONFIG_DM_MULTIPATH=m
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_TIGON3=y
+# CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+CONFIG_TIGON3=m
 CONFIG_BNX2X=m
-CONFIG_CHELSIO_T1=y
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+CONFIG_CHELSIO_T1=m
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
 CONFIG_BE2NET=m
-CONFIG_S2IO=m
-CONFIG_E100=m
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
 CONFIG_E1000=m
-CONFIG_E1000E=m
+CONFIG_IGB=m
 CONFIG_IXGB=m
 CONFIG_IXGBE=m
+CONFIG_I40E=m
+CONFIG_S2IO=m
+# CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+# CONFIG_MLX4_CORE_GEN2 is not set
 CONFIG_MLX5_CORE=m
-CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MYRI10GE=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
 CONFIG_QLGE=m
 CONFIG_NETXEN_NIC=m
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
 CONFIG_SFC=m
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_PHYLIB=y
 # CONFIG_USB_NET_DRIVERS is not set
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
@@ -149,39 +202,51 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_IPMI_HANDLER=y
 CONFIG_IPMI_DEVICE_INTERFACE=y
 CONFIG_IPMI_POWERNV=y
+CONFIG_IPMI_WATCHDOG=y
 CONFIG_HW_RANDOM=y
+CONFIG_TCG_TPM=y
 CONFIG_TCG_TIS_I2C_NUVOTON=y
+CONFIG_I2C=y
 # CONFIG_I2C_COMPAT is not set
 CONFIG_I2C_CHARDEV=y
 # CONFIG_I2C_HELPER_AUTO is not set
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_OPAL=m
+CONFIG_PPS=y
+CONFIG_SENSORS_IBMPOWERNV=m
+CONFIG_DRM=m
 CONFIG_DRM_AST=m
+CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_MODE_HELPERS=y
-CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
-CONFIG_FB_MATROX_MILLENIUM=y
-CONFIG_FB_MATROX_MYSTIQUE=y
-CONFIG_FB_MATROX_G=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_GENERIC is not set
 # CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_HID_GENERIC=m
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_ITE=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
 CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_XHCI_HCD=y
-CONFIG_USB_EHCI_HCD=y
+CONFIG_USB=m
+CONFIG_USB_XHCI_HCD=m
+CONFIG_USB_EHCI_HCD=m
 # CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=y
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_STORAGE=m
 CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_OPAL=m
 CONFIG_RTC_DRV_GENERIC=m
 CONFIG_VIRT_DRIVERS=y
-CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_PCI=m
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT4_FS=m
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -195,10 +260,9 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS=y
 CONFIG_NLS_DEFAULT="utf8"
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ASCII=y
@@ -207,26 +271,24 @@ CONFIG_NLS_UTF8=y
 CONFIG_CRC16=y
 CONFIG_CRC_ITU_T=y
 CONFIG_LIBCRC32C=y
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
 CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
 CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
 CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHEDSTATS=y
+# CONFIG_SCHED_DEBUG is not set
 # CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
 CONFIG_XMON=y
 CONFIG_XMON_DEFAULT=y
-CONFIG_SECURITY=y
-CONFIG_IMA=y
-CONFIG_EVM=y
+CONFIG_ENCRYPTED_KEYS=y
 # CONFIG_CRYPTO_ECHAINIV is not set
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_CMAC=y
-CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_ARC4=y
-CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h
index 3abcf98ed2e0..c607c5d835cc 100644
--- a/arch/powerpc/include/asm/accounting.h
+++ b/arch/powerpc/include/asm/accounting.h
@@ -15,8 +15,10 @@ struct cpu_accounting_data {
15 /* Accumulated cputime values to flush on ticks*/ 15 /* Accumulated cputime values to flush on ticks*/
16 unsigned long utime; 16 unsigned long utime;
17 unsigned long stime; 17 unsigned long stime;
18#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
18 unsigned long utime_scaled; 19 unsigned long utime_scaled;
19 unsigned long stime_scaled; 20 unsigned long stime_scaled;
21#endif
20 unsigned long gtime; 22 unsigned long gtime;
21 unsigned long hardirq_time; 23 unsigned long hardirq_time;
22 unsigned long softirq_time; 24 unsigned long softirq_time;
@@ -25,8 +27,10 @@ struct cpu_accounting_data {
25 /* Internal counters */ 27 /* Internal counters */
26 unsigned long starttime; /* TB value snapshot */ 28 unsigned long starttime; /* TB value snapshot */
27 unsigned long starttime_user; /* TB value on exit to usermode */ 29 unsigned long starttime_user; /* TB value on exit to usermode */
30#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
28 unsigned long startspurr; /* SPURR value snapshot */ 31 unsigned long startspurr; /* SPURR value snapshot */
29 unsigned long utime_sspurr; /* ->user_time when ->startspurr set */ 32 unsigned long utime_sspurr; /* ->user_time when ->startspurr set */
33#endif
30}; 34};
31 35
32#endif 36#endif
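
The hunk above gates the scaled-cputime fields behind CONFIG_ARCH_HAS_SCALED_CPUTIME, so any code that touches them needs the same guard. A minimal sketch of that pattern, using a simplified stand-in struct rather than the kernel's real cpu_accounting_data:

/*
 * Minimal sketch of the config-guard pattern, not taken from this patch.
 * The struct is a simplified stand-in for illustration only.
 */
struct demo_accounting {
	unsigned long utime;
	unsigned long stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	unsigned long utime_scaled;
	unsigned long stime_scaled;
#endif
};

static inline void demo_account_user(struct demo_accounting *acct,
				     unsigned long delta,
				     unsigned long delta_scaled)
{
	acct->utime += delta;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	/* Only touch the scaled counters when the arch provides them. */
	acct->utime_scaled += delta_scaled;
#else
	(void)delta_scaled;
#endif
}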
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1f4691ce4126..ec691d489656 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -63,7 +63,6 @@ void program_check_exception(struct pt_regs *regs);
63void alignment_exception(struct pt_regs *regs); 63void alignment_exception(struct pt_regs *regs);
64void slb_miss_bad_addr(struct pt_regs *regs); 64void slb_miss_bad_addr(struct pt_regs *regs);
65void StackOverflow(struct pt_regs *regs); 65void StackOverflow(struct pt_regs *regs);
66void nonrecoverable_exception(struct pt_regs *regs);
67void kernel_fp_unavailable_exception(struct pt_regs *regs); 66void kernel_fp_unavailable_exception(struct pt_regs *regs);
68void altivec_unavailable_exception(struct pt_regs *regs); 67void altivec_unavailable_exception(struct pt_regs *regs);
69void vsx_unavailable_exception(struct pt_regs *regs); 68void vsx_unavailable_exception(struct pt_regs *regs);
@@ -78,6 +77,8 @@ void kernel_bad_stack(struct pt_regs *regs);
78void system_reset_exception(struct pt_regs *regs); 77void system_reset_exception(struct pt_regs *regs);
79void machine_check_exception(struct pt_regs *regs); 78void machine_check_exception(struct pt_regs *regs);
80void emulation_assist_interrupt(struct pt_regs *regs); 79void emulation_assist_interrupt(struct pt_regs *regs);
80long do_slb_fault(struct pt_regs *regs, unsigned long ea);
81void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err);
81 82
82/* signals, syscalls and interrupts */ 83/* signals, syscalls and interrupts */
83long sys_swapcontext(struct ucontext __user *old_ctx, 84long sys_swapcontext(struct ucontext __user *old_ctx,
@@ -150,4 +151,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
150 151
151extern long flush_count_cache; 152extern long flush_count_cache;
152 153
154#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
155void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
156void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
157#else
158static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
159 bool preserve_nv) { }
160static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
161 bool preserve_nv) { }
162#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
163
164void kvmhv_save_host_pmu(void);
165void kvmhv_load_host_pmu(void);
166void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
167void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
168
169int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
170
171long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
172long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
173 unsigned long dabrx);
174
153#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ 175#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
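
The CONFIG_PPC_TRANSACTIONAL_MEM block added above pairs real prototypes with empty static inline stubs, so callers need no #ifdef of their own. A generic sketch of that pattern, with hypothetical names:

/*
 * Generic sketch of the stub pattern, hypothetical names only: declare
 * the real function under the config option and provide an empty
 * static inline stub otherwise, so call sites stay #ifdef-free.
 */
#ifdef CONFIG_EXAMPLE_FEATURE
void example_save_state(int cpu);
#else
static inline void example_save_state(int cpu) { }
#endif

static void example_switch(int cpu)
{
	/* Compiles to nothing when CONFIG_EXAMPLE_FEATURE is disabled. */
	example_save_state(cpu);
}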
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 751cf931bb3f..c21d33704633 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -8,7 +8,97 @@
8#include <asm/book3s/32/hash.h> 8#include <asm/book3s/32/hash.h>
9 9
10/* And here we include common definitions */ 10/* And here we include common definitions */
11#include <asm/pte-common.h> 11
12#define _PAGE_KERNEL_RO 0
13#define _PAGE_KERNEL_ROX 0
14#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW)
15#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW)
16
17#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
18
19#ifndef __ASSEMBLY__
20
21static inline bool pte_user(pte_t pte)
22{
23 return pte_val(pte) & _PAGE_USER;
24}
25#endif /* __ASSEMBLY__ */
26
27/*
28 * Location of the PFN in the PTE. Most 32-bit platforms use the same
29 * as _PAGE_SHIFT here (ie, naturally aligned).
30 * Platforms that differ pre-define the value themselves, so we don't override it here.
31 */
32#define PTE_RPN_SHIFT (PAGE_SHIFT)
33
34/*
35 * The mask covered by the RPN must be a ULL on 32-bit platforms with
36 * 64-bit PTEs.
37 */
38#ifdef CONFIG_PTE_64BIT
39#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
40#else
41#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
42#endif
43
44/*
45 * _PAGE_CHG_MASK masks of bits that are to be preserved across
46 * pgprot changes.
47 */
48#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HASHPTE | _PAGE_DIRTY | \
49 _PAGE_ACCESSED | _PAGE_SPECIAL)
50
51/*
52 * We define 2 sets of base prot bits, one for basic pages (ie,
53 * cacheable kernel and user pages) and one for non cacheable
54 * pages. We always set _PAGE_COHERENT when SMP is enabled or
55 * the processor might need it for DMA coherency.
56 */
57#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
58#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
59
60/*
61 * Permission masks used to generate the __P and __S table.
62 *
63 * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
64 *
65 * Write permissions imply read permissions for now.
66 */
67#define PAGE_NONE __pgprot(_PAGE_BASE)
68#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
69#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
70#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
71#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER)
72#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
73#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER)
74
75/* Permission masks used for kernel mappings */
76#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
77#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
78#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
79 _PAGE_NO_CACHE | _PAGE_GUARDED)
80#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
81#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
82#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
83
84/*
85 * Protection used for kernel text. We want the debuggers to be able to
86 * set breakpoints anywhere, so don't write protect the kernel text
87 * on platforms where such control is possible.
88 */
89#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
90 defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
91#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
92#else
93#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
94#endif
95
96/* Make modules code happy. We don't set RO yet */
97#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
98
99/* Advertise special mapping type for AGP */
100#define PAGE_AGP (PAGE_KERNEL_NC)
101#define HAVE_PAGE_AGP
12 102
13#define PTE_INDEX_SIZE PTE_SHIFT 103#define PTE_INDEX_SIZE PTE_SHIFT
14#define PMD_INDEX_SIZE 0 104#define PMD_INDEX_SIZE 0
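
PTE_RPN_MASK above is derived from PTE_RPN_SHIFT and the PTE width. A worked example, assuming 4K pages (PAGE_SHIFT = 12) and CONFIG_PTE_64BIT; both values are assumptions for illustration, not stated by this hunk:

/*
 * Hedged sketch: evaluates the PTE_RPN_MASK expression above with
 * assumed values (PAGE_SHIFT == 12, 64-bit PTEs).
 */
#include <stdio.h>

#define DEMO_PTE_RPN_SHIFT 12
#define DEMO_PTE_RPN_MASK  (~((1ULL << DEMO_PTE_RPN_SHIFT) - 1))

int main(void)
{
	/* Prints 0xfffffffffffff000: everything above the low 12 flag bits. */
	printf("PTE_RPN_MASK = 0x%016llx\n",
	       (unsigned long long)DEMO_PTE_RPN_MASK);
	return 0;
}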
@@ -219,14 +309,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
219static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, 309static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
220 pte_t *ptep) 310 pte_t *ptep)
221{ 311{
222 pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO); 312 pte_update(ptep, _PAGE_RW, 0);
223} 313}
224static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
225 unsigned long addr, pte_t *ptep)
226{
227 ptep_set_wrprotect(mm, addr, ptep);
228}
229
230 314
231static inline void __ptep_set_access_flags(struct vm_area_struct *vma, 315static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
232 pte_t *ptep, pte_t entry, 316 pte_t *ptep, pte_t entry,
@@ -234,10 +318,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
234 int psize) 318 int psize)
235{ 319{
236 unsigned long set = pte_val(entry) & 320 unsigned long set = pte_val(entry) &
237 (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); 321 (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
238 unsigned long clr = ~pte_val(entry) & _PAGE_RO;
239 322
240 pte_update(ptep, clr, set); 323 pte_update(ptep, 0, set);
241 324
242 flush_tlb_page(vma, address); 325 flush_tlb_page(vma, address);
243} 326}
@@ -292,7 +375,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
292#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) 375#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
293#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) 376#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
294 377
295int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); 378int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
296 379
297/* Generic accessors to PTE bits */ 380/* Generic accessors to PTE bits */
298static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} 381static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
@@ -301,13 +384,28 @@ static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY);
301static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } 384static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); }
302static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } 385static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); }
303static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } 386static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
304static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } 387static inline bool pte_exec(pte_t pte) { return true; }
305 388
306static inline int pte_present(pte_t pte) 389static inline int pte_present(pte_t pte)
307{ 390{
308 return pte_val(pte) & _PAGE_PRESENT; 391 return pte_val(pte) & _PAGE_PRESENT;
309} 392}
310 393
394static inline bool pte_hw_valid(pte_t pte)
395{
396 return pte_val(pte) & _PAGE_PRESENT;
397}
398
399static inline bool pte_hashpte(pte_t pte)
400{
401 return !!(pte_val(pte) & _PAGE_HASHPTE);
402}
403
404static inline bool pte_ci(pte_t pte)
405{
406 return !!(pte_val(pte) & _PAGE_NO_CACHE);
407}
408
311/* 409/*
312 * We only find page table entry in the last level 410 * We only find page table entry in the last level
313 * Hence no need for other accessors 411 * Hence no need for other accessors
@@ -315,17 +413,14 @@ static inline int pte_present(pte_t pte)
315#define pte_access_permitted pte_access_permitted 413#define pte_access_permitted pte_access_permitted
316static inline bool pte_access_permitted(pte_t pte, bool write) 414static inline bool pte_access_permitted(pte_t pte, bool write)
317{ 415{
318 unsigned long pteval = pte_val(pte);
319 /* 416 /*
320 * A read-only access is controlled by _PAGE_USER bit. 417 * A read-only access is controlled by _PAGE_USER bit.
321 * We have _PAGE_READ set for WRITE and EXECUTE 418 * We have _PAGE_READ set for WRITE and EXECUTE
322 */ 419 */
323 unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER; 420 if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
324 421 return false;
325 if (write)
326 need_pte_bits |= _PAGE_WRITE;
327 422
328 if ((pteval & need_pte_bits) != need_pte_bits) 423 if (write && !pte_write(pte))
329 return false; 424 return false;
330 425
331 return true; 426 return true;
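
The rewritten pte_access_permitted() replaces the single mask comparison with individual helper checks; assuming pte_read() is always true on book3s/32 (which the old mask-based test also implied), the two forms are equivalent. A standalone sketch with made-up flag values that exhaustively compares them:

/*
 * Hedged sketch comparing the old mask-based test with the new
 * helper-based test from the hunk above. Flag values are invented;
 * only the decision logic is the point.
 */
#include <stdbool.h>
#include <stdio.h>

#define D_PRESENT 0x1
#define D_USER    0x2
#define D_RW      0x4

static bool old_check(unsigned long pte, bool write)
{
	unsigned long need = D_PRESENT | D_USER;

	if (write)
		need |= D_RW;
	return (pte & need) == need;
}

static bool new_check(unsigned long pte, bool write)
{
	if (!(pte & D_PRESENT) || !(pte & D_USER))
		return false;
	if (write && !(pte & D_RW))
		return false;
	return true;
}

int main(void)
{
	for (unsigned long pte = 0; pte < 8; pte++)
		for (int write = 0; write < 2; write++)
			if (old_check(pte, write) != new_check(pte, write))
				printf("mismatch at pte=%lu write=%d\n", pte, write);
	printf("checks agree on all combinations\n");
	return 0;
}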
@@ -354,6 +449,11 @@ static inline pte_t pte_wrprotect(pte_t pte)
354 return __pte(pte_val(pte) & ~_PAGE_RW); 449 return __pte(pte_val(pte) & ~_PAGE_RW);
355} 450}
356 451
452static inline pte_t pte_exprotect(pte_t pte)
453{
454 return pte;
455}
456
357static inline pte_t pte_mkclean(pte_t pte) 457static inline pte_t pte_mkclean(pte_t pte)
358{ 458{
359 return __pte(pte_val(pte) & ~_PAGE_DIRTY); 459 return __pte(pte_val(pte) & ~_PAGE_DIRTY);
@@ -364,6 +464,16 @@ static inline pte_t pte_mkold(pte_t pte)
364 return __pte(pte_val(pte) & ~_PAGE_ACCESSED); 464 return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
365} 465}
366 466
467static inline pte_t pte_mkexec(pte_t pte)
468{
469 return pte;
470}
471
472static inline pte_t pte_mkpte(pte_t pte)
473{
474 return pte;
475}
476
367static inline pte_t pte_mkwrite(pte_t pte) 477static inline pte_t pte_mkwrite(pte_t pte)
368{ 478{
369 return __pte(pte_val(pte) | _PAGE_RW); 479 return __pte(pte_val(pte) | _PAGE_RW);
@@ -389,6 +499,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
389 return pte; 499 return pte;
390} 500}
391 501
502static inline pte_t pte_mkprivileged(pte_t pte)
503{
504 return __pte(pte_val(pte) & ~_PAGE_USER);
505}
506
507static inline pte_t pte_mkuser(pte_t pte)
508{
509 return __pte(pte_val(pte) | _PAGE_USER);
510}
511
392static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 512static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
393{ 513{
394 return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); 514 return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 9a3798660cef..15bc16b1dc9c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -66,7 +66,7 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
66 * if it is not a pte and have hugepd shift mask 66 * if it is not a pte and have hugepd shift mask
67 * set, then it is a hugepd directory pointer 67 * set, then it is a hugepd directory pointer
68 */ 68 */
69 if (!(hpdval & _PAGE_PTE) && 69 if (!(hpdval & _PAGE_PTE) && (hpdval & _PAGE_PRESENT) &&
70 ((hpdval & HUGEPD_SHIFT_MASK) != 0)) 70 ((hpdval & HUGEPD_SHIFT_MASK) != 0))
71 return true; 71 return true;
72 return false; 72 return false;
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index d52a51b2ce7b..247aff9cc6ba 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -18,6 +18,11 @@
18#include <asm/book3s/64/hash-4k.h> 18#include <asm/book3s/64/hash-4k.h>
19#endif 19#endif
20 20
21/* Bits to set in a PMD/PUD/PGD entry valid bit*/
22#define HASH_PMD_VAL_BITS (0x8000000000000000UL)
23#define HASH_PUD_VAL_BITS (0x8000000000000000UL)
24#define HASH_PGD_VAL_BITS (0x8000000000000000UL)
25
21/* 26/*
22 * Size of EA range mapped by our pagetables. 27 * Size of EA range mapped by our pagetables.
23 */ 28 */
@@ -196,8 +201,7 @@ static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
196#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 201#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
197 202
198 203
199extern int hash__map_kernel_page(unsigned long ea, unsigned long pa, 204int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
200 unsigned long flags);
201extern int __meminit hash__vmemmap_create_mapping(unsigned long start, 205extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
202 unsigned long page_size, 206 unsigned long page_size,
203 unsigned long phys); 207 unsigned long phys);
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 50888388a359..5b0177733994 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -39,4 +39,7 @@ static inline bool gigantic_page_supported(void)
39} 39}
40#endif 40#endif
41 41
42/* hugepd entry valid bit */
43#define HUGEPD_VAL_BITS (0x8000000000000000UL)
44
42#endif 45#endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index b3520b549cba..12e522807f9f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -30,7 +30,7 @@
30 * SLB 30 * SLB
31 */ 31 */
32 32
33#define SLB_NUM_BOLTED 3 33#define SLB_NUM_BOLTED 2
34#define SLB_CACHE_ENTRIES 8 34#define SLB_CACHE_ENTRIES 8
35#define SLB_MIN_SIZE 32 35#define SLB_MIN_SIZE 32
36 36
@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
203 BUG(); 203 BUG();
204} 204}
205 205
206static inline unsigned int ap_to_shift(unsigned long ap)
207{
208 int psize;
209
210 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
211 if (mmu_psize_defs[psize].ap == ap)
212 return mmu_psize_defs[psize].shift;
213 }
214
215 return -1;
216}
217
206static inline unsigned long get_sllp_encoding(int psize) 218static inline unsigned long get_sllp_encoding(int psize)
207{ 219{
208 unsigned long sllp; 220 unsigned long sllp;
@@ -487,6 +499,8 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
487extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); 499extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
488extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); 500extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
489 501
502extern void hash__setup_new_exec(void);
503
490#ifdef CONFIG_PPC_PSERIES 504#ifdef CONFIG_PPC_PSERIES
491void hpte_init_pseries(void); 505void hpte_init_pseries(void);
492#else 506#else
@@ -495,11 +509,18 @@ static inline void hpte_init_pseries(void) { }
495 509
496extern void hpte_init_native(void); 510extern void hpte_init_native(void);
497 511
512struct slb_entry {
513 u64 esid;
514 u64 vsid;
515};
516
498extern void slb_initialize(void); 517extern void slb_initialize(void);
499extern void slb_flush_and_rebolt(void); 518void slb_flush_and_restore_bolted(void);
500void slb_flush_all_realmode(void); 519void slb_flush_all_realmode(void);
501void __slb_restore_bolted_realmode(void); 520void __slb_restore_bolted_realmode(void);
502void slb_restore_bolted_realmode(void); 521void slb_restore_bolted_realmode(void);
522void slb_save_contents(struct slb_entry *slb_ptr);
523void slb_dump_contents(struct slb_entry *slb_ptr);
503 524
504extern void slb_vmalloc_update(void); 525extern void slb_vmalloc_update(void);
505extern void slb_set_size(u16 size); 526extern void slb_set_size(u16 size);
@@ -512,13 +533,9 @@ extern void slb_set_size(u16 size);
512 * from mmu context id and effective segment id of the address. 533 * from mmu context id and effective segment id of the address.
513 * 534 *
514 * For user processes max context id is limited to MAX_USER_CONTEXT. 535 * For user processes max context id is limited to MAX_USER_CONTEXT.
515 536 * more details in get_user_context
516 * For kernel space, we use context ids 1-4 to map addresses as below: 537 *
517 * NOTE: each context only support 64TB now. 538 * For kernel space get_kernel_context
518 * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
519 * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
520 * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
521 * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
522 * 539 *
523 * The proto-VSIDs are then scrambled into real VSIDs with the 540 * The proto-VSIDs are then scrambled into real VSIDs with the
524 * multiplicative hash: 541 * multiplicative hash:
@@ -559,6 +576,21 @@ extern void slb_set_size(u16 size);
559#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1) 576#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1)
560 577
561/* 578/*
579 * Some configs now support MAX_PHYSMEM of more than 512TB, so we need
580 * more than one context for the linear mapping of the kernel.
581 * For vmalloc and memmap we use just one context of 512TB each. With a
582 * 64 byte struct page, memmap needs only 32 TB for 2PB (51 bits, MAX_PHYSMEM_BITS).
583 */
584#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
585#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
586#else
587#define MAX_KERNEL_CTX_CNT 1
588#endif
589
590#define MAX_VMALLOC_CTX_CNT 1
591#define MAX_MEMMAP_CTX_CNT 1
592
593/*
562 * 256MB segment 594 * 256MB segment
563 * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments 595 * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
564 * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts 596 * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
@@ -568,12 +600,13 @@ extern void slb_set_size(u16 size);
568 * We also need to avoid the last segment of the last context, because that 600 * We also need to avoid the last segment of the last context, because that
569 * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 601 * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
570 * because of the modulo operation in vsid scramble. 602 * because of the modulo operation in vsid scramble.
603 *
604 * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
605 * context easily. The +1 is to map the unused 0xe region mapping.
571 */ 606 */
572#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) 607#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
573#define MIN_USER_CONTEXT (5) 608#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
574 609 MAX_MEMMAP_CTX_CNT + 2)
575/* Would be nice to use KERNEL_REGION_ID here */
576#define KERNEL_REGION_CONTEXT_OFFSET (0xc - 1)
577 610
578/* 611/*
579 * For platforms that support only 65-bit VA we limit the context bits 612
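
A quick check of the new macros, assuming MAX_PHYSMEM_BITS = 51 and MAX_EA_BITS_PER_CONTEXT = 49 (512TB per context); these values are taken from the surrounding comments, not from this hunk:

/*
 * Hedged sketch: plugs assumed values into the macros added above and
 * prints the resulting context counts.
 */
#include <stdio.h>

#define MAX_PHYSMEM_BITS         51
#define MAX_EA_BITS_PER_CONTEXT  49

#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
#else
#define MAX_KERNEL_CTX_CNT 1
#endif

#define MAX_VMALLOC_CTX_CNT 1
#define MAX_MEMMAP_CTX_CNT  1
#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
			  MAX_MEMMAP_CTX_CNT + 2)

int main(void)
{
	/* 1 << (51 - 49) = 4 linear-mapping contexts; 4 + 1 + 1 + 2 = 8. */
	printf("MAX_KERNEL_CTX_CNT = %lu\n", MAX_KERNEL_CTX_CNT);
	printf("MIN_USER_CONTEXT   = %d\n", (int)MIN_USER_CONTEXT);
	return 0;
}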
@@ -734,6 +767,39 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
734} 767}
735 768
736/* 769/*
770 * For kernel space, we use the context ids listed below. The range is
771 * 512TB per context.
772 *
773 * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff]
774 * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
775 * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
776 * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
777
778 * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
779 * 0x00006 - Not used - Can map 0xe000000000000000 range.
780 * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
781 *
782 * So we can compute the context from the region (top nibble) by
783 * subtracting 11, or 0xc - 1.
784 */
785static inline unsigned long get_kernel_context(unsigned long ea)
786{
787 unsigned long region_id = REGION_ID(ea);
788 unsigned long ctx;
789 /*
790 * For the linear mapping we support multiple contexts
791 */
792 if (region_id == KERNEL_REGION_ID) {
793 /*
794 * We already verified ea to be not beyond the addr limit.
795 */
796 ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
797 } else
798 ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
799 return ctx;
800}
801
802/*
737 * This is only valid for addresses >= PAGE_OFFSET 803 * This is only valid for addresses >= PAGE_OFFSET
738 */ 804 */
739static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) 805static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
@@ -743,20 +809,7 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
743 if (!is_kernel_addr(ea)) 809 if (!is_kernel_addr(ea))
744 return 0; 810 return 0;
745 811
746 /* 812 context = get_kernel_context(ea);
747 * For kernel space, we use context ids 1-4 to map the address space as
748 * below:
749 *
750 * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
751 * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
752 * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
753 * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
754 *
755 * So we can compute the context from the region (top nibble) by
756 * subtracting 11, or 0xc - 1.
757 */
758 context = (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET;
759
760 return get_vsid(context, ea, ssize); 813 return get_vsid(context, ea, ssize);
761} 814}
762 815
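
Under the same assumed constants (region id in the top nibble, 49 EA bits per context, MAX_KERNEL_CTX_CNT = 4, none of which are defined in this hunk), the new get_kernel_context() reproduces the table in the comment above. A standalone sketch:

/*
 * Hedged sketch of the get_kernel_context() logic from the hunk above,
 * with assumed constants for illustration only.
 */
#include <stdio.h>

#define REGION_MASK              (0xfUL << 60)
#define KERNEL_REGION_ID         0xcUL
#define MAX_EA_BITS_PER_CONTEXT  49
#define MAX_KERNEL_CTX_CNT       4UL

static unsigned long demo_kernel_context(unsigned long ea)
{
	unsigned long region_id = (ea & REGION_MASK) >> 60;

	if (region_id == KERNEL_REGION_ID)
		/* Linear mapping: one context per 512TB chunk of the 0xc range. */
		return 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
	/* vmalloc (0xd), unused (0xe) and IO (0xf) follow the kernel contexts. */
	return (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
}

int main(void)
{
	unsigned long eas[] = {
		0xc000000000000000UL, 0xc002000000000000UL,
		0xd000000000000000UL, 0xf000000000000000UL,
	};

	/* Expect contexts 1, 2, 5 and 7, matching the comment above. */
	for (int i = 0; i < 4; i++)
		printf("ea=0x%016lx -> ctx %lu\n", eas[i], demo_kernel_context(eas[i]));
	return 0;
}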
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 9c8c669a6b6a..6328857f259f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -208,7 +208,7 @@ extern void radix_init_pseries(void);
208static inline void radix_init_pseries(void) { }; 208static inline void radix_init_pseries(void) { };
209#endif 209#endif
210 210
211static inline int get_ea_context(mm_context_t *ctx, unsigned long ea) 211static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
212{ 212{
213 int index = ea >> MAX_EA_BITS_PER_CONTEXT; 213 int index = ea >> MAX_EA_BITS_PER_CONTEXT;
214 214
@@ -223,7 +223,7 @@ static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
223static inline unsigned long get_user_vsid(mm_context_t *ctx, 223static inline unsigned long get_user_vsid(mm_context_t *ctx,
224 unsigned long ea, int ssize) 224 unsigned long ea, int ssize)
225{ 225{
226 unsigned long context = get_ea_context(ctx, ea); 226 unsigned long context = get_user_context(ctx, ea);
227 227
228 return get_vsid(context, ea, ssize); 228 return get_vsid(context, ea, ssize);
229} 229}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index d7ee249d6890..e3d4dd4ae2fa 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -10,6 +10,9 @@
10 * 10 *
11 * Defined in such a way that we can optimize away code block at build time 11 * Defined in such a way that we can optimize away code block at build time
12 * if CONFIG_HUGETLB_PAGE=n. 12 * if CONFIG_HUGETLB_PAGE=n.
13 *
14 * returns true for pmd migration entries, THP, devmap, hugetlb
15 * But compile time dependent on CONFIG_HUGETLB_PAGE
13 */ 16 */
14static inline int pmd_huge(pmd_t pmd) 17static inline int pmd_huge(pmd_t pmd)
15{ 18{
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 2a2486526d1f..6c99e846a8c9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -14,10 +14,6 @@
14 */ 14 */
15#define _PAGE_BIT_SWAP_TYPE 0 15#define _PAGE_BIT_SWAP_TYPE 0
16 16
17#define _PAGE_NA 0
18#define _PAGE_RO 0
19#define _PAGE_USER 0
20
21#define _PAGE_EXEC 0x00001 /* execute permission */ 17#define _PAGE_EXEC 0x00001 /* execute permission */
22#define _PAGE_WRITE 0x00002 /* write access allowed */ 18#define _PAGE_WRITE 0x00002 /* write access allowed */
23#define _PAGE_READ 0x00004 /* read access allowed */ 19#define _PAGE_READ 0x00004 /* read access allowed */
@@ -123,10 +119,6 @@
123#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ 119#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \
124 _PAGE_RW | _PAGE_EXEC) 120 _PAGE_RW | _PAGE_EXEC)
125/* 121/*
126 * No page size encoding in the linux PTE
127 */
128#define _PAGE_PSIZE 0
129/*
130 * _PAGE_CHG_MASK masks of bits that are to be preserved across 122 * _PAGE_CHG_MASK masks of bits that are to be preserved across
131 * pgprot changes 123 * pgprot changes
132 */ 124 */
@@ -137,19 +129,12 @@
137#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ 129#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
138 H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) 130 H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
139/* 131/*
140 * Mask of bits returned by pte_pgprot()
141 */
142#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
143 H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
144 _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
145 _PAGE_SOFT_DIRTY | H_PTE_PKEY)
146/*
147 * We define 2 sets of base prot bits, one for basic pages (ie, 132 * We define 2 sets of base prot bits, one for basic pages (ie,
148 * cacheable kernel and user pages) and one for non cacheable 133 * cacheable kernel and user pages) and one for non cacheable
149 * pages. We always set _PAGE_COHERENT when SMP is enabled or 134 * pages. We always set _PAGE_COHERENT when SMP is enabled or
150 * the processor might need it for DMA coherency. 135 * the processor might need it for DMA coherency.
151 */ 136 */
152#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) 137#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
153#define _PAGE_BASE (_PAGE_BASE_NC) 138#define _PAGE_BASE (_PAGE_BASE_NC)
154 139
155/* Permission masks used to generate the __P and __S table, 140/* Permission masks used to generate the __P and __S table,
@@ -159,8 +144,6 @@
159 * Write permissions imply read permissions for now (we could make write-only 144 * Write permissions imply read permissions for now (we could make write-only
160 * pages on BookE but we don't bother for now). Execute permission control is 145 * pages on BookE but we don't bother for now). Execute permission control is
161 * possible on platforms that define _PAGE_EXEC 146 * possible on platforms that define _PAGE_EXEC
162 *
163 * Note due to the way vm flags are laid out, the bits are XWR
164 */ 147 */
165#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED) 148#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
166#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) 149#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW)
@@ -170,24 +153,6 @@
170#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) 153#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ)
171#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) 154#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
172 155
173#define __P000 PAGE_NONE
174#define __P001 PAGE_READONLY
175#define __P010 PAGE_COPY
176#define __P011 PAGE_COPY
177#define __P100 PAGE_READONLY_X
178#define __P101 PAGE_READONLY_X
179#define __P110 PAGE_COPY_X
180#define __P111 PAGE_COPY_X
181
182#define __S000 PAGE_NONE
183#define __S001 PAGE_READONLY
184#define __S010 PAGE_SHARED
185#define __S011 PAGE_SHARED
186#define __S100 PAGE_READONLY_X
187#define __S101 PAGE_READONLY_X
188#define __S110 PAGE_SHARED_X
189#define __S111 PAGE_SHARED_X
190
191/* Permission masks used for kernel mappings */ 156/* Permission masks used for kernel mappings */
192#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) 157#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
193#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ 158#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
@@ -461,6 +426,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
461 pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); 426 pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
462} 427}
463 428
429#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
464static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, 430static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
465 unsigned long addr, pte_t *ptep) 431 unsigned long addr, pte_t *ptep)
466{ 432{
@@ -519,7 +485,11 @@ static inline int pte_special(pte_t pte)
519 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL)); 485 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
520} 486}
521 487
522static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } 488static inline bool pte_exec(pte_t pte)
489{
490 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
491}
492
523 493
524#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY 494#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
525static inline bool pte_soft_dirty(pte_t pte) 495static inline bool pte_soft_dirty(pte_t pte)
@@ -529,12 +499,12 @@ static inline bool pte_soft_dirty(pte_t pte)
529 499
530static inline pte_t pte_mksoft_dirty(pte_t pte) 500static inline pte_t pte_mksoft_dirty(pte_t pte)
531{ 501{
532 return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); 502 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
533} 503}
534 504
535static inline pte_t pte_clear_soft_dirty(pte_t pte) 505static inline pte_t pte_clear_soft_dirty(pte_t pte)
536{ 506{
537 return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY); 507 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
538} 508}
539#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ 509#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
540 510
@@ -555,7 +525,7 @@ static inline pte_t pte_mk_savedwrite(pte_t pte)
555 */ 525 */
556 VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) != 526 VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
557 cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED)); 527 cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
558 return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED); 528 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
559} 529}
560 530
561#define pte_clear_savedwrite pte_clear_savedwrite 531#define pte_clear_savedwrite pte_clear_savedwrite
@@ -565,14 +535,14 @@ static inline pte_t pte_clear_savedwrite(pte_t pte)
565 * Used by KSM subsystem to make a protnone pte readonly. 535 * Used by KSM subsystem to make a protnone pte readonly.
566 */ 536 */
567 VM_BUG_ON(!pte_protnone(pte)); 537 VM_BUG_ON(!pte_protnone(pte));
568 return __pte(pte_val(pte) | _PAGE_PRIVILEGED); 538 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
569} 539}
570#else 540#else
571#define pte_clear_savedwrite pte_clear_savedwrite 541#define pte_clear_savedwrite pte_clear_savedwrite
572static inline pte_t pte_clear_savedwrite(pte_t pte) 542static inline pte_t pte_clear_savedwrite(pte_t pte)
573{ 543{
574 VM_WARN_ON(1); 544 VM_WARN_ON(1);
575 return __pte(pte_val(pte) & ~_PAGE_WRITE); 545 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
576} 546}
577#endif /* CONFIG_NUMA_BALANCING */ 547#endif /* CONFIG_NUMA_BALANCING */
578 548
@@ -587,6 +557,11 @@ static inline int pte_present(pte_t pte)
587 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)); 557 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
588} 558}
589 559
560static inline bool pte_hw_valid(pte_t pte)
561{
562 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
563}
564
590#ifdef CONFIG_PPC_MEM_KEYS 565#ifdef CONFIG_PPC_MEM_KEYS
591extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); 566extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
592#else 567#else
@@ -596,25 +571,22 @@ static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
596} 571}
597#endif /* CONFIG_PPC_MEM_KEYS */ 572#endif /* CONFIG_PPC_MEM_KEYS */
598 573
574static inline bool pte_user(pte_t pte)
575{
576 return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
577}
578
599#define pte_access_permitted pte_access_permitted 579#define pte_access_permitted pte_access_permitted
600static inline bool pte_access_permitted(pte_t pte, bool write) 580static inline bool pte_access_permitted(pte_t pte, bool write)
601{ 581{
602 unsigned long pteval = pte_val(pte);
603 /* Also check for pte_user */
604 unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
605 /* 582 /*
606 * _PAGE_READ is needed for any access and will be 583 * _PAGE_READ is needed for any access and will be
607 * cleared for PROT_NONE 584 * cleared for PROT_NONE
608 */ 585 */
609 unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ; 586 if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
610
611 if (write)
612 need_pte_bits |= _PAGE_WRITE;
613
614 if ((pteval & need_pte_bits) != need_pte_bits)
615 return false; 587 return false;
616 588
617 if ((pteval & clear_pte_bits) == clear_pte_bits) 589 if (write && !pte_write(pte))
618 return false; 590 return false;
619 591
620 return arch_pte_access_permitted(pte_val(pte), write, 0); 592 return arch_pte_access_permitted(pte_val(pte), write, 0);
@@ -643,17 +615,32 @@ static inline pte_t pte_wrprotect(pte_t pte)
643{ 615{
644 if (unlikely(pte_savedwrite(pte))) 616 if (unlikely(pte_savedwrite(pte)))
645 return pte_clear_savedwrite(pte); 617 return pte_clear_savedwrite(pte);
646 return __pte(pte_val(pte) & ~_PAGE_WRITE); 618 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
619}
620
621static inline pte_t pte_exprotect(pte_t pte)
622{
623 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
647} 624}
648 625
649static inline pte_t pte_mkclean(pte_t pte) 626static inline pte_t pte_mkclean(pte_t pte)
650{ 627{
651 return __pte(pte_val(pte) & ~_PAGE_DIRTY); 628 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
652} 629}
653 630
654static inline pte_t pte_mkold(pte_t pte) 631static inline pte_t pte_mkold(pte_t pte)
655{ 632{
656 return __pte(pte_val(pte) & ~_PAGE_ACCESSED); 633 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
634}
635
636static inline pte_t pte_mkexec(pte_t pte)
637{
638 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
639}
640
641static inline pte_t pte_mkpte(pte_t pte)
642{
643 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
657} 644}
658 645
659static inline pte_t pte_mkwrite(pte_t pte) 646static inline pte_t pte_mkwrite(pte_t pte)
@@ -661,22 +648,22 @@ static inline pte_t pte_mkwrite(pte_t pte)
661 /* 648 /*
662 * write implies read, hence set both 649 * write implies read, hence set both
663 */ 650 */
664 return __pte(pte_val(pte) | _PAGE_RW); 651 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
665} 652}
666 653
667static inline pte_t pte_mkdirty(pte_t pte) 654static inline pte_t pte_mkdirty(pte_t pte)
668{ 655{
669 return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); 656 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
670} 657}
671 658
672static inline pte_t pte_mkyoung(pte_t pte) 659static inline pte_t pte_mkyoung(pte_t pte)
673{ 660{
674 return __pte(pte_val(pte) | _PAGE_ACCESSED); 661 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
675} 662}
676 663
677static inline pte_t pte_mkspecial(pte_t pte) 664static inline pte_t pte_mkspecial(pte_t pte)
678{ 665{
679 return __pte(pte_val(pte) | _PAGE_SPECIAL); 666 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
680} 667}
681 668
682static inline pte_t pte_mkhuge(pte_t pte) 669static inline pte_t pte_mkhuge(pte_t pte)
@@ -686,7 +673,17 @@ static inline pte_t pte_mkhuge(pte_t pte)
686 673
687static inline pte_t pte_mkdevmap(pte_t pte) 674static inline pte_t pte_mkdevmap(pte_t pte)
688{ 675{
689 return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP); 676 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
677}
678
679static inline pte_t pte_mkprivileged(pte_t pte)
680{
681 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
682}
683
684static inline pte_t pte_mkuser(pte_t pte)
685{
686 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
690} 687}
691 688
692/* 689/*
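
Most of the helpers above now flip bits on the raw big-endian PTE image via pte_raw()/__pte_raw() instead of round-tripping through pte_val(); byte swapping distributes over bitwise OR and AND, so the result is identical while the swap of the constant folds away at compile time. A small userspace sketch, using htobe64()/be64toh() as stand-ins for cpu_to_be64() and an invented flag value:

/*
 * Hedged sketch: setting a flag on the CPU-endian value and converting
 * gives the same result as OR-ing the byte-swapped constant into the
 * big-endian image directly.
 */
#define _DEFAULT_SOURCE		/* for htobe64()/be64toh() on glibc */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_DIRTY 0x00080UL	/* invented flag value */

int main(void)
{
	uint64_t cpu_val = 0x00000000deadb005ULL;	/* CPU-endian PTE value */
	uint64_t raw     = htobe64(cpu_val);		/* big-endian image     */

	/* "pte_val" style: convert, set the bit, convert back. */
	uint64_t via_val = htobe64(be64toh(raw) | DEMO_PAGE_DIRTY);

	/* "pte_raw" style: OR the pre-swapped constant into the raw image. */
	uint64_t via_raw = raw | htobe64(DEMO_PAGE_DIRTY);

	printf("%s\n", via_val == via_raw ? "identical" : "different");
	return 0;
}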
@@ -705,12 +702,8 @@ static inline int pte_devmap(pte_t pte)
705static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 702static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
706{ 703{
707 /* FIXME!! check whether this need to be a conditional */ 704 /* FIXME!! check whether this need to be a conditional */
708 return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); 705 return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
709} 706 cpu_to_be64(pgprot_val(newprot)));
710
711static inline bool pte_user(pte_t pte)
712{
713 return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
714} 707}
715 708
716/* Encode and de-code a swap entry */ 709/* Encode and de-code a swap entry */
@@ -723,9 +716,7 @@ static inline bool pte_user(pte_t pte)
723 BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ 716 BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
724 BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \ 717 BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
725 } while (0) 718 } while (0)
726/* 719
727 * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
728 */
729#define SWP_TYPE_BITS 5 720#define SWP_TYPE_BITS 5
730#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ 721#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
731 & ((1UL << SWP_TYPE_BITS) - 1)) 722 & ((1UL << SWP_TYPE_BITS) - 1))
@@ -741,6 +732,8 @@ static inline bool pte_user(pte_t pte)
741 */ 732 */
742#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE }) 733#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
743#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE) 734#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
735#define __pmd_to_swp_entry(pmd) (__pte_to_swp_entry(pmd_pte(pmd)))
736#define __swp_entry_to_pmd(x) (pte_pmd(__swp_entry_to_pte(x)))
744 737
745#ifdef CONFIG_MEM_SOFT_DIRTY 738#ifdef CONFIG_MEM_SOFT_DIRTY
746#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE)) 739#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
@@ -751,7 +744,7 @@ static inline bool pte_user(pte_t pte)
751#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY 744#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
752static inline pte_t pte_swp_mksoft_dirty(pte_t pte) 745static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
753{ 746{
754 return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); 747 return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
755} 748}
756 749
757static inline bool pte_swp_soft_dirty(pte_t pte) 750static inline bool pte_swp_soft_dirty(pte_t pte)
@@ -761,7 +754,7 @@ static inline bool pte_swp_soft_dirty(pte_t pte)
761 754
762static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) 755static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
763{ 756{
764 return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY); 757 return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
765} 758}
766#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ 759#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
767 760
@@ -850,10 +843,10 @@ static inline pgprot_t pgprot_writecombine(pgprot_t prot)
850 */ 843 */
851static inline bool pte_ci(pte_t pte) 844static inline bool pte_ci(pte_t pte)
852{ 845{
853 unsigned long pte_v = pte_val(pte); 846 __be64 pte_v = pte_raw(pte);
854 847
855 if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) || 848 if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
856 ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)) 849 ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
857 return true; 850 return true;
858 return false; 851 return false;
859} 852}
@@ -875,8 +868,16 @@ static inline int pmd_none(pmd_t pmd)
875 868
876static inline int pmd_present(pmd_t pmd) 869static inline int pmd_present(pmd_t pmd)
877{ 870{
871 /*
872 * A pmd is considered present if _PAGE_PRESENT is set.
873 * We also need to treat as present a pmd that has been marked
874 * invalid during a THP split. Hence we check _PAGE_INVALID
875 * when we find _PAGE_PRESENT cleared.
876 */
877 if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
878 return true;
878 879
879 return !pmd_none(pmd); 880 return false;
880} 881}
881 882
882static inline int pmd_bad(pmd_t pmd) 883static inline int pmd_bad(pmd_t pmd)
@@ -903,7 +904,7 @@ static inline int pud_none(pud_t pud)
903 904
904static inline int pud_present(pud_t pud) 905static inline int pud_present(pud_t pud)
905{ 906{
906 return !pud_none(pud); 907 return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
907} 908}
908 909
909extern struct page *pud_page(pud_t pud); 910extern struct page *pud_page(pud_t pud);
@@ -950,7 +951,7 @@ static inline int pgd_none(pgd_t pgd)
950 951
951static inline int pgd_present(pgd_t pgd) 952static inline int pgd_present(pgd_t pgd)
952{ 953{
953 return !pgd_none(pgd); 954 return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
954} 955}
955 956
956static inline pte_t pgd_pte(pgd_t pgd) 957static inline pte_t pgd_pte(pgd_t pgd)
@@ -1020,17 +1021,16 @@ extern struct page *pgd_page(pgd_t pgd);
1020#define pgd_ERROR(e) \ 1021#define pgd_ERROR(e) \
1021 pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) 1022 pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
1022 1023
1023static inline int map_kernel_page(unsigned long ea, unsigned long pa, 1024static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
1024 unsigned long flags)
1025{ 1025{
1026 if (radix_enabled()) { 1026 if (radix_enabled()) {
1027#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM) 1027#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
1028 unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift; 1028 unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
1029 WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE"); 1029 WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
1030#endif 1030#endif
1031 return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE); 1031 return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
1032 } 1032 }
1033 return hash__map_kernel_page(ea, pa, flags); 1033 return hash__map_kernel_page(ea, pa, prot);
1034} 1034}
1035 1035
1036static inline int __meminit vmemmap_create_mapping(unsigned long start, 1036static inline int __meminit vmemmap_create_mapping(unsigned long start,
@@ -1082,6 +1082,12 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
1082#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd)) 1082#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
1083#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))) 1083#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
1084#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))) 1084#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
1085
1086#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1087#define pmd_swp_mksoft_dirty(pmd) pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
1088#define pmd_swp_soft_dirty(pmd) pte_swp_soft_dirty(pmd_pte(pmd))
1089#define pmd_swp_clear_soft_dirty(pmd) pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
1090#endif
1085#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ 1091#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
1086 1092
1087#ifdef CONFIG_NUMA_BALANCING 1093#ifdef CONFIG_NUMA_BALANCING
@@ -1127,6 +1133,10 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
1127 return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set); 1133 return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
1128} 1134}
1129 1135
1136/*
1137 * returns true for pmd migration entries, THP, devmap, hugetlb
1138 * But compile time dependent on THP config
1139 */
1130static inline int pmd_large(pmd_t pmd) 1140static inline int pmd_large(pmd_t pmd)
1131{ 1141{
1132 return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); 1142 return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
@@ -1161,8 +1171,22 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
1161 pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); 1171 pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
1162} 1172}
1163 1173
1174/*
1175 * Only returns true for a THP. False for pmd migration entry.
1176 * We also need to return true when we come across a pmd that is
1177 * in the middle of a THP split. While splitting THP, we mark the pmd
1178 * invalid (pmdp_invalidate()) before we set it with pte page
1179 * address. A pmd_trans_huge() check against a pmd entry during that time
1180 * should return true.
1181 * We should not call this on a hugetlb entry. We should check for HugeTLB
1182 * entry using vma->vm_flags
1183 * The page table walk rule is explained in Documentation/vm/transhuge.rst
1184 */
1164static inline int pmd_trans_huge(pmd_t pmd) 1185static inline int pmd_trans_huge(pmd_t pmd)
1165{ 1186{
1187 if (!pmd_present(pmd))
1188 return false;
1189
1166 if (radix_enabled()) 1190 if (radix_enabled())
1167 return radix__pmd_trans_huge(pmd); 1191 return radix__pmd_trans_huge(pmd);
1168 return hash__pmd_trans_huge(pmd); 1192 return hash__pmd_trans_huge(pmd);
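
With the _PAGE_INVALID handling above, a pmd can be fully present, present but invalidated mid THP split, or a swap/migration entry with neither bit set; only the first two count as present. A toy sketch of the resulting pmd_present()/pmd_trans_huge() behaviour, with invented flag values:

/*
 * Hedged sketch of the pmd state handling above. The flag values are
 * invented; only the decision logic mirrors the hunks.
 */
#include <stdbool.h>
#include <stdio.h>

#define D_PRESENT 0x1
#define D_INVALID 0x2
#define D_PTE     0x4	/* stands in for _PAGE_PTE (leaf/huge marker) */

static bool demo_pmd_present(unsigned long pmd)
{
	/* Present, or marked invalid while a THP split is in progress. */
	return pmd & (D_PRESENT | D_INVALID);
}

static bool demo_pmd_trans_huge(unsigned long pmd)
{
	if (!demo_pmd_present(pmd))
		return false;		/* e.g. a migration entry */
	return pmd & D_PTE;
}

int main(void)
{
	unsigned long thp       = D_PRESENT | D_PTE;
	unsigned long split_thp = D_INVALID | D_PTE;	/* pmdp_invalidate()d */
	unsigned long migration = D_PTE;		/* no present bits    */

	printf("THP:       present=%d trans_huge=%d\n",
	       demo_pmd_present(thp), demo_pmd_trans_huge(thp));
	printf("split THP: present=%d trans_huge=%d\n",
	       demo_pmd_present(split_thp), demo_pmd_trans_huge(split_thp));
	printf("migration: present=%d trans_huge=%d\n",
	       demo_pmd_present(migration), demo_pmd_trans_huge(migration));
	return 0;
}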
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 1154a6dc6d26..671316f9e95d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
53 unsigned long addr, 53 unsigned long addr,
54 unsigned long page_size); 54 unsigned long page_size);
55extern void radix__flush_pwc_lpid(unsigned int lpid); 55extern void radix__flush_pwc_lpid(unsigned int lpid);
56extern void radix__flush_tlb_lpid(unsigned int lpid);
56extern void radix__local_flush_tlb_lpid(unsigned int lpid); 57extern void radix__local_flush_tlb_lpid(unsigned int lpid);
57extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid); 58extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
58 59
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index fd06dbe7d7d3..fed7e6241349 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -133,7 +133,7 @@ struct pt_regs;
133extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); 133extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
134extern void bad_page_fault(struct pt_regs *, unsigned long, int); 134extern void bad_page_fault(struct pt_regs *, unsigned long, int);
135extern void _exception(int, struct pt_regs *, int, unsigned long); 135extern void _exception(int, struct pt_regs *, int, unsigned long);
136extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int); 136extern void _exception_pkey(struct pt_regs *, unsigned long, int);
137extern void die(const char *, struct pt_regs *, long); 137extern void die(const char *, struct pt_regs *, long);
138extern bool die_will_crash(void); 138extern bool die_will_crash(void);
139extern void panic_flush_kmsg_start(void); 139extern void panic_flush_kmsg_start(void);
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 85c8af2bb272..74d0db511099 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -8,6 +8,8 @@
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10 10
11#include <asm-generic/compat.h>
12
11#define COMPAT_USER_HZ 100 13#define COMPAT_USER_HZ 100
12#ifdef __BIG_ENDIAN__ 14#ifdef __BIG_ENDIAN__
13#define COMPAT_UTS_MACHINE "ppc\0\0" 15#define COMPAT_UTS_MACHINE "ppc\0\0"
@@ -15,34 +17,18 @@
15#define COMPAT_UTS_MACHINE "ppcle\0\0" 17#define COMPAT_UTS_MACHINE "ppcle\0\0"
16#endif 18#endif
17 19
18typedef u32 compat_size_t;
19typedef s32 compat_ssize_t;
20typedef s32 compat_clock_t;
21typedef s32 compat_pid_t;
22typedef u32 __compat_uid_t; 20typedef u32 __compat_uid_t;
23typedef u32 __compat_gid_t; 21typedef u32 __compat_gid_t;
24typedef u32 __compat_uid32_t; 22typedef u32 __compat_uid32_t;
25typedef u32 __compat_gid32_t; 23typedef u32 __compat_gid32_t;
26typedef u32 compat_mode_t; 24typedef u32 compat_mode_t;
27typedef u32 compat_ino_t;
28typedef u32 compat_dev_t; 25typedef u32 compat_dev_t;
29typedef s32 compat_off_t;
30typedef s64 compat_loff_t;
31typedef s16 compat_nlink_t; 26typedef s16 compat_nlink_t;
32typedef u16 compat_ipc_pid_t; 27typedef u16 compat_ipc_pid_t;
33typedef s32 compat_daddr_t;
34typedef u32 compat_caddr_t; 28typedef u32 compat_caddr_t;
35typedef __kernel_fsid_t compat_fsid_t; 29typedef __kernel_fsid_t compat_fsid_t;
36typedef s32 compat_key_t;
37typedef s32 compat_timer_t;
38
39typedef s32 compat_int_t;
40typedef s32 compat_long_t;
41typedef s64 compat_s64; 30typedef s64 compat_s64;
42typedef u32 compat_uint_t;
43typedef u32 compat_ulong_t;
44typedef u64 compat_u64; 31typedef u64 compat_u64;
45typedef u32 compat_uptr_t;
46 32
47struct compat_stat { 33struct compat_stat {
48 compat_dev_t st_dev; 34 compat_dev_t st_dev;
@@ -55,11 +41,11 @@ struct compat_stat {
55 compat_off_t st_size; 41 compat_off_t st_size;
56 compat_off_t st_blksize; 42 compat_off_t st_blksize;
57 compat_off_t st_blocks; 43 compat_off_t st_blocks;
58 compat_time_t st_atime; 44 old_time32_t st_atime;
59 u32 st_atime_nsec; 45 u32 st_atime_nsec;
60 compat_time_t st_mtime; 46 old_time32_t st_mtime;
61 u32 st_mtime_nsec; 47 u32 st_mtime_nsec;
62 compat_time_t st_ctime; 48 old_time32_t st_ctime;
63 u32 st_ctime_nsec; 49 u32 st_ctime_nsec;
64 u32 __unused4[2]; 50 u32 __unused4[2];
65}; 51};
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index d71a90924f3b..deb99fd6e060 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -23,11 +23,13 @@
23extern int threads_per_core; 23extern int threads_per_core;
24extern int threads_per_subcore; 24extern int threads_per_subcore;
25extern int threads_shift; 25extern int threads_shift;
26extern bool has_big_cores;
26extern cpumask_t threads_core_mask; 27extern cpumask_t threads_core_mask;
27#else 28#else
28#define threads_per_core 1 29#define threads_per_core 1
29#define threads_per_subcore 1 30#define threads_per_subcore 1
30#define threads_shift 0 31#define threads_shift 0
32#define has_big_cores 0
31#define threads_core_mask (*get_cpu_mask(0)) 33#define threads_core_mask (*get_cpu_mask(0))
32#endif 34#endif
33 35
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 133672744b2e..ae73dc8da2d4 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -61,7 +61,6 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
61 struct cpu_accounting_data *acct0 = get_accounting(prev); 61 struct cpu_accounting_data *acct0 = get_accounting(prev);
62 62
63 acct->starttime = acct0->starttime; 63 acct->starttime = acct0->starttime;
64 acct->startspurr = acct0->startspurr;
65} 64}
66#endif 65#endif
67 66
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index ce242b9ea8c6..7c1d8e74b25d 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -99,4 +99,9 @@ void __init walk_drmem_lmbs_early(unsigned long node,
99 void (*func)(struct drmem_lmb *, const __be32 **)); 99 void (*func)(struct drmem_lmb *, const __be32 **));
100#endif 100#endif
101 101
102static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
103{
104 lmb->aa_index = 0xffffffff;
105}
106
102#endif /* _ASM_POWERPC_LMB_H */ 107#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 219637ea69a1..8b596d096ebe 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -43,7 +43,6 @@ struct pci_dn;
43#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ 43#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */
44#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ 44#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */
45#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ 45#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */
46#define EEH_POSTPONED_PROBE 0x80 /* Powernv may postpone device probe */
47 46
48/* 47/*
49 * Delay for PE reset, all in ms 48 * Delay for PE reset, all in ms
@@ -99,13 +98,13 @@ struct eeh_pe {
99 atomic_t pass_dev_cnt; /* Count of passed through devs */ 98 atomic_t pass_dev_cnt; /* Count of passed through devs */
100 struct eeh_pe *parent; /* Parent PE */ 99 struct eeh_pe *parent; /* Parent PE */
101 void *data; /* PE auxillary data */ 100 void *data; /* PE auxillary data */
102 struct list_head child_list; /* Link PE to the child list */ 101 struct list_head child_list; /* List of PEs below this PE */
103 struct list_head edevs; /* Link list of EEH devices */ 102 struct list_head child; /* Memb. child_list/eeh_phb_pe */
104 struct list_head child; /* Child PEs */ 103 struct list_head edevs; /* List of eeh_dev in this PE */
105}; 104};
106 105
107#define eeh_pe_for_each_dev(pe, edev, tmp) \ 106#define eeh_pe_for_each_dev(pe, edev, tmp) \
108 list_for_each_entry_safe(edev, tmp, &pe->edevs, list) 107 list_for_each_entry_safe(edev, tmp, &pe->edevs, entry)
109 108
110#define eeh_for_each_pe(root, pe) \ 109#define eeh_for_each_pe(root, pe) \
111 for (pe = root; pe; pe = eeh_pe_next(pe, root)) 110 for (pe = root; pe; pe = eeh_pe_next(pe, root))
@@ -142,13 +141,12 @@ struct eeh_dev {
142 int aer_cap; /* Saved AER capability */ 141 int aer_cap; /* Saved AER capability */
143 int af_cap; /* Saved AF capability */ 142 int af_cap; /* Saved AF capability */
144 struct eeh_pe *pe; /* Associated PE */ 143 struct eeh_pe *pe; /* Associated PE */
145 struct list_head list; /* Form link list in the PE */ 144 struct list_head entry; /* Membership in eeh_pe.edevs */
146 struct list_head rmv_list; /* Record the removed edevs */ 145 struct list_head rmv_entry; /* Membership in rmv_list */
147 struct pci_dn *pdn; /* Associated PCI device node */ 146 struct pci_dn *pdn; /* Associated PCI device node */
148 struct pci_dev *pdev; /* Associated PCI device */ 147 struct pci_dev *pdev; /* Associated PCI device */
149 bool in_error; /* Error flag for edev */ 148 bool in_error; /* Error flag for edev */
150 struct pci_dev *physfn; /* Associated SRIOV PF */ 149 struct pci_dev *physfn; /* Associated SRIOV PF */
151 struct pci_bus *bus; /* PCI bus for partial hotplug */
152}; 150};
153 151
154static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev) 152static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
@@ -207,9 +205,8 @@ struct eeh_ops {
207 void* (*probe)(struct pci_dn *pdn, void *data); 205 void* (*probe)(struct pci_dn *pdn, void *data);
208 int (*set_option)(struct eeh_pe *pe, int option); 206 int (*set_option)(struct eeh_pe *pe, int option);
209 int (*get_pe_addr)(struct eeh_pe *pe); 207 int (*get_pe_addr)(struct eeh_pe *pe);
210 int (*get_state)(struct eeh_pe *pe, int *state); 208 int (*get_state)(struct eeh_pe *pe, int *delay);
211 int (*reset)(struct eeh_pe *pe, int option); 209 int (*reset)(struct eeh_pe *pe, int option);
212 int (*wait_state)(struct eeh_pe *pe, int max_wait);
213 int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); 210 int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
214 int (*configure_bridge)(struct eeh_pe *pe); 211 int (*configure_bridge)(struct eeh_pe *pe);
215 int (*err_inject)(struct eeh_pe *pe, int type, int func, 212 int (*err_inject)(struct eeh_pe *pe, int type, int func,
@@ -243,11 +240,7 @@ static inline bool eeh_has_flag(int flag)
243 240
244static inline bool eeh_enabled(void) 241static inline bool eeh_enabled(void)
245{ 242{
246 if (eeh_has_flag(EEH_FORCE_DISABLED) || 243 return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
247 !eeh_has_flag(EEH_ENABLED))
248 return false;
249
250 return true;
251} 244}
252 245
253static inline void eeh_serialize_lock(unsigned long *flags) 246static inline void eeh_serialize_lock(unsigned long *flags)
@@ -270,6 +263,7 @@ typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
270typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag); 263typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
271void eeh_set_pe_aux_size(int size); 264void eeh_set_pe_aux_size(int size);
272int eeh_phb_pe_create(struct pci_controller *phb); 265int eeh_phb_pe_create(struct pci_controller *phb);
266int eeh_wait_state(struct eeh_pe *pe, int max_wait);
273struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); 267struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
274struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); 268struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
275struct eeh_pe *eeh_pe_get(struct pci_controller *phb, 269struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h
new file mode 100644
index 000000000000..62fd24739852
--- /dev/null
+++ b/arch/powerpc/include/asm/error-injection.h
@@ -0,0 +1,13 @@
1/* SPDX-License-Identifier: GPL-2.0+ */
2
3#ifndef _ASM_ERROR_INJECTION_H
4#define _ASM_ERROR_INJECTION_H
5
6#include <linux/compiler.h>
7#include <linux/linkage.h>
8#include <asm/ptrace.h>
9#include <asm-generic/error-injection.h>
10
11void override_function_with_return(struct pt_regs *regs);
12
13#endif /* _ASM_ERROR_INJECTION_H */
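The new header is deliberately thin: it pulls in the generic <asm-generic/error-injection.h> machinery and declares override_function_with_return(), the arch hook the error-injection core uses to make an instrumented function return early on powerpc. As a hedged illustration only (kernel context, not part of this patch, with a made-up function name), a driver opts a call site into that framework roughly like this:

    #include <linux/error-injection.h>

    static int my_driver_alloc(void)
    {
            /* real allocation work elided */
            return 0;
    }
    /* Whitelist the function; injected failures make it return an errno. */
    ALLOW_ERROR_INJECTION(my_driver_alloc, ERRNO);

Once whitelisted, a kprobe or BPF program may ask the core to skip the function body, and override_function_with_return() adjusts the saved registers so execution resumes in the caller.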
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index a86feddddad0..3b4767ed3ec5 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -61,14 +61,6 @@
61#define MAX_MCE_DEPTH 4 61#define MAX_MCE_DEPTH 4
62 62
63/* 63/*
64 * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
65 * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
66 * in the save area so it's not necessary to overlap them. Could be used
67 * for future savings though if another 4 byte register was to be saved.
68 */
69#define EX_LR EX_DAR
70
71/*
72 * EX_R3 is only used by the bad_stack handler. bad_stack reloads and 64 * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
73 * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap 65 * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
74 * with EX_DAR. 66 * with EX_DAR.
@@ -236,11 +228,10 @@
236 * PPR save/restore macros used in exceptions_64s.S 228 * PPR save/restore macros used in exceptions_64s.S
237 * Used for P7 or later processors 229 * Used for P7 or later processors
238 */ 230 */
239#define SAVE_PPR(area, ra, rb) \ 231#define SAVE_PPR(area, ra) \
240BEGIN_FTR_SECTION_NESTED(940) \ 232BEGIN_FTR_SECTION_NESTED(940) \
241 ld ra,PACACURRENT(r13); \ 233 ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
242 ld rb,area+EX_PPR(r13); /* Read PPR from paca */ \ 234 std ra,_PPR(r1); \
243 std rb,TASKTHREADPPR(ra); \
244END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) 235END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
245 236
246#define RESTORE_PPR_PACA(area, ra) \ 237#define RESTORE_PPR_PACA(area, ra) \
@@ -508,7 +499,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
5083: EXCEPTION_PROLOG_COMMON_1(); \ 4993: EXCEPTION_PROLOG_COMMON_1(); \
509 beq 4f; /* if from kernel mode */ \ 500 beq 4f; /* if from kernel mode */ \
510 ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ 501 ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
511 SAVE_PPR(area, r9, r10); \ 502 SAVE_PPR(area, r9); \
5124: EXCEPTION_PROLOG_COMMON_2(area) \ 5034: EXCEPTION_PROLOG_COMMON_2(area) \
513 EXCEPTION_PROLOG_COMMON_3(n) \ 504 EXCEPTION_PROLOG_COMMON_3(n) \
514 ACCOUNT_STOLEN_TIME 505 ACCOUNT_STOLEN_TIME
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 7a051bd21f87..00bc42d95679 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,6 +52,8 @@
52#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) 52#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
53#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) 53#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
54#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) 54#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
55#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
56#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000)
55 57
56#ifndef __ASSEMBLY__ 58#ifndef __ASSEMBLY__
57 59
@@ -69,7 +71,8 @@ enum {
69 FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | 71 FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
70 FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | 72 FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
71 FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | 73 FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
72 FW_FEATURE_DRC_INFO, 74 FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
75 FW_FEATURE_PAPR_SCM,
73 FW_FEATURE_PSERIES_ALWAYS = 0, 76 FW_FEATURE_PSERIES_ALWAYS = 0,
74 FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, 77 FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
75 FW_FEATURE_POWERNV_ALWAYS = 0, 78 FW_FEATURE_POWERNV_ALWAYS = 0,
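FW_FEATURE_BLOCK_REMOVE and FW_FEATURE_PAPR_SCM are added to FW_FEATURE_PSERIES_POSSIBLE, so they are advertised by the hypervisor, probed at boot like the other bits, and can then be tested with firmware_has_feature(). A hedged sketch of the usual gating pattern (the function and its fallback path are illustrative, not taken from this series):

    #include <asm/firmware.h>

    static void flush_hpte_range(void)
    {
            if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
                    /* batch the invalidations through H_BLOCK_REMOVE */
            } else {
                    /* fall back to one H_REMOVE hcall per HPTE */
            }
    }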
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 41cc15c14eee..b9fbed84ddca 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -72,7 +72,7 @@ enum fixed_addresses {
72static inline void __set_fixmap(enum fixed_addresses idx, 72static inline void __set_fixmap(enum fixed_addresses idx,
73 phys_addr_t phys, pgprot_t flags) 73 phys_addr_t phys, pgprot_t flags)
74{ 74{
75 map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags)); 75 map_kernel_page(fix_to_virt(idx), phys, flags);
76} 76}
77 77
78#endif /* !__ASSEMBLY__ */ 78#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 2d00cc530083..383da1ab9e23 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -4,7 +4,6 @@
4 4
5#ifdef CONFIG_HUGETLB_PAGE 5#ifdef CONFIG_HUGETLB_PAGE
6#include <asm/page.h> 6#include <asm/page.h>
7#include <asm-generic/hugetlb.h>
8 7
9extern struct kmem_cache *hugepte_cache; 8extern struct kmem_cache *hugepte_cache;
10 9
@@ -110,31 +109,12 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
110void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); 109void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
111#endif 110#endif
112 111
112#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
113void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, 113void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
114 unsigned long end, unsigned long floor, 114 unsigned long end, unsigned long floor,
115 unsigned long ceiling); 115 unsigned long ceiling);
116 116
117/* 117#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
118 * If the arch doesn't supply something else, assume that hugepage
119 * size aligned regions are ok without further preparation.
120 */
121static inline int prepare_hugepage_range(struct file *file,
122 unsigned long addr, unsigned long len)
123{
124 struct hstate *h = hstate_file(file);
125 if (len & ~huge_page_mask(h))
126 return -EINVAL;
127 if (addr & ~huge_page_mask(h))
128 return -EINVAL;
129 return 0;
130}
131
132static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
133 pte_t *ptep, pte_t pte)
134{
135 set_pte_at(mm, addr, ptep, pte);
136}
137
138static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 118static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
139 unsigned long addr, pte_t *ptep) 119 unsigned long addr, pte_t *ptep)
140{ 120{
@@ -145,6 +125,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
145#endif 125#endif
146} 126}
147 127
128#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
148static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 129static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
149 unsigned long addr, pte_t *ptep) 130 unsigned long addr, pte_t *ptep)
150{ 131{
@@ -153,29 +134,17 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
153 flush_hugetlb_page(vma, addr); 134 flush_hugetlb_page(vma, addr);
154} 135}
155 136
156static inline int huge_pte_none(pte_t pte) 137#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
157{
158 return pte_none(pte);
159}
160
161static inline pte_t huge_pte_wrprotect(pte_t pte)
162{
163 return pte_wrprotect(pte);
164}
165
166extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, 138extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
167 unsigned long addr, pte_t *ptep, 139 unsigned long addr, pte_t *ptep,
168 pte_t pte, int dirty); 140 pte_t pte, int dirty);
169 141
170static inline pte_t huge_ptep_get(pte_t *ptep)
171{
172 return *ptep;
173}
174
175static inline void arch_clear_hugepage_flags(struct page *page) 142static inline void arch_clear_hugepage_flags(struct page *page)
176{ 143{
177} 144}
178 145
146#include <asm-generic/hugetlb.h>
147
179#else /* ! CONFIG_HUGETLB_PAGE */ 148#else /* ! CONFIG_HUGETLB_PAGE */
180static inline void flush_hugetlb_page(struct vm_area_struct *vma, 149static inline void flush_hugetlb_page(struct vm_area_struct *vma,
181 unsigned long vmaddr) 150 unsigned long vmaddr)
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a0b17f9f1ea4..33a4fc891947 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -278,6 +278,7 @@
278#define H_COP 0x304 278#define H_COP 0x304
279#define H_GET_MPP_X 0x314 279#define H_GET_MPP_X 0x314
280#define H_SET_MODE 0x31C 280#define H_SET_MODE 0x31C
281#define H_BLOCK_REMOVE 0x328
281#define H_CLEAR_HPT 0x358 282#define H_CLEAR_HPT 0x358
282#define H_REQUEST_VMC 0x360 283#define H_REQUEST_VMC 0x360
283#define H_RESIZE_HPT_PREPARE 0x36C 284#define H_RESIZE_HPT_PREPARE 0x36C
@@ -295,7 +296,15 @@
295#define H_INT_ESB 0x3C8 296#define H_INT_ESB 0x3C8
296#define H_INT_SYNC 0x3CC 297#define H_INT_SYNC 0x3CC
297#define H_INT_RESET 0x3D0 298#define H_INT_RESET 0x3D0
298#define MAX_HCALL_OPCODE H_INT_RESET 299#define H_SCM_READ_METADATA 0x3E4
300#define H_SCM_WRITE_METADATA 0x3E8
301#define H_SCM_BIND_MEM 0x3EC
302#define H_SCM_UNBIND_MEM 0x3F0
303#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
304#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
305#define H_SCM_MEM_QUERY 0x3FC
306#define H_SCM_BLOCK_CLEAR 0x400
307#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR
299 308
300/* H_VIOCTL functions */ 309/* H_VIOCTL functions */
301#define H_GET_VIOA_DUMP_SIZE 0x01 310#define H_GET_VIOA_DUMP_SIZE 0x01
@@ -322,6 +331,11 @@
322#define H_GET_24X7_DATA 0xF07C 331#define H_GET_24X7_DATA 0xF07C
323#define H_GET_PERF_COUNTER_INFO 0xF080 332#define H_GET_PERF_COUNTER_INFO 0xF080
324 333
334/* Platform-specific hcalls used for nested HV KVM */
335#define H_SET_PARTITION_TABLE 0xF800
336#define H_ENTER_NESTED 0xF804
337#define H_TLB_INVALIDATE 0xF808
338
325/* Values for 2nd argument to H_SET_MODE */ 339/* Values for 2nd argument to H_SET_MODE */
326#define H_SET_MODE_RESOURCE_SET_CIABR 1 340#define H_SET_MODE_RESOURCE_SET_CIABR 1
327#define H_SET_MODE_RESOURCE_SET_DAWR 2 341#define H_SET_MODE_RESOURCE_SET_DAWR 2
@@ -461,6 +475,42 @@ struct h_cpu_char_result {
461 u64 behaviour; 475 u64 behaviour;
462}; 476};
463 477
478/* Register state for entering a nested guest with H_ENTER_NESTED */
479struct hv_guest_state {
480 u64 version; /* version of this structure layout */
481 u32 lpid;
482 u32 vcpu_token;
483 /* These registers are hypervisor privileged (at least for writing) */
484 u64 lpcr;
485 u64 pcr;
486 u64 amor;
487 u64 dpdes;
488 u64 hfscr;
489 s64 tb_offset;
490 u64 dawr0;
491 u64 dawrx0;
492 u64 ciabr;
493 u64 hdec_expiry;
494 u64 purr;
495 u64 spurr;
496 u64 ic;
497 u64 vtb;
498 u64 hdar;
499 u64 hdsisr;
500 u64 heir;
501 u64 asdr;
502 /* These are OS privileged but need to be set late in guest entry */
503 u64 srr0;
504 u64 srr1;
505 u64 sprg[4];
506 u64 pidr;
507 u64 cfar;
508 u64 ppr;
509};
510
511/* Latest version of hv_guest_state structure */
512#define HV_GUEST_STATE_VERSION 1
513
464#endif /* __ASSEMBLY__ */ 514#endif /* __ASSEMBLY__ */
465#endif /* __KERNEL__ */ 515#endif /* __KERNEL__ */
466#endif /* _ASM_POWERPC_HVCALL_H */ 516#endif /* _ASM_POWERPC_HVCALL_H */
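The hv_guest_state block is what an L1 hypervisor hands to L0 via H_ENTER_NESTED to describe the L2 vCPU it wants run, with the version field guarding future layout changes. A hedged sketch of preparing such a block follows; the helper, its parameters and the zero-fill policy are assumptions for illustration, and the actual calling convention (guest-real pointers to this block and to a register image) is defined elsewhere in the series, not by this header:

    #include <linux/string.h>
    #include <asm/hvcall.h>

    static void l1_prepare_hvregs(struct hv_guest_state *hvregs, u32 l2_lpid,
                                  u32 vcpu_id, u64 lpcr, u64 hdec_expiry)
    {
            memset(hvregs, 0, sizeof(*hvregs));
            hvregs->version     = HV_GUEST_STATE_VERSION; /* lets L0 reject layouts it does not know */
            hvregs->lpid        = l2_lpid;                /* L1's own id for the nested guest */
            hvregs->vcpu_token  = vcpu_id;
            hvregs->lpcr        = lpcr;
            hvregs->hdec_expiry = hdec_expiry;            /* when L0 should hand control back to L1 */
    }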
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index e0331e754568..3ef40b703c4a 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -3,6 +3,9 @@
3#ifdef __KERNEL__ 3#ifdef __KERNEL__
4 4
5#define ARCH_HAS_IOREMAP_WC 5#define ARCH_HAS_IOREMAP_WC
6#ifdef CONFIG_PPC32
7#define ARCH_HAS_IOREMAP_WT
8#endif
6 9
7/* 10/*
8 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
@@ -108,25 +111,6 @@ extern bool isa_io_special;
108#define IO_SET_SYNC_FLAG() 111#define IO_SET_SYNC_FLAG()
109#endif 112#endif
110 113
111/* gcc 4.0 and older doesn't have 'Z' constraint */
112#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
113#define DEF_MMIO_IN_X(name, size, insn) \
114static inline u##size name(const volatile u##size __iomem *addr) \
115{ \
116 u##size ret; \
117 __asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync" \
118 : "=r" (ret) : "r" (addr), "m" (*addr) : "memory"); \
119 return ret; \
120}
121
122#define DEF_MMIO_OUT_X(name, size, insn) \
123static inline void name(volatile u##size __iomem *addr, u##size val) \
124{ \
125 __asm__ __volatile__("sync;"#insn" %1,0,%2" \
126 : "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \
127 IO_SET_SYNC_FLAG(); \
128}
129#else /* newer gcc */
130#define DEF_MMIO_IN_X(name, size, insn) \ 114#define DEF_MMIO_IN_X(name, size, insn) \
131static inline u##size name(const volatile u##size __iomem *addr) \ 115static inline u##size name(const volatile u##size __iomem *addr) \
132{ \ 116{ \
@@ -143,7 +127,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
143 : "=Z" (*addr) : "r" (val) : "memory"); \ 127 : "=Z" (*addr) : "r" (val) : "memory"); \
144 IO_SET_SYNC_FLAG(); \ 128 IO_SET_SYNC_FLAG(); \
145} 129}
146#endif
147 130
148#define DEF_MMIO_IN_D(name, size, insn) \ 131#define DEF_MMIO_IN_D(name, size, insn) \
149static inline u##size name(const volatile u##size __iomem *addr) \ 132static inline u##size name(const volatile u##size __iomem *addr) \
@@ -746,6 +729,10 @@ static inline void iosync(void)
746 * 729 *
747 * * ioremap_wc enables write combining 730 * * ioremap_wc enables write combining
748 * 731 *
732 * * ioremap_wt enables write through
733 *
734 * * ioremap_coherent maps coherent cached memory
735 *
749 * * iounmap undoes such a mapping and can be hooked 736 * * iounmap undoes such a mapping and can be hooked
750 * 737 *
751 * * __ioremap_at (and the pending __iounmap_at) are low level functions to 738 * * __ioremap_at (and the pending __iounmap_at) are low level functions to
@@ -767,6 +754,8 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
767extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, 754extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
768 unsigned long flags); 755 unsigned long flags);
769extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); 756extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
757void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
758void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
770#define ioremap_nocache(addr, size) ioremap((addr), (size)) 759#define ioremap_nocache(addr, size) ioremap((addr), (size))
771#define ioremap_uc(addr, size) ioremap((addr), (size)) 760#define ioremap_uc(addr, size) ioremap((addr), (size))
772#define ioremap_cache(addr, size) \ 761#define ioremap_cache(addr, size) \
@@ -777,12 +766,12 @@ extern void iounmap(volatile void __iomem *addr);
777extern void __iomem *__ioremap(phys_addr_t, unsigned long size, 766extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
778 unsigned long flags); 767 unsigned long flags);
779extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, 768extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
780 unsigned long flags, void *caller); 769 pgprot_t prot, void *caller);
781 770
782extern void __iounmap(volatile void __iomem *addr); 771extern void __iounmap(volatile void __iomem *addr);
783 772
784extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, 773extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
785 unsigned long size, unsigned long flags); 774 unsigned long size, pgprot_t prot);
786extern void __iounmap_at(void *ea, unsigned long size); 775extern void __iounmap_at(void *ea, unsigned long size);
787 776
788/* 777/*
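With ioremap_wt() and ioremap_coherent() declared alongside the existing variants, a driver can pick the caching attribute per mapping; ARCH_HAS_IOREMAP_WT is only advertised on 32-bit. A hedged sketch (kernel context; the addresses, sizes and error handling are invented for illustration):

    #include <linux/errno.h>
    #include <linux/sizes.h>
    #include <asm/io.h>

    static void __iomem *fb, *mmio, *wt_buf, *shared;

    static int demo_map_regions(void)
    {
            fb     = ioremap_wc(0xc0000000, SZ_16M);        /* write-combined framebuffer   */
            mmio   = ioremap(0xd0000000, SZ_4K);            /* guarded, non-cacheable MMIO  */
            wt_buf = ioremap_wt(0xd0100000, SZ_64K);        /* write-through (PPC32 only)   */
            shared = ioremap_coherent(0xd0200000, SZ_64K);  /* cacheable, coherent memory   */

            return (fb && mmio && wt_buf && shared) ? 0 : -ENOMEM;
    }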
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 3d4b88cb8599..35db0cbc9222 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -126,7 +126,7 @@ struct iommu_table {
126 int it_nid; 126 int it_nid;
127}; 127};
128 128
129#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \ 129#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
130 ((tbl)->it_ops->useraddrptr((tbl), (entry), false)) 130 ((tbl)->it_ops->useraddrptr((tbl), (entry), false))
131#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ 131#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
132 ((tbl)->it_ops->useraddrptr((tbl), (entry), true)) 132 ((tbl)->it_ops->useraddrptr((tbl), (entry), true))
diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h
index 9db24e77b9f4..a9e098a3b881 100644
--- a/arch/powerpc/include/asm/kgdb.h
+++ b/arch/powerpc/include/asm/kgdb.h
@@ -26,9 +26,12 @@
26#define BREAK_INSTR_SIZE 4 26#define BREAK_INSTR_SIZE 4
27#define BUFMAX ((NUMREGBYTES * 2) + 512) 27#define BUFMAX ((NUMREGBYTES * 2) + 512)
28#define OUTBUFMAX ((NUMREGBYTES * 2) + 512) 28#define OUTBUFMAX ((NUMREGBYTES * 2) + 512)
29
30#define BREAK_INSTR 0x7d821008 /* twge r2, r2 */
31
29static inline void arch_kgdb_breakpoint(void) 32static inline void arch_kgdb_breakpoint(void)
30{ 33{
31 asm(".long 0x7d821008"); /* twge r2, r2 */ 34 asm(stringify_in_c(.long BREAK_INSTR));
32} 35}
33#define CACHE_FLUSH_IS_SAFE 1 36#define CACHE_FLUSH_IS_SAFE 1
34#define DBG_MAX_REG_NUM 70 37#define DBG_MAX_REG_NUM 70
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a790d5cf6ea3..1f321914676d 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -84,7 +84,6 @@
84#define BOOK3S_INTERRUPT_INST_STORAGE 0x400 84#define BOOK3S_INTERRUPT_INST_STORAGE 0x400
85#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 85#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
86#define BOOK3S_INTERRUPT_EXTERNAL 0x500 86#define BOOK3S_INTERRUPT_EXTERNAL 0x500
87#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
88#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502 87#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
89#define BOOK3S_INTERRUPT_ALIGNMENT 0x600 88#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
90#define BOOK3S_INTERRUPT_PROGRAM 0x700 89#define BOOK3S_INTERRUPT_PROGRAM 0x700
@@ -134,8 +133,7 @@
134#define BOOK3S_IRQPRIO_EXTERNAL 14 133#define BOOK3S_IRQPRIO_EXTERNAL 14
135#define BOOK3S_IRQPRIO_DECREMENTER 15 134#define BOOK3S_IRQPRIO_DECREMENTER 15
136#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16 135#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
137#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17 136#define BOOK3S_IRQPRIO_MAX 17
138#define BOOK3S_IRQPRIO_MAX 18
139 137
140#define BOOK3S_HFLAG_DCBZ32 0x1 138#define BOOK3S_HFLAG_DCBZ32 0x1
141#define BOOK3S_HFLAG_SLB 0x2 139#define BOOK3S_HFLAG_SLB 0x2
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 83a9aa3cf689..09f8e9ba69bc 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
188extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, 188extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
189 struct kvm_vcpu *vcpu, 189 struct kvm_vcpu *vcpu,
190 unsigned long ea, unsigned long dsisr); 190 unsigned long ea, unsigned long dsisr);
191extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
192 struct kvmppc_pte *gpte, u64 root,
193 u64 *pte_ret_p);
194extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
195 struct kvmppc_pte *gpte, u64 table,
196 int table_index, u64 *pte_ret_p);
191extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 197extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
192 struct kvmppc_pte *gpte, bool data, bool iswrite); 198 struct kvmppc_pte *gpte, bool data, bool iswrite);
199extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
200 unsigned int shift, struct kvm_memory_slot *memslot,
201 unsigned int lpid);
202extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
203 bool writing, unsigned long gpa,
204 unsigned int lpid);
205extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
206 unsigned long gpa,
207 struct kvm_memory_slot *memslot,
208 bool writing, bool kvm_ro,
209 pte_t *inserted_pte, unsigned int *levelp);
193extern int kvmppc_init_vm_radix(struct kvm *kvm); 210extern int kvmppc_init_vm_radix(struct kvm *kvm);
194extern void kvmppc_free_radix(struct kvm *kvm); 211extern void kvmppc_free_radix(struct kvm *kvm);
212extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
213 unsigned int lpid);
195extern int kvmppc_radix_init(void); 214extern int kvmppc_radix_init(void);
196extern void kvmppc_radix_exit(void); 215extern void kvmppc_radix_exit(void);
197extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 216extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
198 unsigned long gfn); 217 unsigned long gfn);
218extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
219 unsigned long gpa, unsigned int shift,
220 struct kvm_memory_slot *memslot,
221 unsigned int lpid);
199extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 222extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
200 unsigned long gfn); 223 unsigned long gfn);
201extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 224extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
271static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} 294static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
272#endif 295#endif
273 296
297long kvmhv_nested_init(void);
298void kvmhv_nested_exit(void);
299void kvmhv_vm_nested_init(struct kvm *kvm);
300long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
301void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
302void kvmhv_release_all_nested(struct kvm *kvm);
303long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
304long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
305int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
306 u64 time_limit, unsigned long lpcr);
307void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
308void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
309 struct hv_guest_state *hr);
310long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
311
274void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); 312void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
275 313
276extern int kvm_irq_bypass; 314extern int kvm_irq_bypass;
@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
301 339
302static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 340static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
303{ 341{
304 vcpu->arch.cr = val; 342 vcpu->arch.regs.ccr = val;
305} 343}
306 344
307static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 345static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
308{ 346{
309 return vcpu->arch.cr; 347 return vcpu->arch.regs.ccr;
310} 348}
311 349
312static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) 350static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
384/* TO = 31 for unconditional trap */ 422/* TO = 31 for unconditional trap */
385#define INS_TW 0x7fe00008 423#define INS_TW 0x7fe00008
386 424
387/* LPIDs we support with this build -- runtime limit may be lower */
388#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
389
390#define SPLIT_HACK_MASK 0xff000000 425#define SPLIT_HACK_MASK 0xff000000
391#define SPLIT_HACK_OFFS 0xfb000000 426#define SPLIT_HACK_OFFS 0xfb000000
392 427
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index dc435a5af7d6..6d298145d564 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -23,6 +23,108 @@
23#include <linux/string.h> 23#include <linux/string.h>
24#include <asm/bitops.h> 24#include <asm/bitops.h>
25#include <asm/book3s/64/mmu-hash.h> 25#include <asm/book3s/64/mmu-hash.h>
26#include <asm/cpu_has_feature.h>
27#include <asm/ppc-opcode.h>
28
29#ifdef CONFIG_PPC_PSERIES
30static inline bool kvmhv_on_pseries(void)
31{
32 return !cpu_has_feature(CPU_FTR_HVMODE);
33}
34#else
35static inline bool kvmhv_on_pseries(void)
36{
37 return false;
38}
39#endif
40
41/*
42 * Structure for a nested guest, that is, for a guest that is managed by
43 * one of our guests.
44 */
45struct kvm_nested_guest {
46 struct kvm *l1_host; /* L1 VM that owns this nested guest */
47 int l1_lpid; /* lpid L1 guest thinks this guest is */
48 int shadow_lpid; /* real lpid of this nested guest */
49 pgd_t *shadow_pgtable; /* our page table for this guest */
50 u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
51 u64 process_table; /* process table entry for this guest */
52 long refcnt; /* number of pointers to this struct */
53 struct mutex tlb_lock; /* serialize page faults and tlbies */
54 struct kvm_nested_guest *next;
55 cpumask_t need_tlb_flush;
56 cpumask_t cpu_in_guest;
57 short prev_cpu[NR_CPUS];
58};
59
60/*
61 * We define a nested rmap entry as a single 64-bit quantity
62 * 0xFFF0000000000000 12-bit lpid field
63 * 0x000FFFFFFFFFF000 40-bit guest 4k page frame number
64 * 0x0000000000000001 1-bit single entry flag
65 */
66#define RMAP_NESTED_LPID_MASK 0xFFF0000000000000UL
67#define RMAP_NESTED_LPID_SHIFT (52)
68#define RMAP_NESTED_GPA_MASK 0x000FFFFFFFFFF000UL
69#define RMAP_NESTED_IS_SINGLE_ENTRY 0x0000000000000001UL
70
71/* Structure for a nested guest rmap entry */
72struct rmap_nested {
73 struct llist_node list;
74 u64 rmap;
75};
76
77/*
78 * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
79 * safe against removal of the list entry or NULL list
80 * @pos: a (struct rmap_nested *) to use as a loop cursor
81 * @node: pointer to the first entry
82 * NOTE: this can be NULL
83 * @rmapp: an (unsigned long *) in which to return the rmap entries on each
84 * iteration
85 * NOTE: this must point to already allocated memory
86 *
87 * The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
88 * rmap entry in the memslot. The list is always terminated by a "single entry"
89 * stored in the list element of the final entry of the llist. If there is ONLY
90 * a single entry then this is itself in the rmap entry of the memslot, not a
91 * llist head pointer.
92 *
93 * Note that the iterator below assumes that a nested rmap entry is always
94 * non-zero. This is true for our usage because the LPID field is always
95 * non-zero (zero is reserved for the host).
96 *
97 * This should be used to iterate over the list of rmap_nested entries with
98 * processing done on the u64 rmap value given by each iteration. This is safe
99 * against removal of list entries and it is always safe to call free on (pos).
100 *
101 * e.g.
102 * struct rmap_nested *cursor;
103 * struct llist_node *first;
104 * unsigned long rmap;
105 * for_each_nest_rmap_safe(cursor, first, &rmap) {
106 * do_something(rmap);
107 * free(cursor);
108 * }
109 */
110#define for_each_nest_rmap_safe(pos, node, rmapp) \
111 for ((pos) = llist_entry((node), typeof(*(pos)), list); \
112 (node) && \
113 (*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
114 ((u64) (node)) : ((pos)->rmap))) && \
115 (((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
116 ((struct llist_node *) ((pos) = NULL)) : \
117 (pos)->list.next)), true); \
118 (pos) = llist_entry((node), typeof(*(pos)), list))
119
120struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
121 bool create);
122void kvmhv_put_nested(struct kvm_nested_guest *gp);
123int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
124
125/* Encoding of first parameter for H_TLB_INVALIDATE */
126#define H_TLBIE_P1_ENC(ric, prs, r) (___PPC_RIC(ric) | ___PPC_PRS(prs) | \
127 ___PPC_R(r))
26 128
27/* Power architecture requires HPT is at least 256kiB, at most 64TiB */ 129/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
28#define PPC_MIN_HPT_ORDER 18 130#define PPC_MIN_HPT_ORDER 18
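The single-u64 packing that the nested-rmap comment above describes is easy to sanity-check outside the kernel. The following standalone C sketch reuses the mask values from this hunk with made-up lpid/gpa numbers; it only illustrates the bit layout and is not code from the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define RMAP_NESTED_LPID_MASK        0xFFF0000000000000ULL
    #define RMAP_NESTED_LPID_SHIFT       52
    #define RMAP_NESTED_GPA_MASK         0x000FFFFFFFFFF000ULL
    #define RMAP_NESTED_IS_SINGLE_ENTRY  0x0000000000000001ULL

    int main(void)
    {
            uint64_t lpid = 5, gpa = 0x12345000ULL;

            /* pack: lpid in bits 63..52, guest page frame in bits 51..12, single-entry flag in bit 0 */
            uint64_t rmap = (lpid << RMAP_NESTED_LPID_SHIFT) |
                            (gpa & RMAP_NESTED_GPA_MASK) |
                            RMAP_NESTED_IS_SINGLE_ENTRY;

            /* unpack and print: lpid=5 gpa=0x12345000 single=1 */
            printf("lpid=%llu gpa=0x%llx single=%llu\n",
                   (unsigned long long)((rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT),
                   (unsigned long long)(rmap & RMAP_NESTED_GPA_MASK),
                   (unsigned long long)(rmap & RMAP_NESTED_IS_SINGLE_ENTRY));
            return 0;
    }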
@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
435} 537}
436 538
437extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); 539extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
540extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
438 541
439extern void kvmhv_rm_send_ipi(int cpu); 542extern void kvmhv_rm_send_ipi(int cpu);
440 543
@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr)
482#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 585#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
483static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) 586static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
484{ 587{
485 vcpu->arch.cr = vcpu->arch.cr_tm; 588 vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
486 vcpu->arch.regs.xer = vcpu->arch.xer_tm; 589 vcpu->arch.regs.xer = vcpu->arch.xer_tm;
487 vcpu->arch.regs.link = vcpu->arch.lr_tm; 590 vcpu->arch.regs.link = vcpu->arch.lr_tm;
488 vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; 591 vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
499 602
500static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) 603static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
501{ 604{
502 vcpu->arch.cr_tm = vcpu->arch.cr; 605 vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
503 vcpu->arch.xer_tm = vcpu->arch.regs.xer; 606 vcpu->arch.xer_tm = vcpu->arch.regs.xer;
504 vcpu->arch.lr_tm = vcpu->arch.regs.link; 607 vcpu->arch.lr_tm = vcpu->arch.regs.link;
505 vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; 608 vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
515} 618}
516#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 619#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
517 620
621extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
622 unsigned long gpa, unsigned int level,
623 unsigned long mmu_seq, unsigned int lpid,
624 unsigned long *rmapp, struct rmap_nested **n_rmap);
625extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
626 struct rmap_nested **n_rmap);
627extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
628 struct kvm_memory_slot *memslot,
629 unsigned long gpa, unsigned long hpa,
630 unsigned long nbytes);
631
518#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 632#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
519 633
520#endif /* __ASM_KVM_BOOK3S_64_H__ */ 634#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index d978fdf698af..eb3ba6390108 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,9 @@
25#define XICS_MFRR 0xc 25#define XICS_MFRR 0xc
26#define XICS_IPI 2 /* interrupt source # for IPIs */ 26#define XICS_IPI 2 /* interrupt source # for IPIs */
27 27
28/* LPIDs we support with this build -- runtime limit may be lower */
29#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
30
28/* Maximum number of threads per physical core */ 31/* Maximum number of threads per physical core */
29#define MAX_SMT_THREADS 8 32#define MAX_SMT_THREADS 8
30 33
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index d513e3ed1c65..f0cef625f17c 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
46 46
47static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 47static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
48{ 48{
49 vcpu->arch.cr = val; 49 vcpu->arch.regs.ccr = val;
50} 50}
51 51
52static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 52static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
53{ 53{
54 return vcpu->arch.cr; 54 return vcpu->arch.regs.ccr;
55} 55}
56 56
57static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) 57static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 906bcbdfd2a1..fac6f631ed29 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
46#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 46#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
47#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */ 47#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
48#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES) 48#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
49#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS
49 50
50#else 51#else
51#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS 52#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -94,6 +95,7 @@ struct dtl_entry;
94 95
95struct kvmppc_vcpu_book3s; 96struct kvmppc_vcpu_book3s;
96struct kvmppc_book3s_shadow_vcpu; 97struct kvmppc_book3s_shadow_vcpu;
98struct kvm_nested_guest;
97 99
98struct kvm_vm_stat { 100struct kvm_vm_stat {
99 ulong remote_tlb_flush; 101 ulong remote_tlb_flush;
@@ -287,10 +289,12 @@ struct kvm_arch {
287 u8 radix; 289 u8 radix;
288 u8 fwnmi_enabled; 290 u8 fwnmi_enabled;
289 bool threads_indep; 291 bool threads_indep;
292 bool nested_enable;
290 pgd_t *pgtable; 293 pgd_t *pgtable;
291 u64 process_table; 294 u64 process_table;
292 struct dentry *debugfs_dir; 295 struct dentry *debugfs_dir;
293 struct dentry *htab_dentry; 296 struct dentry *htab_dentry;
297 struct dentry *radix_dentry;
294 struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ 298 struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
295#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 299#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
296#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 300#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -311,6 +315,9 @@ struct kvm_arch {
311#endif 315#endif
312 struct kvmppc_ops *kvm_ops; 316 struct kvmppc_ops *kvm_ops;
313#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 317#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
318 u64 l1_ptcr;
319 int max_nested_lpid;
320 struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
314 /* This array can grow quite large, keep it at the end */ 321 /* This array can grow quite large, keep it at the end */
315 struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; 322 struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
316#endif 323#endif
@@ -360,7 +367,9 @@ struct kvmppc_pte {
360 bool may_write : 1; 367 bool may_write : 1;
361 bool may_execute : 1; 368 bool may_execute : 1;
362 unsigned long wimg; 369 unsigned long wimg;
370 unsigned long rc;
363 u8 page_size; /* MMU_PAGE_xxx */ 371 u8 page_size; /* MMU_PAGE_xxx */
372 u8 page_shift;
364}; 373};
365 374
366struct kvmppc_mmu { 375struct kvmppc_mmu {
@@ -537,8 +546,6 @@ struct kvm_vcpu_arch {
537 ulong tar; 546 ulong tar;
538#endif 547#endif
539 548
540 u32 cr;
541
542#ifdef CONFIG_PPC_BOOK3S 549#ifdef CONFIG_PPC_BOOK3S
543 ulong hflags; 550 ulong hflags;
544 ulong guest_owned_ext; 551 ulong guest_owned_ext;
@@ -707,6 +714,7 @@ struct kvm_vcpu_arch {
707 u8 hcall_needed; 714 u8 hcall_needed;
708 u8 epr_flags; /* KVMPPC_EPR_xxx */ 715 u8 epr_flags; /* KVMPPC_EPR_xxx */
709 u8 epr_needed; 716 u8 epr_needed;
717 u8 external_oneshot; /* clear external irq after delivery */
710 718
711 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ 719 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
712 720
@@ -781,6 +789,10 @@ struct kvm_vcpu_arch {
781 u32 emul_inst; 789 u32 emul_inst;
782 790
783 u32 online; 791 u32 online;
792
793 /* For support of nested guests */
794 struct kvm_nested_guest *nested;
795 u32 nested_vcpu_id;
784#endif 796#endif
785 797
786#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 798#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e991821dd7fa..9b89b1918dfc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
194 (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ 194 (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
195 (stt)->size, (ioba), (npages)) ? \ 195 (stt)->size, (ioba), (npages)) ? \
196 H_PARAMETER : H_SUCCESS) 196 H_PARAMETER : H_SUCCESS)
197extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, 197extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
198 unsigned long tce);
199extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
200 unsigned long *ua, unsigned long **prmap); 198 unsigned long *ua, unsigned long **prmap);
201extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, 199extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
202 unsigned long idx, unsigned long tce); 200 unsigned long idx, unsigned long tce);
@@ -327,6 +325,7 @@ struct kvmppc_ops {
327 int (*set_smt_mode)(struct kvm *kvm, unsigned long mode, 325 int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
328 unsigned long flags); 326 unsigned long flags);
329 void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr); 327 void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
328 int (*enable_nested)(struct kvm *kvm);
330}; 329};
331 330
332extern struct kvmppc_ops *kvmppc_hv_ops; 331extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -585,6 +584,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
585 584
586extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, 585extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
587 int level, bool line_status); 586 int level, bool line_status);
587extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
588#else 588#else
589static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, 589static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
590 u32 priority) { return -1; } 590 u32 priority) { return -1; }
@@ -607,6 +607,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
607 607
608static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, 608static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
609 int level, bool line_status) { return -ENODEV; } 609 int level, bool line_status) { return -ENODEV; }
610static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
610#endif /* CONFIG_KVM_XIVE */ 611#endif /* CONFIG_KVM_XIVE */
611 612
612/* 613/*
@@ -652,6 +653,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
652 unsigned long mfrr); 653 unsigned long mfrr);
653int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); 654int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
654int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); 655int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
656void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
655 657
656/* 658/*
657 * Host-side operations we want to set up while running in real 659 * Host-side operations we want to set up while running in real
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index a47de82fb8e2..8311869005fa 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -35,7 +35,7 @@ struct machdep_calls {
35 char *name; 35 char *name;
36#ifdef CONFIG_PPC64 36#ifdef CONFIG_PPC64
37 void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, 37 void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size,
38 unsigned long flags, void *caller); 38 pgprot_t prot, void *caller);
39 void (*iounmap)(volatile void __iomem *token); 39 void (*iounmap)(volatile void __iomem *token);
40 40
41#ifdef CONFIG_PM 41#ifdef CONFIG_PM
@@ -108,6 +108,7 @@ struct machdep_calls {
108 108
109 /* Early exception handlers called in realmode */ 109 /* Early exception handlers called in realmode */
110 int (*hmi_exception_early)(struct pt_regs *regs); 110 int (*hmi_exception_early)(struct pt_regs *regs);
111 long (*machine_check_early)(struct pt_regs *regs);
111 112
112 /* Called during machine check exception to retrive fixup address. */ 113 /* Called during machine check exception to retrive fixup address. */
113 bool (*mce_check_early_recovery)(struct pt_regs *regs); 114 bool (*mce_check_early_recovery)(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 3a1226e9b465..a8b8903e1844 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -210,4 +210,7 @@ extern void release_mce_event(void);
210extern void machine_check_queue_event(void); 210extern void machine_check_queue_event(void);
211extern void machine_check_print_event_info(struct machine_check_event *evt, 211extern void machine_check_print_event_info(struct machine_check_event *evt,
212 bool user_mode); 212 bool user_mode);
213#ifdef CONFIG_PPC_BOOK3S_64
214void flush_and_reload_slb(void);
215#endif /* CONFIG_PPC_BOOK3S_64 */
213#endif /* __ASM_PPC64_MCE_H__ */ 216#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 13ea441ac531..eb20eb3b8fb0 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -309,6 +309,21 @@ static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
309 */ 309 */
310#define MMU_PAGE_COUNT 16 310#define MMU_PAGE_COUNT 16
311 311
312/*
313 * If we store section details in page->flags we can't increase MAX_PHYSMEM_BITS.
314 * If we increase SECTIONS_WIDTH we will not store node details in page->flags,
315 * and page_to_nid does a page->section->node lookup instead.
316 * Hence only increase it for VMEMMAP. Further, depend on SPARSEMEM_EXTREME to
317 * reduce the memory requirements of the larger number of sections.
318 * 51 bits is the max physical real address on POWER9
319 */
320#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
321 defined (CONFIG_PPC_64K_PAGES)
322#define MAX_PHYSMEM_BITS 51
323#else
324#define MAX_PHYSMEM_BITS 46
325#endif
326
312#ifdef CONFIG_PPC_BOOK3S_64 327#ifdef CONFIG_PPC_BOOK3S_64
313#include <asm/book3s/64/mmu.h> 328#include <asm/book3s/64/mmu.h>
314#else /* CONFIG_PPC_BOOK3S_64 */ 329#else /* CONFIG_PPC_BOOK3S_64 */
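For scale, the two limits above differ by a factor of 32: 46 bits of real address cover 2^46 bytes = 64 TiB, while the 51-bit value used when SPARSEMEM_VMEMMAP, SPARSEMEM_EXTREME and 64K pages are all enabled covers 2^51 bytes = 2 PiB, matching the POWER9 maximum real address noted in the comment.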
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b694d6af1150..0381394a425b 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -82,7 +82,7 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
82{ 82{
83 int context_id; 83 int context_id;
84 84
85 context_id = get_ea_context(&mm->context, ea); 85 context_id = get_user_context(&mm->context, ea);
86 if (!context_id) 86 if (!context_id)
87 return true; 87 return true;
88 return false; 88 return false;
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index fad8ddd697ac..0abf2e7fd222 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -393,7 +393,14 @@ extern struct bus_type mpic_subsys;
393#define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ 393#define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */
394 394
395/* Get the version of primary MPIC */ 395/* Get the version of primary MPIC */
396#ifdef CONFIG_MPIC
396extern u32 fsl_mpic_primary_get_version(void); 397extern u32 fsl_mpic_primary_get_version(void);
398#else
399static inline u32 fsl_mpic_primary_get_version(void)
400{
401 return 0;
402}
403#endif
397 404
398/* Allocate the controller structure and setup the linux irq descs 405/* Allocate the controller structure and setup the linux irq descs
399 * for the range if interrupts passed in. No HW initialization is 406 * for the range if interrupts passed in. No HW initialization is
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index a507a65b0866..3ffb0ff5a038 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -128,14 +128,65 @@ extern int icache_44x_need_flush;
128#include <asm/nohash/32/pte-8xx.h> 128#include <asm/nohash/32/pte-8xx.h>
129#endif 129#endif
130 130
131/* And here we include common definitions */ 131/*
132#include <asm/pte-common.h> 132 * Location of the PFN in the PTE. Most 32-bit platforms use the same
133 * as _PAGE_SHIFT here (ie, naturally aligned).
134 * Platforms for which this differs pre-define the value themselves, so we don't override it here.
135 */
136#ifndef PTE_RPN_SHIFT
137#define PTE_RPN_SHIFT (PAGE_SHIFT)
138#endif
139
140/*
141 * The mask covered by the RPN must be a ULL on 32-bit platforms with
142 * 64-bit PTEs.
143 */
144#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
145#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
146#else
147#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
148#endif
149
150/*
151 * _PAGE_CHG_MASK masks of bits that are to be preserved across
152 * pgprot changes.
153 */
154#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
133 155
134#ifndef __ASSEMBLY__ 156#ifndef __ASSEMBLY__
135 157
136#define pte_clear(mm, addr, ptep) \ 158#define pte_clear(mm, addr, ptep) \
137 do { pte_update(ptep, ~0, 0); } while (0) 159 do { pte_update(ptep, ~0, 0); } while (0)
138 160
161#ifndef pte_mkwrite
162static inline pte_t pte_mkwrite(pte_t pte)
163{
164 return __pte(pte_val(pte) | _PAGE_RW);
165}
166#endif
167
168static inline pte_t pte_mkdirty(pte_t pte)
169{
170 return __pte(pte_val(pte) | _PAGE_DIRTY);
171}
172
173static inline pte_t pte_mkyoung(pte_t pte)
174{
175 return __pte(pte_val(pte) | _PAGE_ACCESSED);
176}
177
178#ifndef pte_wrprotect
179static inline pte_t pte_wrprotect(pte_t pte)
180{
181 return __pte(pte_val(pte) & ~_PAGE_RW);
182}
183#endif
184
185static inline pte_t pte_mkexec(pte_t pte)
186{
187 return __pte(pte_val(pte) | _PAGE_EXEC);
188}
189
139#define pmd_none(pmd) (!pmd_val(pmd)) 190#define pmd_none(pmd) (!pmd_val(pmd))
140#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) 191#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
141#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK) 192#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
@@ -244,23 +295,21 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
244static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, 295static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
245 pte_t *ptep) 296 pte_t *ptep)
246{ 297{
247 pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO); 298 unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
248} 299 unsigned long set = pte_val(pte_wrprotect(__pte(0)));
249static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
250 unsigned long addr, pte_t *ptep)
251{
252 ptep_set_wrprotect(mm, addr, ptep);
253}
254 300
301 pte_update(ptep, clr, set);
302}
255 303
256static inline void __ptep_set_access_flags(struct vm_area_struct *vma, 304static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
257 pte_t *ptep, pte_t entry, 305 pte_t *ptep, pte_t entry,
258 unsigned long address, 306 unsigned long address,
259 int psize) 307 int psize)
260{ 308{
261 unsigned long set = pte_val(entry) & 309 pte_t pte_set = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(0)))));
262 (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); 310 pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0)))));
263 unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA); 311 unsigned long set = pte_val(entry) & pte_val(pte_set);
312 unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr);
264 313
265 pte_update(ptep, clr, set); 314 pte_update(ptep, clr, set);
266 315
@@ -323,7 +372,7 @@ static inline int pte_young(pte_t pte)
323#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) 372#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
324#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) 373#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
325 374
326int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); 375int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
327 376
328#endif /* !__ASSEMBLY__ */ 377#endif /* !__ASSEMBLY__ */
329 378
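The rewritten ptep_set_wrprotect() and __ptep_set_access_flags() above no longer hard-code flag names: they derive the bits to clear and to set by applying the per-platform helpers (pte_wrprotect(), pte_mkdirty(), and so on) to an all-ones and an all-zero PTE, so a platform that write-protects by clearing an RW bit and one that does it by setting an RO bit both feed the right masks into pte_update(). A standalone, hedged illustration of that derivation with an invented flag value (not a real powerpc definition):

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_PAGE_RO 0x0400  /* pretend platform that write-protects by setting an RO bit */

    static uint32_t demo_pte_wrprotect(uint32_t pte) { return pte | DEMO_PAGE_RO; }

    int main(void)
    {
            uint32_t clr = ~demo_pte_wrprotect(~0u);  /* bits the helper clears: none here */
            uint32_t set = demo_pte_wrprotect(0u);    /* bits the helper sets: the RO bit  */

            printf("clr=0x%x set=0x%x\n", clr, set);  /* prints clr=0x0 set=0x400 */
            /* a platform whose helper cleared an RW bit would instead yield clr=RW, set=0 */
            return 0;
    }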
diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h
index bb4b3a4b92a0..661f4599f2fc 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -50,13 +50,56 @@
50#define _PAGE_EXEC 0x200 /* hardware: EX permission */ 50#define _PAGE_EXEC 0x200 /* hardware: EX permission */
51#define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ 51#define _PAGE_ACCESSED 0x400 /* software: R: page referenced */
52 52
53/* No page size encoding in the linux PTE */
54#define _PAGE_PSIZE 0
55
56/* cache related flags non existing on 40x */
57#define _PAGE_COHERENT 0
58
59#define _PAGE_KERNEL_RO 0
60#define _PAGE_KERNEL_ROX _PAGE_EXEC
61#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
62#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
63
53#define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */ 64#define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */
65#define _PMD_PRESENT_MASK _PMD_PRESENT
54#define _PMD_BAD 0x802 66#define _PMD_BAD 0x802
55#define _PMD_SIZE_4M 0x0c0 67#define _PMD_SIZE_4M 0x0c0
56#define _PMD_SIZE_16M 0x0e0 68#define _PMD_SIZE_16M 0x0e0
69#define _PMD_USER 0
70
71#define _PTE_NONE_MASK 0
57 72
58/* Until my rework is finished, 40x still needs atomic PTE updates */ 73/* Until my rework is finished, 40x still needs atomic PTE updates */
59#define PTE_ATOMIC_UPDATES 1 74#define PTE_ATOMIC_UPDATES 1
60 75
76#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
77#define _PAGE_BASE (_PAGE_BASE_NC)
78
79/* Permission masks used to generate the __P and __S table */
80#define PAGE_NONE __pgprot(_PAGE_BASE)
81#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
82#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
83#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
84#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
85#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
86#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
87
88#ifndef __ASSEMBLY__
89static inline pte_t pte_wrprotect(pte_t pte)
90{
91 return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE));
92}
93
94#define pte_wrprotect pte_wrprotect
95
96static inline pte_t pte_mkclean(pte_t pte)
97{
98 return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
99}
100
101#define pte_mkclean pte_mkclean
102#endif
103
61#endif /* __KERNEL__ */ 104#endif /* __KERNEL__ */
62#endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */ 105#endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h
index f812c0272364..78bc304f750e 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h
@@ -85,14 +85,44 @@
85#define _PAGE_NO_CACHE 0x00000400 /* H: I bit */ 85#define _PAGE_NO_CACHE 0x00000400 /* H: I bit */
86#define _PAGE_WRITETHRU 0x00000800 /* H: W bit */ 86#define _PAGE_WRITETHRU 0x00000800 /* H: W bit */
87 87
88/* No page size encoding in the linux PTE */
89#define _PAGE_PSIZE 0
90
91#define _PAGE_KERNEL_RO 0
92#define _PAGE_KERNEL_ROX _PAGE_EXEC
93#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW)
94#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
95
88/* TODO: Add large page lowmem mapping support */ 96/* TODO: Add large page lowmem mapping support */
89#define _PMD_PRESENT 0 97#define _PMD_PRESENT 0
90#define _PMD_PRESENT_MASK (PAGE_MASK) 98#define _PMD_PRESENT_MASK (PAGE_MASK)
91#define _PMD_BAD (~PAGE_MASK) 99#define _PMD_BAD (~PAGE_MASK)
100#define _PMD_USER 0
92 101
93/* ERPN in a PTE never gets cleared, ignore it */ 102/* ERPN in a PTE never gets cleared, ignore it */
94#define _PTE_NONE_MASK 0xffffffff00000000ULL 103#define _PTE_NONE_MASK 0xffffffff00000000ULL
95 104
105/*
106 * We define 2 sets of base prot bits, one for basic pages (ie,
107 * cacheable kernel and user pages) and one for non cacheable
108 * pages. We always set _PAGE_COHERENT when SMP is enabled or
109 * the processor might need it for DMA coherency.
110 */
111#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
112#if defined(CONFIG_SMP)
113#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
114#else
115#define _PAGE_BASE (_PAGE_BASE_NC)
116#endif
117
118/* Permission masks used to generate the __P and __S table */
119#define PAGE_NONE __pgprot(_PAGE_BASE)
120#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
121#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
122#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
123#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
124#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
125#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
96 126
97#endif /* __KERNEL__ */ 127#endif /* __KERNEL__ */
98#endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */ 128#endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index f04cb46ae8a1..6bfe041ef59d 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -29,10 +29,10 @@
29 */ 29 */
30 30
31/* Definitions for 8xx embedded chips. */ 31/* Definitions for 8xx embedded chips. */
32#define _PAGE_PRESENT 0x0001 /* Page is valid */ 32#define _PAGE_PRESENT 0x0001 /* V: Page is valid */
33#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ 33#define _PAGE_NO_CACHE 0x0002 /* CI: cache inhibit */
34#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */ 34#define _PAGE_SH 0x0004 /* SH: No ASID (context) compare */
35#define _PAGE_HUGE 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/ 35#define _PAGE_SPS 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
36#define _PAGE_DIRTY 0x0100 /* C: page changed */ 36#define _PAGE_DIRTY 0x0100 /* C: page changed */
37 37
38/* These 4 software bits must be masked out when the L2 entry is loaded 38/* These 4 software bits must be masked out when the L2 entry is loaded
@@ -46,18 +46,95 @@
46#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ 46#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
47#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ 47#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
48 48
49/* cache related flags non existing on 8xx */
50#define _PAGE_COHERENT 0
51#define _PAGE_WRITETHRU 0
52
53#define _PAGE_KERNEL_RO (_PAGE_SH | _PAGE_RO)
54#define _PAGE_KERNEL_ROX (_PAGE_SH | _PAGE_RO | _PAGE_EXEC)
55#define _PAGE_KERNEL_RW (_PAGE_SH | _PAGE_DIRTY)
56#define _PAGE_KERNEL_RWX (_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
57
49#define _PMD_PRESENT 0x0001 58#define _PMD_PRESENT 0x0001
59#define _PMD_PRESENT_MASK _PMD_PRESENT
50#define _PMD_BAD 0x0fd0 60#define _PMD_BAD 0x0fd0
51#define _PMD_PAGE_MASK 0x000c 61#define _PMD_PAGE_MASK 0x000c
52#define _PMD_PAGE_8M 0x000c 62#define _PMD_PAGE_8M 0x000c
53#define _PMD_PAGE_512K 0x0004 63#define _PMD_PAGE_512K 0x0004
54#define _PMD_USER 0x0020 /* APG 1 */ 64#define _PMD_USER 0x0020 /* APG 1 */
55 65
66#define _PTE_NONE_MASK 0
67
56/* Until my rework is finished, 8xx still needs atomic PTE updates */ 68/* Until my rework is finished, 8xx still needs atomic PTE updates */
57#define PTE_ATOMIC_UPDATES 1 69#define PTE_ATOMIC_UPDATES 1
58 70
59#ifdef CONFIG_PPC_16K_PAGES 71#ifdef CONFIG_PPC_16K_PAGES
60#define _PAGE_PSIZE _PAGE_HUGE 72#define _PAGE_PSIZE _PAGE_SPS
73#else
74#define _PAGE_PSIZE 0
75#endif
76
77#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
78#define _PAGE_BASE (_PAGE_BASE_NC)
79
80/* Permission masks used to generate the __P and __S table */
81#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA)
82#define PAGE_SHARED __pgprot(_PAGE_BASE)
83#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_EXEC)
84#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO)
85#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
86#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO)
87#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
88
89#ifndef __ASSEMBLY__
90static inline pte_t pte_wrprotect(pte_t pte)
91{
92 return __pte(pte_val(pte) | _PAGE_RO);
93}
94
95#define pte_wrprotect pte_wrprotect
96
97static inline int pte_write(pte_t pte)
98{
99 return !(pte_val(pte) & _PAGE_RO);
100}
101
102#define pte_write pte_write
103
104static inline pte_t pte_mkwrite(pte_t pte)
105{
106 return __pte(pte_val(pte) & ~_PAGE_RO);
107}
108
109#define pte_mkwrite pte_mkwrite
110
111static inline bool pte_user(pte_t pte)
112{
113 return !(pte_val(pte) & _PAGE_SH);
114}
115
116#define pte_user pte_user
117
118static inline pte_t pte_mkprivileged(pte_t pte)
119{
120 return __pte(pte_val(pte) | _PAGE_SH);
121}
122
123#define pte_mkprivileged pte_mkprivileged
124
125static inline pte_t pte_mkuser(pte_t pte)
126{
127 return __pte(pte_val(pte) & ~_PAGE_SH);
128}
129
130#define pte_mkuser pte_mkuser
131
132static inline pte_t pte_mkhuge(pte_t pte)
133{
134 return __pte(pte_val(pte) | _PAGE_SPS);
135}
136
137#define pte_mkhuge pte_mkhuge
61#endif 138#endif
62 139
63#endif /* __KERNEL__ */ 140#endif /* __KERNEL__ */
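The 8xx encodes protection the opposite way from most platforms: there is no RW bit, _PAGE_RO set means read-only, and _PAGE_SH set means the page is not a user page, which is why pte_write(), pte_wrprotect(), pte_user() and friends are overridden locally above. A minimal sketch of the inverted sense, with placeholder bit positions:

#include <assert.h>

/* Placeholder bit positions; only the inverted sense mirrors the 8xx header. */
#define MY_PAGE_RO (1u << 0)    /* set => read-only                       */
#define MY_PAGE_SH (1u << 1)    /* set => shared/kernel, i.e. not a user page */

static int pte_write_8xx(unsigned long pte)  { return !(pte & MY_PAGE_RO); }
static unsigned long wrprotect_8xx(unsigned long pte) { return pte | MY_PAGE_RO; }
static int pte_user_8xx(unsigned long pte)   { return !(pte & MY_PAGE_SH); }

int main(void)
{
        unsigned long pte = 0;                  /* writable user page */

        assert(pte_write_8xx(pte) && pte_user_8xx(pte));
        pte = wrprotect_8xx(pte);               /* set RO instead of clearing RW */
        assert(!pte_write_8xx(pte));
        return 0;
}
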
diff --git a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
index d1ee24e9e137..0fc1bd42bb3e 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
@@ -31,11 +31,44 @@
31#define _PAGE_WRITETHRU 0x00400 /* H: W bit */ 31#define _PAGE_WRITETHRU 0x00400 /* H: W bit */
32#define _PAGE_SPECIAL 0x00800 /* S: Special page */ 32#define _PAGE_SPECIAL 0x00800 /* S: Special page */
33 33
34#define _PAGE_KERNEL_RO 0
35#define _PAGE_KERNEL_ROX _PAGE_EXEC
36#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW)
37#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
38
39/* No page size encoding in the linux PTE */
40#define _PAGE_PSIZE 0
41
34#define _PMD_PRESENT 0 42#define _PMD_PRESENT 0
35#define _PMD_PRESENT_MASK (PAGE_MASK) 43#define _PMD_PRESENT_MASK (PAGE_MASK)
36#define _PMD_BAD (~PAGE_MASK) 44#define _PMD_BAD (~PAGE_MASK)
45#define _PMD_USER 0
46
47#define _PTE_NONE_MASK 0
37 48
38#define PTE_WIMGE_SHIFT (6) 49#define PTE_WIMGE_SHIFT (6)
39 50
51/*
52 * We define 2 sets of base prot bits, one for basic pages (ie,
53 * cacheable kernel and user pages) and one for non cacheable
54 * pages. We always set _PAGE_COHERENT when SMP is enabled or
55 * the processor might need it for DMA coherency.
56 */
57#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
58#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
59#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
60#else
61#define _PAGE_BASE (_PAGE_BASE_NC)
62#endif
63
64/* Permission masks used to generate the __P and __S table */
65#define PAGE_NONE __pgprot(_PAGE_BASE)
66#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
67#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
68#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
69#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
70#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
71#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
72
40#endif /* __KERNEL__ */ 73#endif /* __KERNEL__ */
41#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */ 74#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 7cd6809f4d33..e77ed9761632 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -89,11 +89,47 @@
89 * Include the PTE bits definitions 89 * Include the PTE bits definitions
90 */ 90 */
91#include <asm/nohash/pte-book3e.h> 91#include <asm/nohash/pte-book3e.h>
92#include <asm/pte-common.h> 92
93#define _PAGE_SAO 0
94
95#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
96
97/*
98 * _PAGE_CHG_MASK masks of bits that are to be preserved across
99 * pgprot changes.
100 */
101#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
102
103#define H_PAGE_4K_PFN 0
93 104
94#ifndef __ASSEMBLY__ 105#ifndef __ASSEMBLY__
95/* pte_clear moved to later in this file */ 106/* pte_clear moved to later in this file */
96 107
108static inline pte_t pte_mkwrite(pte_t pte)
109{
110 return __pte(pte_val(pte) | _PAGE_RW);
111}
112
113static inline pte_t pte_mkdirty(pte_t pte)
114{
115 return __pte(pte_val(pte) | _PAGE_DIRTY);
116}
117
118static inline pte_t pte_mkyoung(pte_t pte)
119{
120 return __pte(pte_val(pte) | _PAGE_ACCESSED);
121}
122
123static inline pte_t pte_wrprotect(pte_t pte)
124{
125 return __pte(pte_val(pte) & ~_PAGE_RW);
126}
127
128static inline pte_t pte_mkexec(pte_t pte)
129{
130 return __pte(pte_val(pte) | _PAGE_EXEC);
131}
132
97#define PMD_BAD_BITS (PTE_TABLE_SIZE-1) 133#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
98#define PUD_BAD_BITS (PMD_TABLE_SIZE-1) 134#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
99 135
@@ -239,6 +275,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
239 pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); 275 pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
240} 276}
241 277
278#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
242static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, 279static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
243 unsigned long addr, pte_t *ptep) 280 unsigned long addr, pte_t *ptep)
244{ 281{
@@ -313,9 +350,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
313#define MAX_SWAPFILES_CHECK() do { \ 350#define MAX_SWAPFILES_CHECK() do { \
314 BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ 351 BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
315 } while (0) 352 } while (0)
316/* 353
317 * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
318 */
319#define SWP_TYPE_BITS 5 354#define SWP_TYPE_BITS 5
320#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ 355#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
321 & ((1UL << SWP_TYPE_BITS) - 1)) 356 & ((1UL << SWP_TYPE_BITS) - 1))
@@ -327,8 +362,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
327#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) 362#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
328#define __swp_entry_to_pte(x) __pte((x).val) 363#define __swp_entry_to_pte(x) __pte((x).val)
329 364
330extern int map_kernel_page(unsigned long ea, unsigned long pa, 365int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
331 unsigned long flags);
332extern int __meminit vmemmap_create_mapping(unsigned long start, 366extern int __meminit vmemmap_create_mapping(unsigned long start,
333 unsigned long page_size, 367 unsigned long page_size,
334 unsigned long phys); 368 unsigned long phys);
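With map_kernel_page() now taking a pgprot_t, callers pass one of the PAGE_KERNEL* masks directly instead of a raw flags word. A hypothetical call site might look like this; only the prototype comes from this hunk, while the function name and addresses are invented:

/* Hypothetical caller, kernel context assumed.  Only the prototype
 * int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
 * is taken from this patch; the values below are made up. */
static int map_one_device_page(void)
{
        unsigned long ea = 0xc000000080000000UL;   /* made-up effective address */
        unsigned long pa = 0x0000000200000000UL;   /* made-up physical address  */

        /* Non-cacheable, guarded mapping suitable for MMIO. */
        return map_kernel_page(ea, pa, PAGE_KERNEL_NCG);
}
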
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index b321c82b3624..70ff23974b59 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -8,18 +8,50 @@
8#include <asm/nohash/32/pgtable.h> 8#include <asm/nohash/32/pgtable.h>
9#endif 9#endif
10 10
11/* Permission masks used for kernel mappings */
12#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
13#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
14#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
15 _PAGE_NO_CACHE | _PAGE_GUARDED)
16#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
17#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
18#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
19
20/*
21 * Protection used for kernel text. We want the debuggers to be able to
22 * set breakpoints anywhere, so don't write protect the kernel text
23 * on platforms where such control is possible.
24 */
25#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
26 defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
27#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
28#else
29#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
30#endif
31
 32/* Make module code happy. We don't set RO yet */
33#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
34
35/* Advertise special mapping type for AGP */
36#define PAGE_AGP (PAGE_KERNEL_NC)
37#define HAVE_PAGE_AGP
38
11#ifndef __ASSEMBLY__ 39#ifndef __ASSEMBLY__
12 40
13/* Generic accessors to PTE bits */ 41/* Generic accessors to PTE bits */
42#ifndef pte_write
14static inline int pte_write(pte_t pte) 43static inline int pte_write(pte_t pte)
15{ 44{
16 return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; 45 return pte_val(pte) & _PAGE_RW;
17} 46}
47#endif
18static inline int pte_read(pte_t pte) { return 1; } 48static inline int pte_read(pte_t pte) { return 1; }
19static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } 49static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
20static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } 50static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
21static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } 51static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
22static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } 52static inline bool pte_hashpte(pte_t pte) { return false; }
53static inline bool pte_ci(pte_t pte) { return pte_val(pte) & _PAGE_NO_CACHE; }
54static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; }
23 55
24#ifdef CONFIG_NUMA_BALANCING 56#ifdef CONFIG_NUMA_BALANCING
25/* 57/*
@@ -29,8 +61,7 @@ static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PA
29 */ 61 */
30static inline int pte_protnone(pte_t pte) 62static inline int pte_protnone(pte_t pte)
31{ 63{
32 return (pte_val(pte) & 64 return pte_present(pte) && !pte_user(pte);
33 (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
34} 65}
35 66
36static inline int pmd_protnone(pmd_t pmd) 67static inline int pmd_protnone(pmd_t pmd)
@@ -44,6 +75,23 @@ static inline int pte_present(pte_t pte)
44 return pte_val(pte) & _PAGE_PRESENT; 75 return pte_val(pte) & _PAGE_PRESENT;
45} 76}
46 77
78static inline bool pte_hw_valid(pte_t pte)
79{
80 return pte_val(pte) & _PAGE_PRESENT;
81}
82
83/*
 84 * Don't just check for any non-zero bits in _PAGE_USER, since for book3e
85 * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
86 * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too.
87 */
88#ifndef pte_user
89static inline bool pte_user(pte_t pte)
90{
91 return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
92}
93#endif
94
47/* 95/*
48 * We only find page table entry in the last level 96 * We only find page table entry in the last level
49 * Hence no need for other accessors 97 * Hence no need for other accessors
@@ -77,53 +125,53 @@ static inline unsigned long pte_pfn(pte_t pte) {
77 return pte_val(pte) >> PTE_RPN_SHIFT; } 125 return pte_val(pte) >> PTE_RPN_SHIFT; }
78 126
79/* Generic modifiers for PTE bits */ 127/* Generic modifiers for PTE bits */
80static inline pte_t pte_wrprotect(pte_t pte) 128static inline pte_t pte_exprotect(pte_t pte)
81{ 129{
82 pte_basic_t ptev; 130 return __pte(pte_val(pte) & ~_PAGE_EXEC);
83
84 ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
85 ptev |= _PAGE_RO;
86 return __pte(ptev);
87} 131}
88 132
133#ifndef pte_mkclean
89static inline pte_t pte_mkclean(pte_t pte) 134static inline pte_t pte_mkclean(pte_t pte)
90{ 135{
91 return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE)); 136 return __pte(pte_val(pte) & ~_PAGE_DIRTY);
92} 137}
138#endif
93 139
94static inline pte_t pte_mkold(pte_t pte) 140static inline pte_t pte_mkold(pte_t pte)
95{ 141{
96 return __pte(pte_val(pte) & ~_PAGE_ACCESSED); 142 return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
97} 143}
98 144
99static inline pte_t pte_mkwrite(pte_t pte) 145static inline pte_t pte_mkpte(pte_t pte)
100{ 146{
101 pte_basic_t ptev; 147 return pte;
102
103 ptev = pte_val(pte) & ~_PAGE_RO;
104 ptev |= _PAGE_RW;
105 return __pte(ptev);
106} 148}
107 149
108static inline pte_t pte_mkdirty(pte_t pte) 150static inline pte_t pte_mkspecial(pte_t pte)
109{ 151{
110 return __pte(pte_val(pte) | _PAGE_DIRTY); 152 return __pte(pte_val(pte) | _PAGE_SPECIAL);
111} 153}
112 154
113static inline pte_t pte_mkyoung(pte_t pte) 155#ifndef pte_mkhuge
156static inline pte_t pte_mkhuge(pte_t pte)
114{ 157{
115 return __pte(pte_val(pte) | _PAGE_ACCESSED); 158 return __pte(pte_val(pte));
116} 159}
160#endif
117 161
118static inline pte_t pte_mkspecial(pte_t pte) 162#ifndef pte_mkprivileged
163static inline pte_t pte_mkprivileged(pte_t pte)
119{ 164{
120 return __pte(pte_val(pte) | _PAGE_SPECIAL); 165 return __pte(pte_val(pte) & ~_PAGE_USER);
121} 166}
167#endif
122 168
123static inline pte_t pte_mkhuge(pte_t pte) 169#ifndef pte_mkuser
170static inline pte_t pte_mkuser(pte_t pte)
124{ 171{
125 return __pte(pte_val(pte) | _PAGE_HUGE); 172 return __pte(pte_val(pte) | _PAGE_USER);
126} 173}
174#endif
127 175
128static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 176static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
129{ 177{
@@ -197,6 +245,8 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre
197#if _PAGE_WRITETHRU != 0 245#if _PAGE_WRITETHRU != 0
198#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ 246#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
199 _PAGE_COHERENT | _PAGE_WRITETHRU)) 247 _PAGE_COHERENT | _PAGE_WRITETHRU))
248#else
249#define pgprot_cached_wthru(prot) pgprot_noncached(prot)
200#endif 250#endif
201 251
202#define pgprot_cached_noncoherent(prot) \ 252#define pgprot_cached_noncoherent(prot) \
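With pte-common.h removed, this header supplies the generic nohash accessors, and each platform may override an individual helper by defining a same-named macro before this point (as the 8xx and 40x headers above do with pte_wrprotect, pte_write, etc.). A stripped-down, compilable sketch of that override pattern with invented names:

/* Sketch of the override pattern used here: a "platform" part defines a
 * helper plus a same-named macro; the "generic" part only supplies a
 * fallback when that macro is absent.  Names are invented for the example. */
#include <stdio.h>

/* --- platform part --------------------------------------------------- */
static inline int my_pte_write(unsigned long pte) { return !(pte & 1); }
#define my_pte_write my_pte_write        /* marker: an override exists   */

/* --- generic part ----------------------------------------------------- */
#ifndef my_pte_write
static inline int my_pte_write(unsigned long pte) { return pte & 2; }
#endif

int main(void)
{
        printf("%d\n", my_pte_write(0));  /* uses the platform version -> 1 */
        return 0;
}
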
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index 12730b81cd98..dd40d200f274 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -77,7 +77,48 @@
77#define _PMD_PRESENT 0 77#define _PMD_PRESENT 0
78#define _PMD_PRESENT_MASK (PAGE_MASK) 78#define _PMD_PRESENT_MASK (PAGE_MASK)
79#define _PMD_BAD (~PAGE_MASK) 79#define _PMD_BAD (~PAGE_MASK)
80#define _PMD_USER 0
81#else
82#define _PTE_NONE_MASK 0
83#endif
84
85/*
86 * We define 2 sets of base prot bits, one for basic pages (ie,
87 * cacheable kernel and user pages) and one for non cacheable
88 * pages. We always set _PAGE_COHERENT when SMP is enabled or
89 * the processor might need it for DMA coherency.
90 */
91#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
92#if defined(CONFIG_SMP)
93#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
94#else
95#define _PAGE_BASE (_PAGE_BASE_NC)
80#endif 96#endif
81 97
98/* Permission masks used to generate the __P and __S table */
99#define PAGE_NONE __pgprot(_PAGE_BASE)
100#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
101#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
102#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
103#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
104#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
105#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
106
107#ifndef __ASSEMBLY__
108static inline pte_t pte_mkprivileged(pte_t pte)
109{
110 return __pte((pte_val(pte) & ~_PAGE_USER) | _PAGE_PRIVILEGED);
111}
112
113#define pte_mkprivileged pte_mkprivileged
114
115static inline pte_t pte_mkuser(pte_t pte)
116{
117 return __pte((pte_val(pte) & ~_PAGE_PRIVILEGED) | _PAGE_USER);
118}
119
120#define pte_mkuser pte_mkuser
121#endif /* __ASSEMBLY__ */
122
82#endif /* __KERNEL__ */ 123#endif /* __KERNEL__ */
83#endif /* _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */ 124#endif /* _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 8365353330b4..870fb7b239ea 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1050,6 +1050,7 @@ enum OpalSysCooling {
1050enum { 1050enum {
1051 OPAL_REBOOT_NORMAL = 0, 1051 OPAL_REBOOT_NORMAL = 0,
1052 OPAL_REBOOT_PLATFORM_ERROR = 1, 1052 OPAL_REBOOT_PLATFORM_ERROR = 1,
1053 OPAL_REBOOT_FULL_IPL = 2,
1053}; 1054};
1054 1055
1055/* Argument to OPAL_PCI_TCE_KILL */ 1056/* Argument to OPAL_PCI_TCE_KILL */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ad4f16164619..e843bc5d1a0f 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -113,7 +113,13 @@ struct paca_struct {
113 * on the linear mapping */ 113 * on the linear mapping */
114 /* SLB related definitions */ 114 /* SLB related definitions */
115 u16 vmalloc_sllp; 115 u16 vmalloc_sllp;
116 u16 slb_cache_ptr; 116 u8 slb_cache_ptr;
117 u8 stab_rr; /* stab/slb round-robin counter */
118#ifdef CONFIG_DEBUG_VM
119 u8 in_kernel_slb_handler;
120#endif
121 u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */
122 u32 slb_kern_bitmap;
117 u32 slb_cache[SLB_CACHE_ENTRIES]; 123 u32 slb_cache[SLB_CACHE_ENTRIES];
118#endif /* CONFIG_PPC_BOOK3S_64 */ 124#endif /* CONFIG_PPC_BOOK3S_64 */
119 125
@@ -160,7 +166,6 @@ struct paca_struct {
160 */ 166 */
161 struct task_struct *__current; /* Pointer to current */ 167 struct task_struct *__current; /* Pointer to current */
162 u64 kstack; /* Saved Kernel stack addr */ 168 u64 kstack; /* Saved Kernel stack addr */
163 u64 stab_rr; /* stab/slb round-robin counter */
164 u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ 169 u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */
165 u64 saved_msr; /* MSR saved here by enter_rtas */ 170 u64 saved_msr; /* MSR saved here by enter_rtas */
166 u16 trap_save; /* Used when bad stack is encountered */ 171 u16 trap_save; /* Used when bad stack is encountered */
@@ -250,6 +255,15 @@ struct paca_struct {
250#ifdef CONFIG_PPC_PSERIES 255#ifdef CONFIG_PPC_PSERIES
251 u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ 256 u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */
252#endif /* CONFIG_PPC_PSERIES */ 257#endif /* CONFIG_PPC_PSERIES */
258
259#ifdef CONFIG_PPC_BOOK3S_64
260 /* Capture SLB related old contents in MCE handler. */
261 struct slb_entry *mce_faulty_slbs;
262 u16 slb_save_cache_ptr;
263#endif /* CONFIG_PPC_BOOK3S_64 */
264#ifdef CONFIG_STACKPROTECTOR
265 unsigned long canary;
266#endif
253} ____cacheline_aligned; 267} ____cacheline_aligned;
254 268
255extern void copy_mm_to_paca(struct mm_struct *mm); 269extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 14c79a7dc855..9679b7519a35 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -20,6 +20,25 @@ struct mm_struct;
20#include <asm/nohash/pgtable.h> 20#include <asm/nohash/pgtable.h>
21#endif /* !CONFIG_PPC_BOOK3S */ 21#endif /* !CONFIG_PPC_BOOK3S */
22 22
23/* Note due to the way vm flags are laid out, the bits are XWR */
24#define __P000 PAGE_NONE
25#define __P001 PAGE_READONLY
26#define __P010 PAGE_COPY
27#define __P011 PAGE_COPY
28#define __P100 PAGE_READONLY_X
29#define __P101 PAGE_READONLY_X
30#define __P110 PAGE_COPY_X
31#define __P111 PAGE_COPY_X
32
33#define __S000 PAGE_NONE
34#define __S001 PAGE_READONLY
35#define __S010 PAGE_SHARED
36#define __S011 PAGE_SHARED
37#define __S100 PAGE_READONLY_X
38#define __S101 PAGE_READONLY_X
39#define __S110 PAGE_SHARED_X
40#define __S111 PAGE_SHARED_X
41
23#ifndef __ASSEMBLY__ 42#ifndef __ASSEMBLY__
24 43
25#include <asm/tlbflush.h> 44#include <asm/tlbflush.h>
@@ -27,6 +46,16 @@ struct mm_struct;
27/* Keep these as a macros to avoid include dependency mess */ 46/* Keep these as a macros to avoid include dependency mess */
28#define pte_page(x) pfn_to_page(pte_pfn(x)) 47#define pte_page(x) pfn_to_page(pte_pfn(x))
29#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) 48#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
49/*
50 * Select all bits except the pfn
51 */
52static inline pgprot_t pte_pgprot(pte_t pte)
53{
54 unsigned long pte_flags;
55
56 pte_flags = pte_val(pte) & ~PTE_RPN_MASK;
57 return __pgprot(pte_flags);
58}
30 59
31/* 60/*
32 * ZERO_PAGE is a global shared page that is always zero: used 61 * ZERO_PAGE is a global shared page that is always zero: used
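The __P/__S tables above are indexed by the XWR layout of the vm flags; the core mm picks an entry according to the read/write/exec/shared bits of a mapping. The sketch below mimics that lookup with placeholder flag values and string names standing in for the pgprot entries; the real lookup lives in the generic mm code, not in this patch:

#include <stdio.h>

/* Placeholder flag bits mirroring a read/write/exec/shared layout; the
 * entries are named after the PAGE_* masks above, but the table itself is
 * only illustrative. */
#define MY_VM_READ   0x1
#define MY_VM_WRITE  0x2
#define MY_VM_EXEC   0x4
#define MY_VM_SHARED 0x8

static const char *prot_map[16] = {
        [0x0] = "NONE",        [0x1] = "READONLY",
        [0x2] = "COPY",        [0x3] = "COPY",
        [0x4] = "READONLY_X",  [0x5] = "READONLY_X",
        [0x6] = "COPY_X",      [0x7] = "COPY_X",
        [0x8] = "NONE",        [0x9] = "READONLY",
        [0xa] = "SHARED",      [0xb] = "SHARED",
        [0xc] = "READONLY_X",  [0xd] = "READONLY_X",
        [0xe] = "SHARED_X",    [0xf] = "SHARED_X",
};

int main(void)
{
        unsigned long flags = MY_VM_READ | MY_VM_WRITE | MY_VM_SHARED;

        /* A shared, writable, non-exec mapping picks the SHARED entry. */
        printf("%s\n", prot_map[flags & 0xf]);
        return 0;
}
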
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 7f627e3f4da4..630eb8b1b7ed 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -54,7 +54,6 @@ void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
54 54
55struct pnv_php_slot { 55struct pnv_php_slot {
56 struct hotplug_slot slot; 56 struct hotplug_slot slot;
57 struct hotplug_slot_info slot_info;
58 uint64_t id; 57 uint64_t id;
59 char *name; 58 char *name;
60 int slot_no; 59 int slot_no;
@@ -72,6 +71,7 @@ struct pnv_php_slot {
72 struct pci_dev *pdev; 71 struct pci_dev *pdev;
73 struct pci_bus *bus; 72 struct pci_bus *bus;
74 bool power_state_check; 73 bool power_state_check;
74 u8 attention_state;
75 void *fdt; 75 void *fdt;
76 void *dt; 76 void *dt;
77 struct of_changeset ocs; 77 struct of_changeset ocs;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 665af14850e4..6093bc8f74e5 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -104,6 +104,7 @@
104#define OP_31_XOP_LHZUX 311 104#define OP_31_XOP_LHZUX 311
105#define OP_31_XOP_MSGSNDP 142 105#define OP_31_XOP_MSGSNDP 142
106#define OP_31_XOP_MSGCLRP 174 106#define OP_31_XOP_MSGCLRP 174
107#define OP_31_XOP_TLBIE 306
107#define OP_31_XOP_MFSPR 339 108#define OP_31_XOP_MFSPR 339
108#define OP_31_XOP_LWAX 341 109#define OP_31_XOP_LWAX 341
109#define OP_31_XOP_LHAX 343 110#define OP_31_XOP_LHAX 343
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 726288048652..f67da277d652 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -58,6 +58,7 @@ void eeh_save_bars(struct eeh_dev *edev);
58int rtas_write_config(struct pci_dn *, int where, int size, u32 val); 58int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
59int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); 59int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
60void eeh_pe_state_mark(struct eeh_pe *pe, int state); 60void eeh_pe_state_mark(struct eeh_pe *pe, int state);
61void eeh_pe_mark_isolated(struct eeh_pe *pe);
61void eeh_pe_state_clear(struct eeh_pe *pe, int state); 62void eeh_pe_state_clear(struct eeh_pe *pe, int state);
62void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state); 63void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state);
63void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); 64void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 52fadded5c1e..ee58526cb6c2 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -32,9 +32,9 @@
32/* Default SMT priority is set to 3. Use 11- 13bits to save priority. */ 32/* Default SMT priority is set to 3. Use 11- 13bits to save priority. */
33#define PPR_PRIORITY 3 33#define PPR_PRIORITY 3
34#ifdef __ASSEMBLY__ 34#ifdef __ASSEMBLY__
35#define INIT_PPR (PPR_PRIORITY << 50) 35#define DEFAULT_PPR (PPR_PRIORITY << 50)
36#else 36#else
37#define INIT_PPR ((u64)PPR_PRIORITY << 50) 37#define DEFAULT_PPR ((u64)PPR_PRIORITY << 50)
38#endif /* __ASSEMBLY__ */ 38#endif /* __ASSEMBLY__ */
39#endif /* CONFIG_PPC64 */ 39#endif /* CONFIG_PPC64 */
40 40
@@ -67,12 +67,6 @@ extern int _chrp_type;
67 67
68#endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */ 68#endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */
69 69
70/*
71 * Default implementation of macro that returns current
72 * instruction pointer ("program counter").
73 */
74#define current_text_addr() ({ __label__ _l; _l: &&_l;})
75
76/* Macros for adjusting thread priority (hardware multi-threading) */ 70/* Macros for adjusting thread priority (hardware multi-threading) */
77#define HMT_very_low() asm volatile("or 31,31,31 # very low priority") 71#define HMT_very_low() asm volatile("or 31,31,31 # very low priority")
78#define HMT_low() asm volatile("or 1,1,1 # low priority") 72#define HMT_low() asm volatile("or 1,1,1 # low priority")
@@ -273,6 +267,7 @@ struct thread_struct {
273#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 267#endif /* CONFIG_HAVE_HW_BREAKPOINT */
274 struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ 268 struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
275 unsigned long trap_nr; /* last trap # on this thread */ 269 unsigned long trap_nr; /* last trap # on this thread */
270 u8 load_slb; /* Ages out SLB preload cache entries */
276 u8 load_fp; 271 u8 load_fp;
277#ifdef CONFIG_ALTIVEC 272#ifdef CONFIG_ALTIVEC
278 u8 load_vec; 273 u8 load_vec;
@@ -341,7 +336,6 @@ struct thread_struct {
341 * onwards. 336 * onwards.
342 */ 337 */
343 int dscr_inherit; 338 int dscr_inherit;
344 unsigned long ppr; /* used to save/restore SMT priority */
345 unsigned long tidr; 339 unsigned long tidr;
346#endif 340#endif
347#ifdef CONFIG_PPC_BOOK3S_64 341#ifdef CONFIG_PPC_BOOK3S_64
@@ -389,7 +383,6 @@ struct thread_struct {
389 .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ 383 .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
390 .addr_limit = KERNEL_DS, \ 384 .addr_limit = KERNEL_DS, \
391 .fpexc_mode = 0, \ 385 .fpexc_mode = 0, \
392 .ppr = INIT_PPR, \
393 .fscr = FSCR_TAR | FSCR_EBB \ 386 .fscr = FSCR_TAR | FSCR_EBB \
394} 387}
395#endif 388#endif
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
deleted file mode 100644
index bef56141a549..000000000000
--- a/arch/powerpc/include/asm/pte-common.h
+++ /dev/null
@@ -1,219 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Included from asm/pgtable-*.h only ! */
3
4/*
5 * Some bits are only used on some cpu families... Make sure that all
6 * the undefined gets a sensible default
7 */
8#ifndef _PAGE_HASHPTE
9#define _PAGE_HASHPTE 0
10#endif
11#ifndef _PAGE_HWWRITE
12#define _PAGE_HWWRITE 0
13#endif
14#ifndef _PAGE_EXEC
15#define _PAGE_EXEC 0
16#endif
17#ifndef _PAGE_ENDIAN
18#define _PAGE_ENDIAN 0
19#endif
20#ifndef _PAGE_COHERENT
21#define _PAGE_COHERENT 0
22#endif
23#ifndef _PAGE_WRITETHRU
24#define _PAGE_WRITETHRU 0
25#endif
26#ifndef _PAGE_4K_PFN
27#define _PAGE_4K_PFN 0
28#endif
29#ifndef _PAGE_SAO
30#define _PAGE_SAO 0
31#endif
32#ifndef _PAGE_PSIZE
33#define _PAGE_PSIZE 0
34#endif
35/* _PAGE_RO and _PAGE_RW shall not be defined at the same time */
36#ifndef _PAGE_RO
37#define _PAGE_RO 0
38#else
39#define _PAGE_RW 0
40#endif
41
42#ifndef _PAGE_PTE
43#define _PAGE_PTE 0
44#endif
45/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */
46#ifndef _PAGE_PRIVILEGED
47#define _PAGE_PRIVILEGED 0
48#else
49#ifndef _PAGE_USER
50#define _PAGE_USER 0
51#endif
52#endif
53#ifndef _PAGE_NA
54#define _PAGE_NA 0
55#endif
56#ifndef _PAGE_HUGE
57#define _PAGE_HUGE 0
58#endif
59
60#ifndef _PMD_PRESENT_MASK
61#define _PMD_PRESENT_MASK _PMD_PRESENT
62#endif
63#ifndef _PMD_USER
64#define _PMD_USER 0
65#endif
66#ifndef _PAGE_KERNEL_RO
67#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO)
68#endif
69#ifndef _PAGE_KERNEL_ROX
70#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
71#endif
72#ifndef _PAGE_KERNEL_RW
73#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
74 _PAGE_HWWRITE)
75#endif
76#ifndef _PAGE_KERNEL_RWX
77#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
78 _PAGE_HWWRITE | _PAGE_EXEC)
79#endif
80#ifndef _PAGE_HPTEFLAGS
81#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
82#endif
83#ifndef _PTE_NONE_MASK
84#define _PTE_NONE_MASK _PAGE_HPTEFLAGS
85#endif
86
87#ifndef __ASSEMBLY__
88
89/*
90 * Don't just check for any non zero bits in __PAGE_USER, since for book3e
91 * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
92 * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too.
93 */
94static inline bool pte_user(pte_t pte)
95{
96 return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
97}
98#endif /* __ASSEMBLY__ */
99
100/* Location of the PFN in the PTE. Most 32-bit platforms use the same
101 * as _PAGE_SHIFT here (ie, naturally aligned).
102 * Platform who don't just pre-define the value so we don't override it here
103 */
104#ifndef PTE_RPN_SHIFT
105#define PTE_RPN_SHIFT (PAGE_SHIFT)
106#endif
107
108/* The mask covered by the RPN must be a ULL on 32-bit platforms with
109 * 64-bit PTEs
110 */
111#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
112#define PTE_RPN_MASK (~((1ULL<<PTE_RPN_SHIFT)-1))
113#else
114#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
115#endif
116
117/* _PAGE_CHG_MASK masks of bits that are to be preserved across
118 * pgprot changes
119 */
120#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
121 _PAGE_ACCESSED | _PAGE_SPECIAL)
122
123/* Mask of bits returned by pte_pgprot() */
124#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
125 _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
126 _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
127 _PAGE_PRIVILEGED | \
128 _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
129
130/*
131 * We define 2 sets of base prot bits, one for basic pages (ie,
132 * cacheable kernel and user pages) and one for non cacheable
133 * pages. We always set _PAGE_COHERENT when SMP is enabled or
134 * the processor might need it for DMA coherency.
135 */
136#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
137#if defined(CONFIG_SMP) || defined(CONFIG_PPC_STD_MMU) || \
138 defined(CONFIG_PPC_E500MC)
139#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
140#else
141#define _PAGE_BASE (_PAGE_BASE_NC)
142#endif
143
144/* Permission masks used to generate the __P and __S table,
145 *
146 * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
147 *
148 * Write permissions imply read permissions for now (we could make write-only
149 * pages on BookE but we don't bother for now). Execute permission control is
150 * possible on platforms that define _PAGE_EXEC
151 *
152 * Note due to the way vm flags are laid out, the bits are XWR
153 */
154#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA)
155#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
156#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
157 _PAGE_EXEC)
158#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
159#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
160 _PAGE_EXEC)
161#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
162#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
163 _PAGE_EXEC)
164
165#define __P000 PAGE_NONE
166#define __P001 PAGE_READONLY
167#define __P010 PAGE_COPY
168#define __P011 PAGE_COPY
169#define __P100 PAGE_READONLY_X
170#define __P101 PAGE_READONLY_X
171#define __P110 PAGE_COPY_X
172#define __P111 PAGE_COPY_X
173
174#define __S000 PAGE_NONE
175#define __S001 PAGE_READONLY
176#define __S010 PAGE_SHARED
177#define __S011 PAGE_SHARED
178#define __S100 PAGE_READONLY_X
179#define __S101 PAGE_READONLY_X
180#define __S110 PAGE_SHARED_X
181#define __S111 PAGE_SHARED_X
182
183/* Permission masks used for kernel mappings */
184#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
185#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
186 _PAGE_NO_CACHE)
187#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
188 _PAGE_NO_CACHE | _PAGE_GUARDED)
189#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
190#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
191#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
192
193/* Protection used for kernel text. We want the debuggers to be able to
194 * set breakpoints anywhere, so don't write protect the kernel text
195 * on platforms where such control is possible.
196 */
197#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
198 defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
199#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
200#else
201#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
202#endif
203
204/* Make modules code happy. We don't set RO yet */
205#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
206
207/* Advertise special mapping type for AGP */
208#define PAGE_AGP (PAGE_KERNEL_NC)
209#define HAVE_PAGE_AGP
210
211#ifndef _PAGE_READ
212/* if not defined, we should not find _PAGE_WRITE too */
213#define _PAGE_READ 0
214#define _PAGE_WRITE _PAGE_RW
215#endif
216
217#ifndef H_PAGE_4K_PFN
218#define H_PAGE_4K_PFN 0
219#endif
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 447cbd1bee99..f73886a1a7f5 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -26,6 +26,37 @@
26#include <uapi/asm/ptrace.h> 26#include <uapi/asm/ptrace.h>
27#include <asm/asm-const.h> 27#include <asm/asm-const.h>
28 28
29#ifndef __ASSEMBLY__
30struct pt_regs
31{
32 union {
33 struct user_pt_regs user_regs;
34 struct {
35 unsigned long gpr[32];
36 unsigned long nip;
37 unsigned long msr;
38 unsigned long orig_gpr3;
39 unsigned long ctr;
40 unsigned long link;
41 unsigned long xer;
42 unsigned long ccr;
43#ifdef CONFIG_PPC64
44 unsigned long softe;
45#else
46 unsigned long mq;
47#endif
48 unsigned long trap;
49 unsigned long dar;
50 unsigned long dsisr;
51 unsigned long result;
52 };
53 };
54
55#ifdef CONFIG_PPC64
56 unsigned long ppr;
57#endif
58};
59#endif
29 60
30#ifdef __powerpc64__ 61#ifdef __powerpc64__
31 62
@@ -102,6 +133,11 @@ static inline long regs_return_value(struct pt_regs *regs)
102 return -regs->gpr[3]; 133 return -regs->gpr[3];
103} 134}
104 135
136static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
137{
138 regs->gpr[3] = rc;
139}
140
105#ifdef __powerpc64__ 141#ifdef __powerpc64__
106#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) 142#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
107#else 143#else
@@ -149,7 +185,7 @@ do { \
149 185
150#define arch_has_single_step() (1) 186#define arch_has_single_step() (1)
151#define arch_has_block_step() (!cpu_has_feature(CPU_FTR_601)) 187#define arch_has_block_step() (!cpu_has_feature(CPU_FTR_601))
152#define ARCH_HAS_USER_SINGLE_STEP_INFO 188#define ARCH_HAS_USER_SINGLE_STEP_REPORT
153 189
154/* 190/*
155 * kprobe-based event tracer support 191 * kprobe-based event tracer support
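pt_regs is now a kernel-internal type: an anonymous union overlays the uapi struct user_pt_regs with an identical anonymous struct, and the PPR save slot sits after the union, so existing regs->gpr[...] accesses keep working while the ptrace ABI layout is unchanged. A small illustration of that union-aliasing idea with invented types:

/* Minimal illustration of the union-aliasing idea (invented types, not the
 * real layouts): both views of the register block share the same storage,
 * and the extra kernel-only field lives after the union. */
#include <assert.h>
#include <stddef.h>

struct user_view { unsigned long gpr[4]; unsigned long nip; };

struct kernel_view {
        union {
                struct user_view user_regs;
                struct { unsigned long gpr[4]; unsigned long nip; };
        };
        unsigned long ppr;       /* kernel-only, appended after the union */
};

int main(void)
{
        struct kernel_view r = { 0 };

        r.gpr[3] = 42;                            /* old-style access ...      */
        assert(r.user_regs.gpr[3] == 42);         /* ... aliases the uapi view */
        assert(offsetof(struct kernel_view, ppr) >= sizeof(struct user_view));
        return 0;
}
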
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e5b314ed054e..de52c3166ba4 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -118,11 +118,16 @@
118#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */ 118#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */
119#define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */ 119#define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */
120#define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */ 120#define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */
121#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
122#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */ 121#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */
123#define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) 122#define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T)
124#define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) 123#define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S)
125 124
125#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
126#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
127#else
128#define MSR_TM_ACTIVE(x) 0
129#endif
130
126#if defined(CONFIG_PPC_BOOK3S_64) 131#if defined(CONFIG_PPC_BOOK3S_64)
127#define MSR_64BIT MSR_SF 132#define MSR_64BIT MSR_SF
128 133
@@ -415,6 +420,7 @@
415#define HFSCR_DSCR __MASK(FSCR_DSCR_LG) 420#define HFSCR_DSCR __MASK(FSCR_DSCR_LG)
416#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG) 421#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG)
417#define HFSCR_FP __MASK(FSCR_FP_LG) 422#define HFSCR_FP __MASK(FSCR_FP_LG)
423#define HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */
418#define SPRN_TAR 0x32f /* Target Address Register */ 424#define SPRN_TAR 0x32f /* Target Address Register */
419#define SPRN_LPCR 0x13E /* LPAR Control Register */ 425#define SPRN_LPCR 0x13E /* LPAR Control Register */
420#define LPCR_VPM0 ASM_CONST(0x8000000000000000) 426#define LPCR_VPM0 ASM_CONST(0x8000000000000000)
@@ -766,6 +772,7 @@
766#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ 772#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
767#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ 773#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
768#define HSRR1_DENORM 0x00100000 /* Denorm exception */ 774#define HSRR1_DENORM 0x00100000 /* Denorm exception */
 775#define HSRR1_HISI_WRITE 0x00010000 /* HISI because mem couldn't be updated */
769 776
770#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */ 777#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */
771#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */ 778#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 71e393c46a49..bb38dd67d47d 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -125,6 +125,7 @@ struct rtas_suspend_me_data {
125#define RTAS_TYPE_INFO 0xE2 125#define RTAS_TYPE_INFO 0xE2
126#define RTAS_TYPE_DEALLOC 0xE3 126#define RTAS_TYPE_DEALLOC 0xE3
127#define RTAS_TYPE_DUMP 0xE4 127#define RTAS_TYPE_DUMP 0xE4
128#define RTAS_TYPE_HOTPLUG 0xE5
128/* I don't add PowerMGM events right now, this is a different topic */ 129/* I don't add PowerMGM events right now, this is a different topic */
129#define RTAS_TYPE_PMGM_POWER_SW_ON 0x60 130#define RTAS_TYPE_PMGM_POWER_SW_ON 0x60
130#define RTAS_TYPE_PMGM_POWER_SW_OFF 0x61 131#define RTAS_TYPE_PMGM_POWER_SW_OFF 0x61
@@ -185,11 +186,23 @@ static inline uint8_t rtas_error_disposition(const struct rtas_error_log *elog)
185 return (elog->byte1 & 0x18) >> 3; 186 return (elog->byte1 & 0x18) >> 3;
186} 187}
187 188
189static inline
190void rtas_set_disposition_recovered(struct rtas_error_log *elog)
191{
192 elog->byte1 &= ~0x18;
193 elog->byte1 |= (RTAS_DISP_FULLY_RECOVERED << 3);
194}
195
188static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog) 196static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
189{ 197{
190 return (elog->byte1 & 0x04) >> 2; 198 return (elog->byte1 & 0x04) >> 2;
191} 199}
192 200
201static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
202{
203 return (elog->byte2 & 0xf0) >> 4;
204}
205
193#define rtas_error_type(x) ((x)->byte3) 206#define rtas_error_type(x) ((x)->byte3)
194 207
195static inline 208static inline
@@ -275,6 +288,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log)
275#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') 288#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
276#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') 289#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D')
277#define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P') 290#define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P')
291#define PSERIES_ELOG_SECT_ID_MCE (('M' << 8) | 'C')
278 292
279/* Vendor specific Platform Event Log Format, Version 6, section header */ 293/* Vendor specific Platform Event Log Format, Version 6, section header */
280struct pseries_errorlog { 294struct pseries_errorlog {
@@ -316,6 +330,7 @@ struct pseries_hp_errorlog {
316#define PSERIES_HP_ELOG_RESOURCE_MEM 2 330#define PSERIES_HP_ELOG_RESOURCE_MEM 2
317#define PSERIES_HP_ELOG_RESOURCE_SLOT 3 331#define PSERIES_HP_ELOG_RESOURCE_SLOT 3
318#define PSERIES_HP_ELOG_RESOURCE_PHB 4 332#define PSERIES_HP_ELOG_RESOURCE_PHB 4
333#define PSERIES_HP_ELOG_RESOURCE_PMEM 6
319 334
320#define PSERIES_HP_ELOG_ACTION_ADD 1 335#define PSERIES_HP_ELOG_ACTION_ADD 1
321#define PSERIES_HP_ELOG_ACTION_REMOVE 2 336#define PSERIES_HP_ELOG_ACTION_REMOVE 2
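rtas_set_disposition_recovered() is the write-side counterpart of rtas_error_disposition(): the disposition field occupies bits 3-4 of byte1, so the helper clears the field and ORs in the recovered value shifted into place. A placeholder round trip of the same mask/shift pattern:

#include <assert.h>

/* The disposition field occupies bits 3-4 of byte1 (mask 0x18), as in the
 * accessors above.  DISP_RECOVERED is a placeholder; see the RTAS_DISP_*
 * constants in rtas.h for the real values. */
#define DISP_MASK       0x18
#define DISP_SHIFT      3
#define DISP_RECOVERED  0

static unsigned char get_disp(unsigned char byte1)
{
        return (byte1 & DISP_MASK) >> DISP_SHIFT;
}

static unsigned char set_disp_recovered(unsigned char byte1)
{
        byte1 &= ~DISP_MASK;
        byte1 |= DISP_RECOVERED << DISP_SHIFT;
        return byte1;
}

int main(void)
{
        unsigned char byte1 = 0xff;             /* all bits set */

        byte1 = set_disp_recovered(byte1);
        assert(get_disp(byte1) == DISP_RECOVERED);
        assert((byte1 & ~DISP_MASK) == (0xffu & ~DISP_MASK)); /* others untouched */
        return 0;
}
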
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
index e40406cf5628..a595461c9cb0 100644
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -32,6 +32,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
32 unsigned long len, unsigned int psize); 32 unsigned long len, unsigned int psize);
33 33
34void slice_init_new_context_exec(struct mm_struct *mm); 34void slice_init_new_context_exec(struct mm_struct *mm);
35void slice_setup_new_exec(void);
35 36
36#endif /* __ASSEMBLY__ */ 37#endif /* __ASSEMBLY__ */
37 38
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 95b66a0c639b..41695745032c 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -100,6 +100,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
100DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); 100DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
101DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); 101DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
102DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); 102DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
103DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
103 104
104static inline struct cpumask *cpu_sibling_mask(int cpu) 105static inline struct cpumask *cpu_sibling_mask(int cpu)
105{ 106{
@@ -116,6 +117,11 @@ static inline struct cpumask *cpu_l2_cache_mask(int cpu)
116 return per_cpu(cpu_l2_cache_map, cpu); 117 return per_cpu(cpu_l2_cache_map, cpu);
117} 118}
118 119
120static inline struct cpumask *cpu_smallcore_mask(int cpu)
121{
122 return per_cpu(cpu_smallcore_map, cpu);
123}
124
119extern int cpu_to_core_id(int cpu); 125extern int cpu_to_core_id(int cpu);
120 126
121/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. 127/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
@@ -166,6 +172,11 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu)
166 return cpumask_of(cpu); 172 return cpumask_of(cpu);
167} 173}
168 174
175static inline const struct cpumask *cpu_smallcore_mask(int cpu)
176{
177 return cpumask_of(cpu);
178}
179
169#endif /* CONFIG_SMP */ 180#endif /* CONFIG_SMP */
170 181
171#ifdef CONFIG_PPC64 182#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 28f5dae25db6..68da49320592 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -9,17 +9,6 @@
9 * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space 9 * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
10 */ 10 */
11#define SECTION_SIZE_BITS 24 11#define SECTION_SIZE_BITS 24
12/*
13 * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
14 * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
15 * page_to_nid does a page->section->node lookup
16 * Hence only increase for VMEMMAP.
17 */
18#ifdef CONFIG_SPARSEMEM_VMEMMAP
19#define MAX_PHYSMEM_BITS 47
20#else
21#define MAX_PHYSMEM_BITS 46
22#endif
23 12
24#endif /* CONFIG_SPARSEMEM */ 13#endif /* CONFIG_SPARSEMEM */
25 14
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
new file mode 100644
index 000000000000..1c8460e23583
--- /dev/null
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * GCC stack protector support.
4 *
5 */
6
7#ifndef _ASM_STACKPROTECTOR_H
8#define _ASM_STACKPROTECTOR_H
9
10#include <linux/random.h>
11#include <linux/version.h>
12#include <asm/reg.h>
13#include <asm/current.h>
14#include <asm/paca.h>
15
16/*
17 * Initialize the stackprotector canary value.
18 *
19 * NOTE: this must only be called from functions that never return,
20 * and it must always be inlined.
21 */
22static __always_inline void boot_init_stack_canary(void)
23{
24 unsigned long canary;
25
26 /* Try to get a semi random initial value. */
27 canary = get_random_canary();
28 canary ^= mftb();
29 canary ^= LINUX_VERSION_CODE;
30 canary &= CANARY_MASK;
31
32 current->stack_canary = canary;
33#ifdef CONFIG_PPC64
34 get_paca()->canary = canary;
35#endif
36}
37
38#endif /* _ASM_STACKPROTECTOR_H */
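boot_init_stack_canary() seeds the canary from get_random_canary(), mixed with the timebase and LINUX_VERSION_CODE, stores it in current->stack_canary and, on 64-bit, in the paca. The check itself is emitted by the compiler in every protected function; conceptually it behaves like the hand-written illustration below (invented symbols, not powerpc code):

#include <stdio.h>
#include <stdlib.h>

/* Conceptual illustration of what a stack-protected function does; the real
 * checks are generated by the compiler, not written by hand. */
static unsigned long my_stack_chk_guard = 0xdeadbeefUL;   /* invented guard */

static void my_stack_chk_fail(void)
{
        fprintf(stderr, "stack smashing detected\n");
        abort();
}

void protected_function(void)
{
        unsigned long canary = my_stack_chk_guard;   /* prologue: stash guard  */
        char buf[64];

        buf[0] = 0;                                  /* body may overflow buf  */

        if (canary != my_stack_chk_guard)            /* epilogue: verify guard */
                my_stack_chk_fail();
}

int main(void)
{
        protected_function();
        return 0;
}
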
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 3c0002044bc9..544cac0474cb 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -29,6 +29,7 @@
29#include <asm/page.h> 29#include <asm/page.h>
30#include <asm/accounting.h> 30#include <asm/accounting.h>
31 31
32#define SLB_PRELOAD_NR 16U
32/* 33/*
33 * low level task data. 34 * low level task data.
34 */ 35 */
@@ -44,6 +45,10 @@ struct thread_info {
44#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) 45#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
45 struct cpu_accounting_data accounting; 46 struct cpu_accounting_data accounting;
46#endif 47#endif
48 unsigned char slb_preload_nr;
49 unsigned char slb_preload_tail;
50 u32 slb_preload_esid[SLB_PRELOAD_NR];
51
47 /* low level flags - has atomic operations done on it */ 52 /* low level flags - has atomic operations done on it */
48 unsigned long flags ____cacheline_aligned_in_smp; 53 unsigned long flags ____cacheline_aligned_in_smp;
49}; 54};
@@ -72,6 +77,12 @@ static inline struct thread_info *current_thread_info(void)
72} 77}
73 78
74extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); 79extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
80
81#ifdef CONFIG_PPC_BOOK3S_64
82void arch_setup_new_exec(void);
83#define arch_setup_new_exec arch_setup_new_exec
84#endif
85
75#endif /* __ASSEMBLY__ */ 86#endif /* __ASSEMBLY__ */
76 87
77/* 88/*
@@ -81,7 +92,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
81#define TIF_SIGPENDING 1 /* signal pending */ 92#define TIF_SIGPENDING 1 /* signal pending */
82#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ 93#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
83#define TIF_FSCHECK 3 /* Check FS is USER_DS on return */ 94#define TIF_FSCHECK 3 /* Check FS is USER_DS on return */
84#define TIF_32BIT 4 /* 32 bit binary */ 95#define TIF_SYSCALL_EMU 4 /* syscall emulation active */
85#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ 96#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
86#define TIF_PATCH_PENDING 6 /* pending live patching update */ 97#define TIF_PATCH_PENDING 6 /* pending live patching update */
87#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ 98#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
@@ -100,6 +111,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
100#define TIF_ELF2ABI 18 /* function descriptors must die! */ 111#define TIF_ELF2ABI 18 /* function descriptors must die! */
101#endif 112#endif
102#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 113#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
114#define TIF_32BIT 20 /* 32 bit binary */
103 115
104/* as above, but as bit values */ 116/* as above, but as bit values */
105#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 117#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -120,9 +132,10 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
120#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) 132#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
121#define _TIF_NOHZ (1<<TIF_NOHZ) 133#define _TIF_NOHZ (1<<TIF_NOHZ)
122#define _TIF_FSCHECK (1<<TIF_FSCHECK) 134#define _TIF_FSCHECK (1<<TIF_FSCHECK)
135#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
123#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ 136#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
124 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ 137 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
125 _TIF_NOHZ) 138 _TIF_NOHZ | _TIF_SYSCALL_EMU)
126 139
127#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ 140#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
128 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ 141 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
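thread_info gains a small fixed-size cache of segment IDs (SLB_PRELOAD_NR entries plus a count and a tail index) used to decide which SLB entries to preload on context switch and exec. The management code lives elsewhere in the series; the following is only an illustrative guess at how a bounded ring of ESIDs can be filled without duplicates:

/* Illustrative sketch, not the kernel's implementation: a bounded ring of
 * ESIDs with a tail index, skipping entries that are already present. */
#include <stdbool.h>
#include <stdio.h>

#define SLB_PRELOAD_NR 16U

struct preload_cache {
        unsigned char nr;                    /* valid entries      */
        unsigned char tail;                  /* oldest entry       */
        unsigned int  esid[SLB_PRELOAD_NR];  /* cached segment IDs */
};

static bool preload_add(struct preload_cache *c, unsigned int esid)
{
        unsigned char idx;

        for (idx = 0; idx < c->nr; idx++)    /* already cached? */
                if (c->esid[(c->tail + idx) % SLB_PRELOAD_NR] == esid)
                        return false;

        idx = (c->tail + c->nr) % SLB_PRELOAD_NR;
        c->esid[idx] = esid;
        if (c->nr < SLB_PRELOAD_NR)
                c->nr++;
        else
                c->tail = (c->tail + 1) % SLB_PRELOAD_NR;  /* overwrite oldest */
        return true;
}

int main(void)
{
        struct preload_cache c = { 0 };

        preload_add(&c, 0xc000);
        preload_add(&c, 0xc000);             /* duplicate is ignored */
        printf("entries: %u\n", c.nr);       /* prints 1 */
        return 0;
}
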
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index d018e8602694..58ef8c43a89d 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -201,6 +201,21 @@ TRACE_EVENT(tlbie,
201 __entry->r) 201 __entry->r)
202); 202);
203 203
204TRACE_EVENT(tlbia,
205
206 TP_PROTO(unsigned long id),
207 TP_ARGS(id),
208 TP_STRUCT__entry(
209 __field(unsigned long, id)
210 ),
211
212 TP_fast_assign(
213 __entry->id = id;
214 ),
215
216 TP_printk("ctx.id=0x%lx", __entry->id)
217);
218
204#endif /* _TRACE_POWERPC_H */ 219#endif /* _TRACE_POWERPC_H */
205 220
206#undef TRACE_INCLUDE_PATH 221#undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index bac225bb7f64..15bea9a0f260 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -260,7 +260,7 @@ do { \
260({ \ 260({ \
261 long __gu_err; \ 261 long __gu_err; \
262 __long_type(*(ptr)) __gu_val; \ 262 __long_type(*(ptr)) __gu_val; \
263 const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ 263 __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
264 __chk_user_ptr(ptr); \ 264 __chk_user_ptr(ptr); \
265 if (!is_kernel_addr((unsigned long)__gu_addr)) \ 265 if (!is_kernel_addr((unsigned long)__gu_addr)) \
266 might_fault(); \ 266 might_fault(); \
@@ -274,7 +274,7 @@ do { \
274({ \ 274({ \
275 long __gu_err = -EFAULT; \ 275 long __gu_err = -EFAULT; \
276 __long_type(*(ptr)) __gu_val = 0; \ 276 __long_type(*(ptr)) __gu_val = 0; \
277 const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ 277 __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
278 might_fault(); \ 278 might_fault(); \
279 if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ 279 if (access_ok(VERIFY_READ, __gu_addr, (size))) { \
280 barrier_nospec(); \ 280 barrier_nospec(); \
@@ -288,7 +288,7 @@ do { \
288({ \ 288({ \
289 long __gu_err; \ 289 long __gu_err; \
290 __long_type(*(ptr)) __gu_val; \ 290 __long_type(*(ptr)) __gu_val; \
291 const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ 291 __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
292 __chk_user_ptr(ptr); \ 292 __chk_user_ptr(ptr); \
293 barrier_nospec(); \ 293 barrier_nospec(); \
294 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ 294 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index c19379f0a32e..b0de85b477e1 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -22,6 +22,7 @@
22#include <linux/compiler.h> 22#include <linux/compiler.h>
23#include <linux/linkage.h> 23#include <linux/linkage.h>
24 24
25#define __ARCH_WANT_NEW_STAT
25#define __ARCH_WANT_OLD_READDIR 26#define __ARCH_WANT_OLD_READDIR
26#define __ARCH_WANT_STAT64 27#define __ARCH_WANT_STAT64
27#define __ARCH_WANT_SYS_ALARM 28#define __ARCH_WANT_SYS_ALARM
@@ -35,7 +36,6 @@
35#define __ARCH_WANT_SYS_SOCKETCALL 36#define __ARCH_WANT_SYS_SOCKETCALL
36#define __ARCH_WANT_SYS_FADVISE64 37#define __ARCH_WANT_SYS_FADVISE64
37#define __ARCH_WANT_SYS_GETPGRP 38#define __ARCH_WANT_SYS_GETPGRP
38#define __ARCH_WANT_SYS_LLSEEK
39#define __ARCH_WANT_SYS_NICE 39#define __ARCH_WANT_SYS_NICE
40#define __ARCH_WANT_SYS_OLD_GETRLIMIT 40#define __ARCH_WANT_SYS_OLD_GETRLIMIT
41#define __ARCH_WANT_SYS_OLD_UNAME 41#define __ARCH_WANT_SYS_OLD_UNAME
@@ -47,6 +47,7 @@
47#endif 47#endif
48#ifdef CONFIG_PPC64 48#ifdef CONFIG_PPC64
49#define __ARCH_WANT_COMPAT_SYS_TIME 49#define __ARCH_WANT_COMPAT_SYS_TIME
50#define __ARCH_WANT_SYS_UTIME32
50#define __ARCH_WANT_SYS_NEWFSTATAT 51#define __ARCH_WANT_SYS_NEWFSTATAT
51#define __ARCH_WANT_COMPAT_SYS_SENDFILE 52#define __ARCH_WANT_COMPAT_SYS_SENDFILE
52#endif 53#endif
diff --git a/arch/powerpc/include/asm/user.h b/arch/powerpc/include/asm/user.h
index 5c0e082eae7b..99443b8594e7 100644
--- a/arch/powerpc/include/asm/user.h
+++ b/arch/powerpc/include/asm/user.h
@@ -31,7 +31,7 @@
31 * to write an integer number of pages. 31 * to write an integer number of pages.
32 */ 32 */
33struct user { 33struct user {
34 struct pt_regs regs; /* entire machine state */ 34 struct user_pt_regs regs; /* entire machine state */
35 size_t u_tsize; /* text size (pages) */ 35 size_t u_tsize; /* text size (pages) */
36 size_t u_dsize; /* data size (pages) */ 36 size_t u_dsize; /* data size (pages) */
37 size_t u_ssize; /* stack size (pages) */ 37 size_t u_ssize; /* stack size (pages) */
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 1a6ed5919ffd..a658091a19f9 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -7,3 +7,4 @@ generic-y += poll.h
7generic-y += resource.h 7generic-y += resource.h
8generic-y += sockios.h 8generic-y += sockios.h
9generic-y += statfs.h 9generic-y += statfs.h
10generic-y += siginfo.h
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 41b1a5c15734..2c145da3b774 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -102,6 +102,8 @@
102#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ 102#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
103#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ 103#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
104#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ 104#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */
105#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816)
106#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816)
105 107
106#define TIOCSERCONFIG 0x5453 108#define TIOCSERCONFIG 0x5453
107#define TIOCSERGWILD 0x5454 109#define TIOCSERGWILD 0x5454
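
Illustration only, not part of the patch: the two new ioctls above get and set ISO 7816 (smart-card) parameters on a serial port. A hedged userspace sketch follows; it assumes kernel headers new enough to provide struct serial_iso7816 in <linux/serial.h>, falls back to the request numbers from the hunk if the ioctls are not yet in the installed headers, and uses /dev/ttyS0 purely as a placeholder device:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/ioctl.h>
	#include <linux/serial.h>	/* struct serial_iso7816 (recent headers) */

	#ifndef TIOCGISO7816		/* values as added in the hunk above */
	#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
	#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
	#endif

	int main(void)
	{
		struct serial_iso7816 cfg;
		int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);	/* placeholder */

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Read the current ISO 7816 settings and write them straight back. */
		if (ioctl(fd, TIOCGISO7816, &cfg) == 0)
			ioctl(fd, TIOCSISO7816, &cfg);
		else
			perror("TIOCGISO7816");
		close(fd);
		return 0;
	}
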
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 1b32b56a03d3..8c876c166ef2 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
634 634
635#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) 635#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
636#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) 636#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
637#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
637 638
638/* Transactional Memory checkpointed state: 639/* Transactional Memory checkpointed state:
639 * This is all GPRs, all VSX regs and a subset of SPRs 640 * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h
index 5e3edc2a7634..f5f1ccc740fc 100644
--- a/arch/powerpc/include/uapi/asm/ptrace.h
+++ b/arch/powerpc/include/uapi/asm/ptrace.h
@@ -29,7 +29,12 @@
29 29
30#ifndef __ASSEMBLY__ 30#ifndef __ASSEMBLY__
31 31
32struct pt_regs { 32#ifdef __KERNEL__
33struct user_pt_regs
34#else
35struct pt_regs
36#endif
37{
33 unsigned long gpr[32]; 38 unsigned long gpr[32];
34 unsigned long nip; 39 unsigned long nip;
35 unsigned long msr; 40 unsigned long msr;
@@ -160,6 +165,10 @@ struct pt_regs {
160#define PTRACE_GETVSRREGS 0x1b 165#define PTRACE_GETVSRREGS 0x1b
161#define PTRACE_SETVSRREGS 0x1c 166#define PTRACE_SETVSRREGS 0x1c
162 167
168/* Syscall emulation defines */
169#define PTRACE_SYSEMU 0x1d
170#define PTRACE_SYSEMU_SINGLESTEP 0x1e
171
163/* 172/*
164 * Get or set a debug register. The first 16 are DABR registers and the 173 * Get or set a debug register. The first 16 are DABR registers and the
165 * second 16 are IABR registers. 174 * second 16 are IABR registers.
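
Illustration only, not part of the patch: PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP, added above, let a tracer stop the tracee at system-call entry without the kernel executing the call. A hedged userspace sketch of a tracer using the new request is below; the fallback constant comes from the hunk (powerpc value), the child is hypothetical, and error handling is omitted for brevity:

	#include <stdio.h>
	#include <unistd.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>

	#ifndef PTRACE_SYSEMU		/* value from the hunk above (powerpc) */
	#define PTRACE_SYSEMU 0x1d
	#endif

	int main(void)
	{
		pid_t child = fork();

		if (child == 0) {
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			raise(SIGSTOP);		/* let the parent attach first */
			getpid();		/* next syscall entry stops, unexecuted */
			_exit(0);
		}

		waitpid(child, NULL, 0);	/* child stopped itself */
		/* Resume until the next syscall entry without executing it. */
		ptrace(PTRACE_SYSEMU, child, NULL, NULL);
		waitpid(child, NULL, 0);
		printf("child stopped at an (unexecuted) syscall entry\n");
		ptrace(PTRACE_KILL, child, NULL, NULL);
		return 0;
	}
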
diff --git a/arch/powerpc/include/uapi/asm/sigcontext.h b/arch/powerpc/include/uapi/asm/sigcontext.h
index 2fbe485acdb4..630aeda56d59 100644
--- a/arch/powerpc/include/uapi/asm/sigcontext.h
+++ b/arch/powerpc/include/uapi/asm/sigcontext.h
@@ -22,7 +22,11 @@ struct sigcontext {
22#endif 22#endif
23 unsigned long handler; 23 unsigned long handler;
24 unsigned long oldmask; 24 unsigned long oldmask;
25 struct pt_regs __user *regs; 25#ifdef __KERNEL__
26 struct user_pt_regs __user *regs;
27#else
28 struct pt_regs *regs;
29#endif
26#ifdef __powerpc64__ 30#ifdef __powerpc64__
27 elf_gregset_t gp_regs; 31 elf_gregset_t gp_regs;
28 elf_fpregset_t fp_regs; 32 elf_fpregset_t fp_regs;
diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h
deleted file mode 100644
index 1d51d9b88221..000000000000
--- a/arch/powerpc/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,18 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2#ifndef _ASM_POWERPC_SIGINFO_H
3#define _ASM_POWERPC_SIGINFO_H
4
5/*
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#ifdef __powerpc64__
13# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
14#endif
15
16#include <asm-generic/siginfo.h>
17
18#endif /* _ASM_POWERPC_SIGINFO_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 3b66f2c19c84..53d4b8d5b54d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -5,7 +5,8 @@
5 5
6CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' 6CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
7 7
8subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror 8# Disable clang warning for using setjmp without setjmp.h header
9CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
9 10
10ifdef CONFIG_PPC64 11ifdef CONFIG_PPC64
11CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) 12CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
@@ -20,12 +21,14 @@ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
20CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 21CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
21CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 22CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
22 23
24CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
25
23ifdef CONFIG_FUNCTION_TRACER 26ifdef CONFIG_FUNCTION_TRACER
24# Do not trace early boot code 27# Do not trace early boot code
25CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 28CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE)
26CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 29CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE)
27CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 30CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
28CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 31CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
29endif 32endif
30 33
31obj-y := cputable.o ptrace.o syscalls.o \ 34obj-y := cputable.o ptrace.o syscalls.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 89cf15566c4e..9ffc72ded73a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -79,11 +79,16 @@ int main(void)
79{ 79{
80 OFFSET(THREAD, task_struct, thread); 80 OFFSET(THREAD, task_struct, thread);
81 OFFSET(MM, task_struct, mm); 81 OFFSET(MM, task_struct, mm);
82#ifdef CONFIG_STACKPROTECTOR
83 OFFSET(TASK_CANARY, task_struct, stack_canary);
84#ifdef CONFIG_PPC64
85 OFFSET(PACA_CANARY, paca_struct, canary);
86#endif
87#endif
82 OFFSET(MMCONTEXTID, mm_struct, context.id); 88 OFFSET(MMCONTEXTID, mm_struct, context.id);
83#ifdef CONFIG_PPC64 89#ifdef CONFIG_PPC64
84 DEFINE(SIGSEGV, SIGSEGV); 90 DEFINE(SIGSEGV, SIGSEGV);
85 DEFINE(NMI_MASK, NMI_MASK); 91 DEFINE(NMI_MASK, NMI_MASK);
86 OFFSET(TASKTHREADPPR, task_struct, thread.ppr);
87#else 92#else
88 OFFSET(THREAD_INFO, task_struct, stack); 93 OFFSET(THREAD_INFO, task_struct, stack);
89 DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); 94 DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
@@ -173,7 +178,6 @@ int main(void)
173 OFFSET(PACAKSAVE, paca_struct, kstack); 178 OFFSET(PACAKSAVE, paca_struct, kstack);
174 OFFSET(PACACURRENT, paca_struct, __current); 179 OFFSET(PACACURRENT, paca_struct, __current);
175 OFFSET(PACASAVEDMSR, paca_struct, saved_msr); 180 OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
176 OFFSET(PACASTABRR, paca_struct, stab_rr);
177 OFFSET(PACAR1, paca_struct, saved_r1); 181 OFFSET(PACAR1, paca_struct, saved_r1);
178 OFFSET(PACATOC, paca_struct, kernel_toc); 182 OFFSET(PACATOC, paca_struct, kernel_toc);
179 OFFSET(PACAKBASE, paca_struct, kernelbase); 183 OFFSET(PACAKBASE, paca_struct, kernelbase);
@@ -212,6 +216,7 @@ int main(void)
212#ifdef CONFIG_PPC_BOOK3S_64 216#ifdef CONFIG_PPC_BOOK3S_64
213 OFFSET(PACASLBCACHE, paca_struct, slb_cache); 217 OFFSET(PACASLBCACHE, paca_struct, slb_cache);
214 OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); 218 OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
219 OFFSET(PACASTABRR, paca_struct, stab_rr);
215 OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); 220 OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
216#ifdef CONFIG_PPC_MM_SLICES 221#ifdef CONFIG_PPC_MM_SLICES
217 OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); 222 OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
@@ -274,11 +279,6 @@ int main(void)
274 /* Interrupt register frame */ 279 /* Interrupt register frame */
275 DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); 280 DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
276 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); 281 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
277#ifdef CONFIG_PPC64
278 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
279 DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
280 DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
281#endif /* CONFIG_PPC64 */
282 STACK_PT_REGS_OFFSET(GPR0, gpr[0]); 282 STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
283 STACK_PT_REGS_OFFSET(GPR1, gpr[1]); 283 STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
284 STACK_PT_REGS_OFFSET(GPR2, gpr[2]); 284 STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
@@ -322,10 +322,7 @@ int main(void)
322 STACK_PT_REGS_OFFSET(_ESR, dsisr); 322 STACK_PT_REGS_OFFSET(_ESR, dsisr);
323#else /* CONFIG_PPC64 */ 323#else /* CONFIG_PPC64 */
324 STACK_PT_REGS_OFFSET(SOFTE, softe); 324 STACK_PT_REGS_OFFSET(SOFTE, softe);
325 325 STACK_PT_REGS_OFFSET(_PPR, ppr);
326 /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
327 DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
328 DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
329#endif /* CONFIG_PPC64 */ 326#endif /* CONFIG_PPC64 */
330 327
331#if defined(CONFIG_PPC32) 328#if defined(CONFIG_PPC32)
@@ -387,12 +384,12 @@ int main(void)
387 OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); 384 OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
388 OFFSET(TVAL64_TV_SEC, timeval, tv_sec); 385 OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
389 OFFSET(TVAL64_TV_USEC, timeval, tv_usec); 386 OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
390 OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec); 387 OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
391 OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec); 388 OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
392 OFFSET(TSPC64_TV_SEC, timespec, tv_sec); 389 OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
393 OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec); 390 OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
394 OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec); 391 OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
395 OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec); 392 OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
396#else 393#else
397 OFFSET(TVAL32_TV_SEC, timeval, tv_sec); 394 OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
398 OFFSET(TVAL32_TV_USEC, timeval, tv_usec); 395 OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
@@ -438,7 +435,7 @@ int main(void)
438#ifdef CONFIG_PPC_BOOK3S 435#ifdef CONFIG_PPC_BOOK3S
439 OFFSET(VCPU_TAR, kvm_vcpu, arch.tar); 436 OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
440#endif 437#endif
441 OFFSET(VCPU_CR, kvm_vcpu, arch.cr); 438 OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
442 OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip); 439 OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
443#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 440#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
444 OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr); 441 OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
@@ -503,6 +500,7 @@ int main(void)
503 OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); 500 OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
504 OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); 501 OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
505 OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); 502 OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
503 OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
506 OFFSET(VCPU_CPU, kvm_vcpu, cpu); 504 OFFSET(VCPU_CPU, kvm_vcpu, cpu);
507 OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); 505 OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
508#endif 506#endif
@@ -695,7 +693,7 @@ int main(void)
695#endif /* CONFIG_PPC_BOOK3S_64 */ 693#endif /* CONFIG_PPC_BOOK3S_64 */
696 694
697#else /* CONFIG_PPC_BOOK3S */ 695#else /* CONFIG_PPC_BOOK3S */
698 OFFSET(VCPU_CR, kvm_vcpu, arch.cr); 696 OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
699 OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer); 697 OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
700 OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link); 698 OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
701 OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr); 699 OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index b2072d5bbf2b..b4241ed1456e 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -163,7 +163,7 @@ void btext_map(void)
163 offset = ((unsigned long) dispDeviceBase) - base; 163 offset = ((unsigned long) dispDeviceBase) - base;
164 size = dispDeviceRowBytes * dispDeviceRect[3] + offset 164 size = dispDeviceRowBytes * dispDeviceRect[3] + offset
165 + dispDeviceRect[0]; 165 + dispDeviceRect[0];
166 vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0)))); 166 vbase = ioremap_wc(base, size);
167 if (!vbase) 167 if (!vbase)
168 return; 168 return;
169 logicalDisplayBase = vbase + offset; 169 logicalDisplayBase = vbase + offset;
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index a8f20e5928e1..be57bd07596d 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -20,6 +20,8 @@
20#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <asm/prom.h> 22#include <asm/prom.h>
23#include <asm/cputhreads.h>
24#include <asm/smp.h>
23 25
24#include "cacheinfo.h" 26#include "cacheinfo.h"
25 27
@@ -627,17 +629,48 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
627static struct kobj_attribute cache_level_attr = 629static struct kobj_attribute cache_level_attr =
628 __ATTR(level, 0444, level_show, NULL); 630 __ATTR(level, 0444, level_show, NULL);
629 631
632static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
633{
634 struct kobject *index_dir_kobj = &index->kobj;
635 struct kobject *cache_dir_kobj = index_dir_kobj->parent;
636 struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
637 struct device *dev = kobj_to_dev(cpu_dev_kobj);
638
639 return dev->id;
640}
641
642/*
643 * On big-core systems, each core has two groups of CPUs each of which
644 * has its own L1-cache. The thread-siblings which share l1-cache with
645 * @cpu can be obtained via cpu_smallcore_mask().
646 */
647static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache)
648{
649 if (cache->level == 1)
650 return cpu_smallcore_mask(cpu);
651
652 return &cache->shared_cpu_map;
653}
654
630static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) 655static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
631{ 656{
632 struct cache_index_dir *index; 657 struct cache_index_dir *index;
633 struct cache *cache; 658 struct cache *cache;
634 int ret; 659 const struct cpumask *mask;
660 int ret, cpu;
635 661
636 index = kobj_to_cache_index_dir(k); 662 index = kobj_to_cache_index_dir(k);
637 cache = index->cache; 663 cache = index->cache;
638 664
665 if (has_big_cores) {
666 cpu = index_dir_to_cpu(index);
667 mask = get_big_core_shared_cpu_map(cpu, cache);
668 } else {
669 mask = &cache->shared_cpu_map;
670 }
671
639 ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n", 672 ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
640 cpumask_pr_args(&cache->shared_cpu_map)); 673 cpumask_pr_args(mask));
641 buf[ret++] = '\n'; 674 buf[ret++] = '\n';
642 buf[ret] = '\0'; 675 buf[ret] = '\0';
643 return ret; 676 return ret;
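
Illustration only, not part of the patch: shared_cpu_map_show() above backs the shared_cpu_map attribute exported under sysfs, and on big-core systems the L1 entries now report only the small-core thread siblings. A minimal userspace reader of that attribute (index0 is typically one of cpu0's level-1 caches; exact indices vary by system):

	#include <stdio.h>

	int main(void)
	{
		/* Standard sysfs location for the attribute implemented above. */
		const char *path =
			"/sys/devices/system/cpu/cpu0/cache/index0/shared_cpu_map";
		char buf[256];
		FILE *f = fopen(path, "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fgets(buf, sizeof(buf), f))
			printf("cpu0 index0 shared_cpu_map: %s", buf);
		fclose(f);
		return 0;
	}
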
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 458b928dbd84..c317080db771 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -147,8 +147,8 @@ __init_hvmode_206:
147 rldicl. r0,r3,4,63 147 rldicl. r0,r3,4,63
148 bnelr 148 bnelr
149 ld r5,CPU_SPEC_FEATURES(r4) 149 ld r5,CPU_SPEC_FEATURES(r4)
150 LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) 150 LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
151 xor r5,r5,r6 151 andc r5,r5,r6
152 std r5,CPU_SPEC_FEATURES(r4) 152 std r5,CPU_SPEC_FEATURES(r4)
153 blr 153 blr
154 154
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index d10ad258d41a..bbdc4706c159 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -110,7 +110,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
110 vaddr = __va(paddr); 110 vaddr = __va(paddr);
111 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); 111 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
112 } else { 112 } else {
113 vaddr = __ioremap(paddr, PAGE_SIZE, 0); 113 vaddr = ioremap_cache(paddr, PAGE_SIZE);
114 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); 114 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
115 iounmap(vaddr); 115 iounmap(vaddr);
116 } 116 }
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 88f3963ca30f..5fc335f4d9cd 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -11,7 +11,7 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/dma-mapping.h> 14#include <linux/dma-direct.h>
15#include <linux/memblock.h> 15#include <linux/memblock.h>
16#include <linux/pfn.h> 16#include <linux/pfn.h>
17#include <linux/of_platform.h> 17#include <linux/of_platform.h>
@@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
59 .sync_single_for_device = swiotlb_sync_single_for_device, 59 .sync_single_for_device = swiotlb_sync_single_for_device,
60 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, 60 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
61 .sync_sg_for_device = swiotlb_sync_sg_for_device, 61 .sync_sg_for_device = swiotlb_sync_sg_for_device,
62 .mapping_error = swiotlb_dma_mapping_error, 62 .mapping_error = dma_direct_mapping_error,
63 .get_required_mask = swiotlb_powerpc_get_required, 63 .get_required_mask = swiotlb_powerpc_get_required,
64}; 64};
65 65
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index f432054234a4..8be3721d9302 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -1008,9 +1008,7 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
1008 /* Count and allocate space for cpu features */ 1008 /* Count and allocate space for cpu features */
1009 of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes, 1009 of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
1010 &nr_dt_cpu_features); 1010 &nr_dt_cpu_features);
1011 dt_cpu_features = __va( 1011 dt_cpu_features = __va(memblock_phys_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE));
1012 memblock_alloc(sizeof(struct dt_cpu_feature)*
1013 nr_dt_cpu_features, PAGE_SIZE));
1014 1012
1015 cpufeatures_setup_start(isa); 1013 cpufeatures_setup_start(isa);
1016 1014
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6ebba3e48b01..6cae6b56ffd6 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -169,6 +169,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
169 int n = 0, l = 0; 169 int n = 0, l = 0;
170 char buffer[128]; 170 char buffer[128];
171 171
172 if (!pdn) {
173 pr_warn("EEH: Note: No error log for absent device.\n");
174 return 0;
175 }
176
172 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", 177 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
173 pdn->phb->global_number, pdn->busno, 178 pdn->phb->global_number, pdn->busno,
174 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 179 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
@@ -399,7 +404,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
399 } 404 }
400 405
401 /* Isolate the PHB and send event */ 406 /* Isolate the PHB and send event */
402 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 407 eeh_pe_mark_isolated(phb_pe);
403 eeh_serialize_unlock(flags); 408 eeh_serialize_unlock(flags);
404 409
405 pr_err("EEH: PHB#%x failure detected, location: %s\n", 410 pr_err("EEH: PHB#%x failure detected, location: %s\n",
@@ -558,7 +563,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
558 * with other functions on this device, and functions under 563 * with other functions on this device, and functions under
559 * bridges. 564 * bridges.
560 */ 565 */
561 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 566 eeh_pe_mark_isolated(pe);
562 eeh_serialize_unlock(flags); 567 eeh_serialize_unlock(flags);
563 568
564 /* Most EEH events are due to device driver bugs. Having 569 /* Most EEH events are due to device driver bugs. Having
@@ -676,7 +681,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
676 681
677 /* Check if the request is finished successfully */ 682 /* Check if the request is finished successfully */
678 if (active_flag) { 683 if (active_flag) {
679 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 684 rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
680 if (rc < 0) 685 if (rc < 0)
681 return rc; 686 return rc;
682 687
@@ -825,7 +830,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
825 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 830 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
826 break; 831 break;
827 case pcie_hot_reset: 832 case pcie_hot_reset:
828 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 833 eeh_pe_mark_isolated(pe);
834 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
829 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 835 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
830 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 836 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
831 if (!(pe->type & EEH_PE_VF)) 837 if (!(pe->type & EEH_PE_VF))
@@ -833,7 +839,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
833 eeh_ops->reset(pe, EEH_RESET_HOT); 839 eeh_ops->reset(pe, EEH_RESET_HOT);
834 break; 840 break;
835 case pcie_warm_reset: 841 case pcie_warm_reset:
836 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 842 eeh_pe_mark_isolated(pe);
843 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
837 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 844 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
838 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 845 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
839 if (!(pe->type & EEH_PE_VF)) 846 if (!(pe->type & EEH_PE_VF))
@@ -913,16 +920,15 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
913 break; 920 break;
914 921
915 /* Wait until the PE is in a functioning state */ 922 /* Wait until the PE is in a functioning state */
916 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 923 state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
917 if (eeh_state_active(state))
918 break;
919
920 if (state < 0) { 924 if (state < 0) {
921 pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", 925 pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
922 __func__, pe->phb->global_number, pe->addr); 926 __func__, pe->phb->global_number, pe->addr);
923 ret = -ENOTRECOVERABLE; 927 ret = -ENOTRECOVERABLE;
924 break; 928 break;
925 } 929 }
930 if (eeh_state_active(state))
931 break;
926 932
927 /* Set error in case this is our last attempt */ 933 /* Set error in case this is our last attempt */
928 ret = -EIO; 934 ret = -EIO;
@@ -1036,6 +1042,11 @@ void eeh_probe_devices(void)
1036 pdn = hose->pci_data; 1042 pdn = hose->pci_data;
1037 traverse_pci_dn(pdn, eeh_ops->probe, NULL); 1043 traverse_pci_dn(pdn, eeh_ops->probe, NULL);
1038 } 1044 }
1045 if (eeh_enabled())
1046 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
1047 else
1048 pr_info("EEH: No capable adapters found\n");
1049
1039} 1050}
1040 1051
1041/** 1052/**
@@ -1079,18 +1090,7 @@ static int eeh_init(void)
1079 eeh_dev_phb_init_dynamic(hose); 1090 eeh_dev_phb_init_dynamic(hose);
1080 1091
1081 /* Initialize EEH event */ 1092 /* Initialize EEH event */
1082 ret = eeh_event_init(); 1093 return eeh_event_init();
1083 if (ret)
1084 return ret;
1085
1086 eeh_probe_devices();
1087
1088 if (eeh_enabled())
1089 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
1090 else if (!eeh_has_flag(EEH_POSTPONED_PROBE))
1091 pr_info("EEH: No capable adapters found\n");
1092
1093 return ret;
1094} 1094}
1095 1095
1096core_initcall_sync(eeh_init); 1096core_initcall_sync(eeh_init);
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index a34e6912c15e..d8c90f3284b5 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -60,8 +60,6 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
60 /* Associate EEH device with OF node */ 60 /* Associate EEH device with OF node */
61 pdn->edev = edev; 61 pdn->edev = edev;
62 edev->pdn = pdn; 62 edev->pdn = pdn;
63 INIT_LIST_HEAD(&edev->list);
64 INIT_LIST_HEAD(&edev->rmv_list);
65 63
66 return edev; 64 return edev;
67} 65}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 67619b4b3f96..9446248eb6b8 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -35,8 +35,8 @@
35#include <asm/rtas.h> 35#include <asm/rtas.h>
36 36
37struct eeh_rmv_data { 37struct eeh_rmv_data {
38 struct list_head edev_list; 38 struct list_head removed_vf_list;
39 int removed; 39 int removed_dev_count;
40}; 40};
41 41
42static int eeh_result_priority(enum pci_ers_result result) 42static int eeh_result_priority(enum pci_ers_result result)
@@ -281,6 +281,10 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
281 struct pci_driver *driver; 281 struct pci_driver *driver;
282 enum pci_ers_result new_result; 282 enum pci_ers_result new_result;
283 283
284 if (!edev->pdev) {
285 eeh_edev_info(edev, "no device");
286 return;
287 }
284 device_lock(&edev->pdev->dev); 288 device_lock(&edev->pdev->dev);
285 if (eeh_edev_actionable(edev)) { 289 if (eeh_edev_actionable(edev)) {
286 driver = eeh_pcid_get(edev->pdev); 290 driver = eeh_pcid_get(edev->pdev);
@@ -400,7 +404,7 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
400 * EEH device is created. 404 * EEH device is created.
401 */ 405 */
402 if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) { 406 if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
403 if (list_is_last(&edev->list, &edev->pe->edevs)) 407 if (list_is_last(&edev->entry, &edev->pe->edevs))
404 eeh_pe_restore_bars(edev->pe); 408 eeh_pe_restore_bars(edev->pe);
405 409
406 return NULL; 410 return NULL;
@@ -465,10 +469,9 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
465 return rc; 469 return rc;
466} 470}
467 471
468static void *eeh_add_virt_device(void *data, void *userdata) 472static void *eeh_add_virt_device(struct eeh_dev *edev)
469{ 473{
470 struct pci_driver *driver; 474 struct pci_driver *driver;
471 struct eeh_dev *edev = (struct eeh_dev *)data;
472 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 475 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
473 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 476 struct pci_dn *pdn = eeh_dev_to_pdn(edev);
474 477
@@ -499,7 +502,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
499 struct pci_driver *driver; 502 struct pci_driver *driver;
500 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 503 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
501 struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; 504 struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
502 int *removed = rmv_data ? &rmv_data->removed : NULL;
503 505
504 /* 506 /*
505 * Actually, we should remove the PCI bridges as well. 507 * Actually, we should remove the PCI bridges as well.
@@ -521,7 +523,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
521 if (eeh_dev_removed(edev)) 523 if (eeh_dev_removed(edev))
522 return NULL; 524 return NULL;
523 525
524 if (removed) { 526 if (rmv_data) {
525 if (eeh_pe_passed(edev->pe)) 527 if (eeh_pe_passed(edev->pe))
526 return NULL; 528 return NULL;
527 driver = eeh_pcid_get(dev); 529 driver = eeh_pcid_get(dev);
@@ -539,10 +541,9 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
539 /* Remove it from PCI subsystem */ 541 /* Remove it from PCI subsystem */
540 pr_debug("EEH: Removing %s without EEH sensitive driver\n", 542 pr_debug("EEH: Removing %s without EEH sensitive driver\n",
541 pci_name(dev)); 543 pci_name(dev));
542 edev->bus = dev->bus;
543 edev->mode |= EEH_DEV_DISCONNECTED; 544 edev->mode |= EEH_DEV_DISCONNECTED;
544 if (removed) 545 if (rmv_data)
545 (*removed)++; 546 rmv_data->removed_dev_count++;
546 547
547 if (edev->physfn) { 548 if (edev->physfn) {
548#ifdef CONFIG_PCI_IOV 549#ifdef CONFIG_PCI_IOV
@@ -558,7 +559,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
558 pdn->pe_number = IODA_INVALID_PE; 559 pdn->pe_number = IODA_INVALID_PE;
559#endif 560#endif
560 if (rmv_data) 561 if (rmv_data)
561 list_add(&edev->rmv_list, &rmv_data->edev_list); 562 list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
562 } else { 563 } else {
563 pci_lock_rescan_remove(); 564 pci_lock_rescan_remove();
564 pci_stop_and_remove_bus_device(dev); 565 pci_stop_and_remove_bus_device(dev);
@@ -727,7 +728,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
727 * the device up before the scripts have taken it down, 728 * the device up before the scripts have taken it down,
728 * potentially weird things happen. 729 * potentially weird things happen.
729 */ 730 */
730 if (!driver_eeh_aware || rmv_data->removed) { 731 if (!driver_eeh_aware || rmv_data->removed_dev_count) {
731 pr_info("EEH: Sleep 5s ahead of %s hotplug\n", 732 pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
732 (driver_eeh_aware ? "partial" : "complete")); 733 (driver_eeh_aware ? "partial" : "complete"));
733 ssleep(5); 734 ssleep(5);
@@ -737,10 +738,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
737 * PE. We should disconnect it so the binding can be 738 * PE. We should disconnect it so the binding can be
738 * rebuilt when adding PCI devices. 739 * rebuilt when adding PCI devices.
739 */ 740 */
740 edev = list_first_entry(&pe->edevs, struct eeh_dev, list); 741 edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
741 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); 742 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
742 if (pe->type & EEH_PE_VF) { 743 if (pe->type & EEH_PE_VF) {
743 eeh_add_virt_device(edev, NULL); 744 eeh_add_virt_device(edev);
744 } else { 745 } else {
745 if (!driver_eeh_aware) 746 if (!driver_eeh_aware)
746 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 747 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
@@ -789,7 +790,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
789 struct eeh_pe *tmp_pe; 790 struct eeh_pe *tmp_pe;
790 int rc = 0; 791 int rc = 0;
791 enum pci_ers_result result = PCI_ERS_RESULT_NONE; 792 enum pci_ers_result result = PCI_ERS_RESULT_NONE;
792 struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; 793 struct eeh_rmv_data rmv_data =
794 {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
793 795
794 bus = eeh_pe_bus_get(pe); 796 bus = eeh_pe_bus_get(pe);
795 if (!bus) { 797 if (!bus) {
@@ -806,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
806 pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", 808 pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
807 pe->phb->global_number, pe->addr, 809 pe->phb->global_number, pe->addr,
808 pe->freeze_count); 810 pe->freeze_count);
809 goto hard_fail; 811 result = PCI_ERS_RESULT_DISCONNECT;
810 } 812 }
811 pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
812 pe->freeze_count, eeh_max_freezes);
813 813
814 /* Walk the various device drivers attached to this slot through 814 /* Walk the various device drivers attached to this slot through
815 * a reset sequence, giving each an opportunity to do what it needs 815 * a reset sequence, giving each an opportunity to do what it needs
@@ -821,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
821 * the error. Override the result if necessary to have partially 821 * the error. Override the result if necessary to have partially
822 * hotplug for this case. 822 * hotplug for this case.
823 */ 823 */
824 pr_info("EEH: Notify device drivers to shutdown\n"); 824 if (result != PCI_ERS_RESULT_DISCONNECT) {
825 eeh_set_channel_state(pe, pci_channel_io_frozen); 825 pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
826 eeh_set_irq_state(pe, false); 826 pe->freeze_count, eeh_max_freezes);
827 eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error, 827 pr_info("EEH: Notify device drivers to shutdown\n");
828 &result); 828 eeh_set_channel_state(pe, pci_channel_io_frozen);
829 if ((pe->type & EEH_PE_PHB) && 829 eeh_set_irq_state(pe, false);
830 result != PCI_ERS_RESULT_NONE && 830 eeh_pe_report("error_detected(IO frozen)", pe,
831 result != PCI_ERS_RESULT_NEED_RESET) 831 eeh_report_error, &result);
832 result = PCI_ERS_RESULT_NEED_RESET; 832 if ((pe->type & EEH_PE_PHB) &&
833 result != PCI_ERS_RESULT_NONE &&
834 result != PCI_ERS_RESULT_NEED_RESET)
835 result = PCI_ERS_RESULT_NEED_RESET;
836 }
833 837
834 /* Get the current PCI slot state. This can take a long time, 838 /* Get the current PCI slot state. This can take a long time,
835 * sometimes over 300 seconds for certain systems. 839 * sometimes over 300 seconds for certain systems.
836 */ 840 */
837 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); 841 if (result != PCI_ERS_RESULT_DISCONNECT) {
838 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 842 rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
839 pr_warn("EEH: Permanent failure\n"); 843 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
840 goto hard_fail; 844 pr_warn("EEH: Permanent failure\n");
845 result = PCI_ERS_RESULT_DISCONNECT;
846 }
841 } 847 }
842 848
843 /* Since rtas may enable MMIO when posting the error log, 849 /* Since rtas may enable MMIO when posting the error log,
844 * don't post the error log until after all dev drivers 850 * don't post the error log until after all dev drivers
845 * have been informed. 851 * have been informed.
846 */ 852 */
847 pr_info("EEH: Collect temporary log\n"); 853 if (result != PCI_ERS_RESULT_DISCONNECT) {
848 eeh_slot_error_detail(pe, EEH_LOG_TEMP); 854 pr_info("EEH: Collect temporary log\n");
855 eeh_slot_error_detail(pe, EEH_LOG_TEMP);
856 }
849 857
850 /* If all device drivers were EEH-unaware, then shut 858 /* If all device drivers were EEH-unaware, then shut
851 * down all of the device drivers, and hope they 859 * down all of the device drivers, and hope they
@@ -857,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
857 if (rc) { 865 if (rc) {
858 pr_warn("%s: Unable to reset, err=%d\n", 866 pr_warn("%s: Unable to reset, err=%d\n",
859 __func__, rc); 867 __func__, rc);
860 goto hard_fail; 868 result = PCI_ERS_RESULT_DISCONNECT;
861 } 869 }
862 } 870 }
863 871
@@ -866,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
866 pr_info("EEH: Enable I/O for affected devices\n"); 874 pr_info("EEH: Enable I/O for affected devices\n");
867 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 875 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
868 876
869 if (rc < 0) 877 if (rc < 0) {
870 goto hard_fail; 878 result = PCI_ERS_RESULT_DISCONNECT;
871 if (rc) { 879 } else if (rc) {
872 result = PCI_ERS_RESULT_NEED_RESET; 880 result = PCI_ERS_RESULT_NEED_RESET;
873 } else { 881 } else {
874 pr_info("EEH: Notify device drivers to resume I/O\n"); 882 pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -882,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
882 pr_info("EEH: Enabled DMA for affected devices\n"); 890 pr_info("EEH: Enabled DMA for affected devices\n");
883 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 891 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
884 892
885 if (rc < 0) 893 if (rc < 0) {
886 goto hard_fail; 894 result = PCI_ERS_RESULT_DISCONNECT;
887 if (rc) { 895 } else if (rc) {
888 result = PCI_ERS_RESULT_NEED_RESET; 896 result = PCI_ERS_RESULT_NEED_RESET;
889 } else { 897 } else {
890 /* 898 /*
@@ -897,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
897 } 905 }
898 } 906 }
899 907
900 /* If any device has a hard failure, then shut off everything. */
901 if (result == PCI_ERS_RESULT_DISCONNECT) {
902 pr_warn("EEH: Device driver gave up\n");
903 goto hard_fail;
904 }
905
906 /* If any device called out for a reset, then reset the slot */ 908 /* If any device called out for a reset, then reset the slot */
907 if (result == PCI_ERS_RESULT_NEED_RESET) { 909 if (result == PCI_ERS_RESULT_NEED_RESET) {
908 pr_info("EEH: Reset without hotplug activity\n"); 910 pr_info("EEH: Reset without hotplug activity\n");
@@ -910,88 +912,81 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
910 if (rc) { 912 if (rc) {
911 pr_warn("%s: Cannot reset, err=%d\n", 913 pr_warn("%s: Cannot reset, err=%d\n",
912 __func__, rc); 914 __func__, rc);
913 goto hard_fail; 915 result = PCI_ERS_RESULT_DISCONNECT;
916 } else {
917 result = PCI_ERS_RESULT_NONE;
918 eeh_set_channel_state(pe, pci_channel_io_normal);
919 eeh_set_irq_state(pe, true);
920 eeh_pe_report("slot_reset", pe, eeh_report_reset,
921 &result);
914 } 922 }
915
916 pr_info("EEH: Notify device drivers "
917 "the completion of reset\n");
918 result = PCI_ERS_RESULT_NONE;
919 eeh_set_channel_state(pe, pci_channel_io_normal);
920 eeh_set_irq_state(pe, true);
921 eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
922 }
923
924 /* All devices should claim they have recovered by now. */
925 if ((result != PCI_ERS_RESULT_RECOVERED) &&
926 (result != PCI_ERS_RESULT_NONE)) {
927 pr_warn("EEH: Not recovered\n");
928 goto hard_fail;
929 }
930
931 /*
932 * For those hot removed VFs, we should add back them after PF get
933 * recovered properly.
934 */
935 list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
936 eeh_add_virt_device(edev, NULL);
937 list_del(&edev->rmv_list);
938 } 923 }
939 924
940 /* Tell all device drivers that they can resume operations */ 925 if ((result == PCI_ERS_RESULT_RECOVERED) ||
941 pr_info("EEH: Notify device driver to resume\n"); 926 (result == PCI_ERS_RESULT_NONE)) {
942 eeh_set_channel_state(pe, pci_channel_io_normal); 927 /*
943 eeh_set_irq_state(pe, true); 928 * For those hot removed VFs, we should add back them after PF
944 eeh_pe_report("resume", pe, eeh_report_resume, NULL); 929 * get recovered properly.
945 eeh_for_each_pe(pe, tmp_pe) { 930 */
946 eeh_pe_for_each_dev(tmp_pe, edev, tmp) { 931 list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
947 edev->mode &= ~EEH_DEV_NO_HANDLER; 932 rmv_entry) {
948 edev->in_error = false; 933 eeh_add_virt_device(edev);
934 list_del(&edev->rmv_entry);
949 } 935 }
950 }
951 936
952 pr_info("EEH: Recovery successful.\n"); 937 /* Tell all device drivers that they can resume operations */
953 goto final; 938 pr_info("EEH: Notify device driver to resume\n");
939 eeh_set_channel_state(pe, pci_channel_io_normal);
940 eeh_set_irq_state(pe, true);
941 eeh_pe_report("resume", pe, eeh_report_resume, NULL);
942 eeh_for_each_pe(pe, tmp_pe) {
943 eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
944 edev->mode &= ~EEH_DEV_NO_HANDLER;
945 edev->in_error = false;
946 }
947 }
954 948
955hard_fail: 949 pr_info("EEH: Recovery successful.\n");
956 /* 950 } else {
957 * About 90% of all real-life EEH failures in the field 951 /*
958 * are due to poorly seated PCI cards. Only 10% or so are 952 * About 90% of all real-life EEH failures in the field
959 * due to actual, failed cards. 953 * are due to poorly seated PCI cards. Only 10% or so are
960 */ 954 * due to actual, failed cards.
961 pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" 955 */
962 "Please try reseating or replacing it\n", 956 pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
963 pe->phb->global_number, pe->addr); 957 "Please try reseating or replacing it\n",
958 pe->phb->global_number, pe->addr);
964 959
965 eeh_slot_error_detail(pe, EEH_LOG_PERM); 960 eeh_slot_error_detail(pe, EEH_LOG_PERM);
966 961
967 /* Notify all devices that they're about to go down. */ 962 /* Notify all devices that they're about to go down. */
968 eeh_set_channel_state(pe, pci_channel_io_perm_failure); 963 eeh_set_channel_state(pe, pci_channel_io_perm_failure);
969 eeh_set_irq_state(pe, false); 964 eeh_set_irq_state(pe, false);
970 eeh_pe_report("error_detected(permanent failure)", pe, 965 eeh_pe_report("error_detected(permanent failure)", pe,
971 eeh_report_failure, NULL); 966 eeh_report_failure, NULL);
972 967
973 /* Mark the PE to be removed permanently */ 968 /* Mark the PE to be removed permanently */
974 eeh_pe_state_mark(pe, EEH_PE_REMOVED); 969 eeh_pe_state_mark(pe, EEH_PE_REMOVED);
975 970
976 /* 971 /*
977 * Shut down the device drivers for good. We mark 972 * Shut down the device drivers for good. We mark
978 * all removed devices correctly to avoid access 973 * all removed devices correctly to avoid access
979 * the their PCI config any more. 974 * the their PCI config any more.
980 */ 975 */
981 if (pe->type & EEH_PE_VF) { 976 if (pe->type & EEH_PE_VF) {
982 eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 977 eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
983 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 978 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
984 } else { 979 } else {
985 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 980 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
986 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 981 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
987 982
988 pci_lock_rescan_remove(); 983 pci_lock_rescan_remove();
989 pci_hp_remove_devices(bus); 984 pci_hp_remove_devices(bus);
990 pci_unlock_rescan_remove(); 985 pci_unlock_rescan_remove();
991 /* The passed PE should no longer be used */ 986 /* The passed PE should no longer be used */
992 return; 987 return;
988 }
993 } 989 }
994final:
995 eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 990 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
996} 991}
997 992
@@ -1026,7 +1021,7 @@ void eeh_handle_special_event(void)
1026 phb_pe = eeh_phb_pe_get(hose); 1021 phb_pe = eeh_phb_pe_get(hose);
1027 if (!phb_pe) continue; 1022 if (!phb_pe) continue;
1028 1023
1029 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 1024 eeh_pe_mark_isolated(phb_pe);
1030 } 1025 }
1031 1026
1032 eeh_serialize_unlock(flags); 1027 eeh_serialize_unlock(flags);
@@ -1041,11 +1036,9 @@ void eeh_handle_special_event(void)
1041 /* Purge all events of the PHB */ 1036 /* Purge all events of the PHB */
1042 eeh_remove_event(pe, true); 1037 eeh_remove_event(pe, true);
1043 1038
1044 if (rc == EEH_NEXT_ERR_DEAD_PHB) 1039 if (rc != EEH_NEXT_ERR_DEAD_PHB)
1045 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 1040 eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
1046 else 1041 eeh_pe_mark_isolated(pe);
1047 eeh_pe_state_mark(pe,
1048 EEH_PE_ISOLATED | EEH_PE_RECOVERING);
1049 1042
1050 eeh_serialize_unlock(flags); 1043 eeh_serialize_unlock(flags);
1051 1044
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 1b238ecc553e..6fa2032e0594 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -75,7 +75,6 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
75 pe->type = type; 75 pe->type = type;
76 pe->phb = phb; 76 pe->phb = phb;
77 INIT_LIST_HEAD(&pe->child_list); 77 INIT_LIST_HEAD(&pe->child_list);
78 INIT_LIST_HEAD(&pe->child);
79 INIT_LIST_HEAD(&pe->edevs); 78 INIT_LIST_HEAD(&pe->edevs);
80 79
81 pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe), 80 pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
@@ -110,6 +109,57 @@ int eeh_phb_pe_create(struct pci_controller *phb)
110} 109}
111 110
112/** 111/**
112 * eeh_wait_state - Wait for PE state
113 * @pe: EEH PE
114 * @max_wait: maximal period in millisecond
115 *
116 * Wait for the state of associated PE. It might take some time
117 * to retrieve the PE's state.
118 */
119int eeh_wait_state(struct eeh_pe *pe, int max_wait)
120{
121 int ret;
122 int mwait;
123
124 /*
125 * According to PAPR, the state of PE might be temporarily
126 * unavailable. Under the circumstance, we have to wait
127 * for indicated time determined by firmware. The maximal
128 * wait time is 5 minutes, which is acquired from the original
129 * EEH implementation. Also, the original implementation
130 * also defined the minimal wait time as 1 second.
131 */
132#define EEH_STATE_MIN_WAIT_TIME (1000)
133#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
134
135 while (1) {
136 ret = eeh_ops->get_state(pe, &mwait);
137
138 if (ret != EEH_STATE_UNAVAILABLE)
139 return ret;
140
141 if (max_wait <= 0) {
142 pr_warn("%s: Timeout when getting PE's state (%d)\n",
143 __func__, max_wait);
144 return EEH_STATE_NOT_SUPPORT;
145 }
146
147 if (mwait < EEH_STATE_MIN_WAIT_TIME) {
148 pr_warn("%s: Firmware returned bad wait value %d\n",
149 __func__, mwait);
150 mwait = EEH_STATE_MIN_WAIT_TIME;
151 } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
152 pr_warn("%s: Firmware returned too long wait value %d\n",
153 __func__, mwait);
154 mwait = EEH_STATE_MAX_WAIT_TIME;
155 }
156
157 msleep(min(mwait, max_wait));
158 max_wait -= mwait;
159 }
160}
161
162/**
113 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB 163 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
114 * @phb: PCI controller 164 * @phb: PCI controller
115 * 165 *
@@ -360,7 +410,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
360 edev->pe = pe; 410 edev->pe = pe;
361 411
362 /* Put the edev to PE */ 412 /* Put the edev to PE */
363 list_add_tail(&edev->list, &pe->edevs); 413 list_add_tail(&edev->entry, &pe->edevs);
364 pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n", 414 pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
365 pdn->phb->global_number, 415 pdn->phb->global_number,
366 pdn->busno, 416 pdn->busno,
@@ -369,7 +419,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
369 pe->addr); 419 pe->addr);
370 return 0; 420 return 0;
371 } else if (pe && (pe->type & EEH_PE_INVALID)) { 421 } else if (pe && (pe->type & EEH_PE_INVALID)) {
372 list_add_tail(&edev->list, &pe->edevs); 422 list_add_tail(&edev->entry, &pe->edevs);
373 edev->pe = pe; 423 edev->pe = pe;
374 /* 424 /*
375 * We're running to here because of PCI hotplug caused by 425 * We're running to here because of PCI hotplug caused by
@@ -379,7 +429,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
379 while (parent) { 429 while (parent) {
380 if (!(parent->type & EEH_PE_INVALID)) 430 if (!(parent->type & EEH_PE_INVALID))
381 break; 431 break;
382 parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); 432 parent->type &= ~EEH_PE_INVALID;
383 parent = parent->parent; 433 parent = parent->parent;
384 } 434 }
385 435
@@ -429,7 +479,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
429 * link the EEH device accordingly. 479 * link the EEH device accordingly.
430 */ 480 */
431 list_add_tail(&pe->child, &parent->child_list); 481 list_add_tail(&pe->child, &parent->child_list);
432 list_add_tail(&edev->list, &pe->edevs); 482 list_add_tail(&edev->entry, &pe->edevs);
433 edev->pe = pe; 483 edev->pe = pe;
434 pr_debug("EEH: Add %04x:%02x:%02x.%01x to " 484 pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
435 "Device PE#%x, Parent PE#%x\n", 485 "Device PE#%x, Parent PE#%x\n",
@@ -457,7 +507,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
457 int cnt; 507 int cnt;
458 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 508 struct pci_dn *pdn = eeh_dev_to_pdn(edev);
459 509
460 if (!edev->pe) { 510 pe = eeh_dev_to_pe(edev);
511 if (!pe) {
461 pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n", 512 pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
462 __func__, pdn->phb->global_number, 513 __func__, pdn->phb->global_number,
463 pdn->busno, 514 pdn->busno,
@@ -467,9 +518,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
467 } 518 }
468 519
469 /* Remove the EEH device */ 520 /* Remove the EEH device */
470 pe = eeh_dev_to_pe(edev);
471 edev->pe = NULL; 521 edev->pe = NULL;
472 list_del(&edev->list); 522 list_del(&edev->entry);
473 523
474 /* 524 /*
475 * Check if the parent PE includes any EEH devices. 525 * Check if the parent PE includes any EEH devices.
@@ -541,56 +591,50 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe)
541} 591}
542 592
543/** 593/**
544 * __eeh_pe_state_mark - Mark the state for the PE 594 * eeh_pe_state_mark - Mark specified state for PE and its associated device
545 * @data: EEH PE 595 * @pe: EEH PE
546 * @flag: state
547 * 596 *
548 * The function is used to mark the indicated state for the given 597 * EEH error affects the current PE and its child PEs. The function
549 * PE. Also, the associated PCI devices will be put into IO frozen 598 * is used to mark appropriate state for the affected PEs and the
550 * state as well. 599 * associated devices.
551 */ 600 */
552static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag) 601void eeh_pe_state_mark(struct eeh_pe *root, int state)
553{ 602{
554 int state = *((int *)flag); 603 struct eeh_pe *pe;
555 struct eeh_dev *edev, *tmp;
556 struct pci_dev *pdev;
557
558 /* Keep the state of permanently removed PE intact */
559 if (pe->state & EEH_PE_REMOVED)
560 return NULL;
561
562 pe->state |= state;
563
564 /* Offline PCI devices if applicable */
565 if (!(state & EEH_PE_ISOLATED))
566 return NULL;
567
568 eeh_pe_for_each_dev(pe, edev, tmp) {
569 pdev = eeh_dev_to_pci_dev(edev);
570 if (pdev)
571 pdev->error_state = pci_channel_io_frozen;
572 }
573
574 /* Block PCI config access if required */
575 if (pe->state & EEH_PE_CFG_RESTRICTED)
576 pe->state |= EEH_PE_CFG_BLOCKED;
577 604
578 return NULL; 605 eeh_for_each_pe(root, pe)
606 if (!(pe->state & EEH_PE_REMOVED))
607 pe->state |= state;
579} 608}
609EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
580 610
581/** 611/**
582 * eeh_pe_state_mark - Mark specified state for PE and its associated device 612 * eeh_pe_mark_isolated
583 * @pe: EEH PE 613 * @pe: EEH PE
584 * 614 *
585 * EEH error affects the current PE and its child PEs. The function 615 * Record that a PE has been isolated by marking the PE and it's children as
586 * is used to mark appropriate state for the affected PEs and the 616 * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
587 * associated devices. 617 * as pci_channel_io_frozen.
588 */ 618 */
589void eeh_pe_state_mark(struct eeh_pe *pe, int state) 619void eeh_pe_mark_isolated(struct eeh_pe *root)
590{ 620{
591 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); 621 struct eeh_pe *pe;
622 struct eeh_dev *edev;
623 struct pci_dev *pdev;
624
625 eeh_pe_state_mark(root, EEH_PE_ISOLATED);
626 eeh_for_each_pe(root, pe) {
627 list_for_each_entry(edev, &pe->edevs, entry) {
628 pdev = eeh_dev_to_pci_dev(edev);
629 if (pdev)
630 pdev->error_state = pci_channel_io_frozen;
631 }
632 /* Block PCI config access if required */
633 if (pe->state & EEH_PE_CFG_RESTRICTED)
634 pe->state |= EEH_PE_CFG_BLOCKED;
635 }
592} 636}
593EXPORT_SYMBOL_GPL(eeh_pe_state_mark); 637EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
594 638
595static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag) 639static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
596{ 640{
@@ -671,28 +715,6 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state)
671 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); 715 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
672} 716}
673 717
674/**
675 * eeh_pe_state_mark_with_cfg - Mark PE state with unblocked config space
676 * @pe: PE
677 * @state: PE state to be set
678 *
679 * Set specified flag to PE and its child PEs. The PCI config space
680 * of some PEs is blocked automatically when EEH_PE_ISOLATED is set,
681 * which isn't needed in some situations. The function allows to set
682 * the specified flag to indicated PEs without blocking their PCI
683 * config space.
684 */
685void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state)
686{
687 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
688 if (!(state & EEH_PE_ISOLATED))
689 return;
690
691 /* Clear EEH_PE_CFG_BLOCKED, which might be set just now */
692 state = EEH_PE_CFG_BLOCKED;
693 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
694}
695
696/* 718/*
697 * Some PCI bridges (e.g. PLX bridges) have primary/secondary 719 * Some PCI bridges (e.g. PLX bridges) have primary/secondary
698 * buses assigned explicitly by firmware, and we probably have 720 * buses assigned explicitly by firmware, and we probably have
@@ -945,7 +967,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
945 return pe->bus; 967 return pe->bus;
946 968
947 /* Retrieve the parent PCI bus of first (top) PCI device */ 969 /* Retrieve the parent PCI bus of first (top) PCI device */
948 edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); 970 edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
949 pdev = eeh_dev_to_pci_dev(edev); 971 pdev = eeh_dev_to_pci_dev(edev);
950 if (pdev) 972 if (pdev)
951 return pdev->bus; 973 return pdev->bus;
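
Illustration only, not part of the patch: the new eeh_wait_state() above polls the PE state, clamps the firmware-suggested delay between 1 second and 5 minutes, and gives up once the caller's time budget is spent. A standalone sketch of that clamp-and-budget loop, with a hypothetical demo_poll() standing in for eeh_ops->get_state() and the sleep elided:

	#include <stdio.h>

	#define DEMO_MIN_WAIT_MS	1000		/* 1 second, as in the patch */
	#define DEMO_MAX_WAIT_MS	(300 * 1000)	/* 5 minutes, as in the patch */

	/* Hypothetical poll: <0 means state available, >=0 is a suggested wait in ms. */
	static int demo_poll(void) { return 1500; }

	/* Clamp the suggested wait and stop when the caller's budget runs out. */
	static int demo_wait(int max_wait_ms)
	{
		while (1) {
			int mwait = demo_poll();

			if (mwait < 0)
				return 0;		/* state available */
			if (max_wait_ms <= 0)
				return -1;		/* budget exhausted */
			if (mwait < DEMO_MIN_WAIT_MS)
				mwait = DEMO_MIN_WAIT_MS;
			else if (mwait > DEMO_MAX_WAIT_MS)
				mwait = DEMO_MAX_WAIT_MS;
			/* a real caller would sleep(min(mwait, max_wait_ms)) here */
			max_wait_ms -= mwait;
		}
	}

	int main(void)
	{
		printf("result: %d\n", demo_wait(5000));
		return 0;
	}
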
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e58c3f467db5..77decded1175 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -794,7 +794,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
794 lis r10,MSR_KERNEL@h 794 lis r10,MSR_KERNEL@h
795 ori r10,r10,MSR_KERNEL@l 795 ori r10,r10,MSR_KERNEL@l
796 bl transfer_to_handler_full 796 bl transfer_to_handler_full
797 .long nonrecoverable_exception 797 .long unrecoverable_exception
798 .long ret_from_except 798 .long ret_from_except
799#endif 799#endif
800 800
@@ -1297,7 +1297,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
1297 rlwinm r3,r3,0,0,30 1297 rlwinm r3,r3,0,0,30
1298 stw r3,_TRAP(r1) 1298 stw r3,_TRAP(r1)
12994: addi r3,r1,STACK_FRAME_OVERHEAD 12994: addi r3,r1,STACK_FRAME_OVERHEAD
1300 bl nonrecoverable_exception 1300 bl unrecoverable_exception
1301 /* shouldn't return */ 1301 /* shouldn't return */
1302 b 4b 1302 b 4b
1303 1303
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2206912ea4f0..7b1693adff2a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -171,7 +171,7 @@ system_call: /* label this so stack traces look sane */
171 * based on caller's run-mode / personality. 171 * based on caller's run-mode / personality.
172 */ 172 */
173 ld r11,SYS_CALL_TABLE@toc(2) 173 ld r11,SYS_CALL_TABLE@toc(2)
174 andi. r10,r10,_TIF_32BIT 174 andis. r10,r10,_TIF_32BIT@h
175 beq 15f 175 beq 15f
176 addi r11,r11,8 /* use 32-bit syscall entries */ 176 addi r11,r11,8 /* use 32-bit syscall entries */
177 clrldi r3,r3,32 177 clrldi r3,r3,32
@@ -386,10 +386,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
386 386
3874: /* Anything else left to do? */ 3874: /* Anything else left to do? */
388BEGIN_FTR_SECTION 388BEGIN_FTR_SECTION
389 lis r3,INIT_PPR@highest /* Set thread.ppr = 3 */ 389 lis r3,DEFAULT_PPR@highest /* Set default PPR */
390 ld r10,PACACURRENT(r13)
391 sldi r3,r3,32 /* bits 11-13 are used for ppr */ 390 sldi r3,r3,32 /* bits 11-13 are used for ppr */
392 std r3,TASKTHREADPPR(r10) 391 std r3,_PPR(r1)
393END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 392END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
394 393
395 andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) 394 andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -624,6 +623,10 @@ _GLOBAL(_switch)
624 623
625 addi r6,r4,-THREAD /* Convert THREAD to 'current' */ 624 addi r6,r4,-THREAD /* Convert THREAD to 'current' */
626 std r6,PACACURRENT(r13) /* Set new 'current' */ 625 std r6,PACACURRENT(r13) /* Set new 'current' */
626#if defined(CONFIG_STACKPROTECTOR)
627 ld r6, TASK_CANARY(r6)
628 std r6, PACA_CANARY(r13)
629#endif
627 630
628 ld r8,KSP(r4) /* new stack pointer */ 631 ld r8,KSP(r4) /* new stack pointer */
629#ifdef CONFIG_PPC_BOOK3S_64 632#ifdef CONFIG_PPC_BOOK3S_64
@@ -672,7 +675,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
672 675
673 isync 676 isync
674 slbie r6 677 slbie r6
678BEGIN_FTR_SECTION
675 slbie r6 /* Workaround POWER5 < DD2.1 issue */ 679 slbie r6 /* Workaround POWER5 < DD2.1 issue */
680END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
676 slbmte r7,r0 681 slbmte r7,r0
677 isync 682 isync
6782: 6832:
@@ -936,12 +941,6 @@ fast_exception_return:
936 andi. r0,r3,MSR_RI 941 andi. r0,r3,MSR_RI
937 beq- .Lunrecov_restore 942 beq- .Lunrecov_restore
938 943
939 /* Load PPR from thread struct before we clear MSR:RI */
940BEGIN_FTR_SECTION
941 ld r2,PACACURRENT(r13)
942 ld r2,TASKTHREADPPR(r2)
943END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
944
945 /* 944 /*
946 * Clear RI before restoring r13. If we are returning to 945 * Clear RI before restoring r13. If we are returning to
947 * userspace and we take an exception after restoring r13, 946 * userspace and we take an exception after restoring r13,
@@ -962,7 +961,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
962 andi. r0,r3,MSR_PR 961 andi. r0,r3,MSR_PR
963 beq 1f 962 beq 1f
964BEGIN_FTR_SECTION 963BEGIN_FTR_SECTION
965 mtspr SPRN_PPR,r2 /* Restore PPR */ 964 /* Restore PPR */
965 ld r2,_PPR(r1)
966 mtspr SPRN_PPR,r2
966END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 967END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
967 ACCOUNT_CPU_USER_EXIT(r13, r2, r4) 968 ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
968 REST_GPR(13, r1) 969 REST_GPR(13, r1)
@@ -1118,7 +1119,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return);
1118_GLOBAL(enter_rtas) 1119_GLOBAL(enter_rtas)
1119 mflr r0 1120 mflr r0
1120 std r0,16(r1) 1121 std r0,16(r1)
1121 stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */ 1122 stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
1122 1123
1123 /* Because RTAS is running in 32b mode, it clobbers the high order half 1124 /* Because RTAS is running in 32b mode, it clobbers the high order half
1124 * of all registers that it saves. We therefore save those registers 1125 * of all registers that it saves. We therefore save those registers
@@ -1250,7 +1251,7 @@ rtas_restore_regs:
1250 ld r8,_DSISR(r1) 1251 ld r8,_DSISR(r1)
1251 mtdsisr r8 1252 mtdsisr r8
1252 1253
1253 addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */ 1254 addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
1254 ld r0,16(r1) /* get return address */ 1255 ld r0,16(r1) /* get return address */
1255 1256
1256 mtlr r0 1257 mtlr r0
@@ -1261,7 +1262,7 @@ rtas_restore_regs:
1261_GLOBAL(enter_prom) 1262_GLOBAL(enter_prom)
1262 mflr r0 1263 mflr r0
1263 std r0,16(r1) 1264 std r0,16(r1)
1264 stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ 1265 stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
1265 1266
1266 /* Because PROM is running in 32b mode, it clobbers the high order half 1267 /* Because PROM is running in 32b mode, it clobbers the high order half
1267 * of all registers that it saves. We therefore save those registers 1268 * of all registers that it saves. We therefore save those registers
@@ -1318,8 +1319,8 @@ _GLOBAL(enter_prom)
1318 REST_10GPRS(22, r1) 1319 REST_10GPRS(22, r1)
1319 ld r4,_CCR(r1) 1320 ld r4,_CCR(r1)
1320 mtcr r4 1321 mtcr r4
1321 1322
1322 addi r1,r1,PROM_FRAME_SIZE 1323 addi r1,r1,SWITCH_FRAME_SIZE
1323 ld r0,16(r1) 1324 ld r0,16(r1)
1324 mtlr r0 1325 mtlr r0
1325 blr 1326 blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2d8fc8c9da7a..89d32bb79d5e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -244,14 +244,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
244 SET_SCRATCH0(r13) /* save r13 */ 244 SET_SCRATCH0(r13) /* save r13 */
245 EXCEPTION_PROLOG_0(PACA_EXMC) 245 EXCEPTION_PROLOG_0(PACA_EXMC)
246BEGIN_FTR_SECTION 246BEGIN_FTR_SECTION
247 b machine_check_powernv_early 247 b machine_check_common_early
248FTR_SECTION_ELSE 248FTR_SECTION_ELSE
249 b machine_check_pSeries_0 249 b machine_check_pSeries_0
250ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) 250ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
251EXC_REAL_END(machine_check, 0x200, 0x100) 251EXC_REAL_END(machine_check, 0x200, 0x100)
252EXC_VIRT_NONE(0x4200, 0x100) 252EXC_VIRT_NONE(0x4200, 0x100)
253TRAMP_REAL_BEGIN(machine_check_powernv_early) 253TRAMP_REAL_BEGIN(machine_check_common_early)
254BEGIN_FTR_SECTION
255 EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) 254 EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
256 /* 255 /*
257 * Register contents: 256 * Register contents:
@@ -305,7 +304,9 @@ BEGIN_FTR_SECTION
305 /* Save r9 through r13 from EXMC save area to stack frame. */ 304 /* Save r9 through r13 from EXMC save area to stack frame. */
306 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) 305 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
307 mfmsr r11 /* get MSR value */ 306 mfmsr r11 /* get MSR value */
307BEGIN_FTR_SECTION
308 ori r11,r11,MSR_ME /* turn on ME bit */ 308 ori r11,r11,MSR_ME /* turn on ME bit */
309END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
309 ori r11,r11,MSR_RI /* turn on RI bit */ 310 ori r11,r11,MSR_RI /* turn on RI bit */
310 LOAD_HANDLER(r12, machine_check_handle_early) 311 LOAD_HANDLER(r12, machine_check_handle_early)
3111: mtspr SPRN_SRR0,r12 3121: mtspr SPRN_SRR0,r12
@@ -324,13 +325,15 @@ BEGIN_FTR_SECTION
324 andc r11,r11,r10 /* Turn off MSR_ME */ 325 andc r11,r11,r10 /* Turn off MSR_ME */
325 b 1b 326 b 1b
326 b . /* prevent speculative execution */ 327 b . /* prevent speculative execution */
327END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
328 328
329TRAMP_REAL_BEGIN(machine_check_pSeries) 329TRAMP_REAL_BEGIN(machine_check_pSeries)
330 .globl machine_check_fwnmi 330 .globl machine_check_fwnmi
331machine_check_fwnmi: 331machine_check_fwnmi:
332 SET_SCRATCH0(r13) /* save r13 */ 332 SET_SCRATCH0(r13) /* save r13 */
333 EXCEPTION_PROLOG_0(PACA_EXMC) 333 EXCEPTION_PROLOG_0(PACA_EXMC)
334BEGIN_FTR_SECTION
335 b machine_check_common_early
336END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
334machine_check_pSeries_0: 337machine_check_pSeries_0:
335 EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) 338 EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
336 /* 339 /*
@@ -440,6 +443,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
440 bl machine_check_early 443 bl machine_check_early
441 std r3,RESULT(r1) /* Save result */ 444 std r3,RESULT(r1) /* Save result */
442 ld r12,_MSR(r1) 445 ld r12,_MSR(r1)
446BEGIN_FTR_SECTION
447 b 4f
448END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
443 449
444#ifdef CONFIG_PPC_P7_NAP 450#ifdef CONFIG_PPC_P7_NAP
445 /* 451 /*
@@ -463,11 +469,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
463 */ 469 */
464 rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ 470 rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
465 beq 5f 471 beq 5f
466 andi. r11,r12,MSR_PR /* See if coming from user. */ 4724: andi. r11,r12,MSR_PR /* See if coming from user. */
467 bne 9f /* continue in V mode if we are. */ 473 bne 9f /* continue in V mode if we are. */
468 474
4695: 4755:
470#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 476#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
477BEGIN_FTR_SECTION
471 /* 478 /*
472 * We are coming from kernel context. Check if we are coming from 479 * We are coming from kernel context. Check if we are coming from
473 * guest. if yes, then we can continue. We will fall through 480 * guest. if yes, then we can continue. We will fall through
@@ -476,6 +483,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
476 lbz r11,HSTATE_IN_GUEST(r13) 483 lbz r11,HSTATE_IN_GUEST(r13)
477 cmpwi r11,0 /* Check if coming from guest */ 484 cmpwi r11,0 /* Check if coming from guest */
478 bne 9f /* continue if we are. */ 485 bne 9f /* continue if we are. */
486END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
479#endif 487#endif
480 /* 488 /*
481 * At this point we are not sure about what context we come from. 489 * At this point we are not sure about what context we come from.
@@ -510,6 +518,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
510 cmpdi r3,0 /* see if we handled MCE successfully */ 518 cmpdi r3,0 /* see if we handled MCE successfully */
511 519
512 beq 1b /* if !handled then panic */ 520 beq 1b /* if !handled then panic */
521BEGIN_FTR_SECTION
513 /* 522 /*
514 * Return from MC interrupt. 523 * Return from MC interrupt.
515 * Queue up the MCE event so that we can log it later, while 524 * Queue up the MCE event so that we can log it later, while
@@ -518,10 +527,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
518 bl machine_check_queue_event 527 bl machine_check_queue_event
519 MACHINE_CHECK_HANDLER_WINDUP 528 MACHINE_CHECK_HANDLER_WINDUP
520 RFI_TO_USER_OR_KERNEL 529 RFI_TO_USER_OR_KERNEL
530FTR_SECTION_ELSE
531 /*
532 * pSeries: Return from MC interrupt. Before that stay on emergency
533 * stack and call machine_check_exception to log the MCE event.
534 */
535 LOAD_HANDLER(r10,mce_return)
536 mtspr SPRN_SRR0,r10
537 ld r10,PACAKMSR(r13)
538 mtspr SPRN_SRR1,r10
539 RFI_TO_KERNEL
540 b .
541ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
5219: 5429:
522 /* Deliver the machine check to host kernel in V mode. */ 543 /* Deliver the machine check to host kernel in V mode. */
523 MACHINE_CHECK_HANDLER_WINDUP 544 MACHINE_CHECK_HANDLER_WINDUP
524 b machine_check_pSeries 545 SET_SCRATCH0(r13) /* save r13 */
546 EXCEPTION_PROLOG_0(PACA_EXMC)
547 b machine_check_pSeries_0
525 548
526EXC_COMMON_BEGIN(unrecover_mce) 549EXC_COMMON_BEGIN(unrecover_mce)
527 /* Invoke machine_check_exception to print MCE event and panic. */ 550 /* Invoke machine_check_exception to print MCE event and panic. */
@@ -535,6 +558,13 @@ EXC_COMMON_BEGIN(unrecover_mce)
535 bl unrecoverable_exception 558 bl unrecoverable_exception
536 b 1b 559 b 1b
537 560
561EXC_COMMON_BEGIN(mce_return)
562 /* Invoke machine_check_exception to print MCE event and return. */
563 addi r3,r1,STACK_FRAME_OVERHEAD
564 bl machine_check_exception
565 MACHINE_CHECK_HANDLER_WINDUP
566 RFI_TO_KERNEL
567 b .
538 568
539EXC_REAL(data_access, 0x300, 0x80) 569EXC_REAL(data_access, 0x300, 0x80)
540EXC_VIRT(data_access, 0x4300, 0x80, 0x300) 570EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
@@ -566,28 +596,36 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
566 596
567 597
568EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) 598EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
569 SET_SCRATCH0(r13) 599EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
570 EXCEPTION_PROLOG_0(PACA_EXSLB)
571 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
572 mr r12,r3 /* save r3 */
573 mfspr r3,SPRN_DAR
574 mfspr r11,SPRN_SRR1
575 crset 4*cr6+eq
576 BRANCH_TO_COMMON(r10, slb_miss_common)
577EXC_REAL_END(data_access_slb, 0x380, 0x80) 600EXC_REAL_END(data_access_slb, 0x380, 0x80)
578 601
579EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) 602EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
580 SET_SCRATCH0(r13) 603EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
581 EXCEPTION_PROLOG_0(PACA_EXSLB)
582 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
583 mr r12,r3 /* save r3 */
584 mfspr r3,SPRN_DAR
585 mfspr r11,SPRN_SRR1
586 crset 4*cr6+eq
587 BRANCH_TO_COMMON(r10, slb_miss_common)
588EXC_VIRT_END(data_access_slb, 0x4380, 0x80) 604EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
605
589TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) 606TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
590 607
608EXC_COMMON_BEGIN(data_access_slb_common)
609 mfspr r10,SPRN_DAR
610 std r10,PACA_EXSLB+EX_DAR(r13)
611 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
612 ld r4,PACA_EXSLB+EX_DAR(r13)
613 std r4,_DAR(r1)
614 addi r3,r1,STACK_FRAME_OVERHEAD
615 bl do_slb_fault
616 cmpdi r3,0
617 bne- 1f
618 b fast_exception_return
6191: /* Error case */
620 std r3,RESULT(r1)
621 bl save_nvgprs
622 RECONCILE_IRQ_STATE(r10, r11)
623 ld r4,_DAR(r1)
624 ld r5,RESULT(r1)
625 addi r3,r1,STACK_FRAME_OVERHEAD
626 bl do_bad_slb_fault
627 b ret_from_except
628
591 629
592EXC_REAL(instruction_access, 0x400, 0x80) 630EXC_REAL(instruction_access, 0x400, 0x80)
593EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) 631EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
@@ -610,160 +648,34 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
610 648
611 649
612EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) 650EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
613 SET_SCRATCH0(r13) 651EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480);
614 EXCEPTION_PROLOG_0(PACA_EXSLB)
615 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
616 mr r12,r3 /* save r3 */
617 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
618 mfspr r11,SPRN_SRR1
619 crclr 4*cr6+eq
620 BRANCH_TO_COMMON(r10, slb_miss_common)
621EXC_REAL_END(instruction_access_slb, 0x480, 0x80) 652EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
622 653
623EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) 654EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
624 SET_SCRATCH0(r13) 655EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480);
625 EXCEPTION_PROLOG_0(PACA_EXSLB)
626 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
627 mr r12,r3 /* save r3 */
628 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
629 mfspr r11,SPRN_SRR1
630 crclr 4*cr6+eq
631 BRANCH_TO_COMMON(r10, slb_miss_common)
632EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) 656EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
633TRAMP_KVM(PACA_EXSLB, 0x480)
634
635
636/*
637 * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
638 * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
639 */
640EXC_COMMON_BEGIN(slb_miss_common)
641 /*
642 * r13 points to the PACA, r9 contains the saved CR,
643 * r12 contains the saved r3,
644 * r11 contain the saved SRR1, SRR0 is still ready for return
645 * r3 has the faulting address
646 * r9 - r13 are saved in paca->exslb.
647 * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
648 * We assume we aren't going to take any exceptions during this
649 * procedure.
650 */
651 mflr r10
652 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
653 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
654
655 andi. r9,r11,MSR_PR // Check for exception from userspace
656 cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
657
658 /*
659 * Test MSR_RI before calling slb_allocate_realmode, because the
660 * MSR in r11 gets clobbered. However we still want to allocate
661 * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
662 * recursive SLB faults. So use cr5 for this, which is preserved.
663 */
664 andi. r11,r11,MSR_RI /* check for unrecoverable exception */
665 cmpdi cr5,r11,MSR_RI
666
667 crset 4*cr0+eq
668#ifdef CONFIG_PPC_BOOK3S_64
669BEGIN_MMU_FTR_SECTION
670 bl slb_allocate
671END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
672#endif
673
674 ld r10,PACA_EXSLB+EX_LR(r13)
675 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
676 mtlr r10
677
678 /*
679 * Large address, check whether we have to allocate new contexts.
680 */
681 beq- 8f
682 657
683 bne- cr5,2f /* if unrecoverable exception, oops */ 658TRAMP_KVM(PACA_EXSLB, 0x480)
684
685 /* All done -- return from exception. */
686
687 bne cr4,1f /* returning to kernel */
688
689 mtcrf 0x80,r9
690 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
691 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
692 mtcrf 0x02,r9 /* I/D indication is in cr6 */
693 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
694
695 RESTORE_CTR(r9, PACA_EXSLB)
696 RESTORE_PPR_PACA(PACA_EXSLB, r9)
697 mr r3,r12
698 ld r9,PACA_EXSLB+EX_R9(r13)
699 ld r10,PACA_EXSLB+EX_R10(r13)
700 ld r11,PACA_EXSLB+EX_R11(r13)
701 ld r12,PACA_EXSLB+EX_R12(r13)
702 ld r13,PACA_EXSLB+EX_R13(r13)
703 RFI_TO_USER
704 b . /* prevent speculative execution */
7051:
706 mtcrf 0x80,r9
707 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
708 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
709 mtcrf 0x02,r9 /* I/D indication is in cr6 */
710 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
711
712 RESTORE_CTR(r9, PACA_EXSLB)
713 RESTORE_PPR_PACA(PACA_EXSLB, r9)
714 mr r3,r12
715 ld r9,PACA_EXSLB+EX_R9(r13)
716 ld r10,PACA_EXSLB+EX_R10(r13)
717 ld r11,PACA_EXSLB+EX_R11(r13)
718 ld r12,PACA_EXSLB+EX_R12(r13)
719 ld r13,PACA_EXSLB+EX_R13(r13)
720 RFI_TO_KERNEL
721 b . /* prevent speculative execution */
722
723
7242: std r3,PACA_EXSLB+EX_DAR(r13)
725 mr r3,r12
726 mfspr r11,SPRN_SRR0
727 mfspr r12,SPRN_SRR1
728 LOAD_HANDLER(r10,unrecov_slb)
729 mtspr SPRN_SRR0,r10
730 ld r10,PACAKMSR(r13)
731 mtspr SPRN_SRR1,r10
732 RFI_TO_KERNEL
733 b .
734
7358: std r3,PACA_EXSLB+EX_DAR(r13)
736 mr r3,r12
737 mfspr r11,SPRN_SRR0
738 mfspr r12,SPRN_SRR1
739 LOAD_HANDLER(r10, large_addr_slb)
740 mtspr SPRN_SRR0,r10
741 ld r10,PACAKMSR(r13)
742 mtspr SPRN_SRR1,r10
743 RFI_TO_KERNEL
744 b .
745 659
746EXC_COMMON_BEGIN(unrecov_slb) 660EXC_COMMON_BEGIN(instruction_access_slb_common)
747 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) 661 EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
748 RECONCILE_IRQ_STATE(r10, r11) 662 ld r4,_NIP(r1)
663 addi r3,r1,STACK_FRAME_OVERHEAD
664 bl do_slb_fault
665 cmpdi r3,0
666 bne- 1f
667 b fast_exception_return
6681: /* Error case */
669 std r3,RESULT(r1)
749 bl save_nvgprs 670 bl save_nvgprs
7501: addi r3,r1,STACK_FRAME_OVERHEAD
751 bl unrecoverable_exception
752 b 1b
753
754EXC_COMMON_BEGIN(large_addr_slb)
755 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
756 RECONCILE_IRQ_STATE(r10, r11) 671 RECONCILE_IRQ_STATE(r10, r11)
757 ld r3, PACA_EXSLB+EX_DAR(r13) 672 ld r4,_NIP(r1)
758 std r3, _DAR(r1) 673 ld r5,RESULT(r1)
759 beq cr6, 2f 674 addi r3,r1,STACK_FRAME_OVERHEAD
760 li r10, 0x481 /* fix trap number for I-SLB miss */ 675 bl do_bad_slb_fault
761 std r10, _TRAP(r1)
7622: bl save_nvgprs
763 addi r3, r1, STACK_FRAME_OVERHEAD
764 bl slb_miss_large_addr
765 b ret_from_except 676 b ret_from_except
766 677
678
767EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) 679EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
768 .globl hardware_interrupt_hv; 680 .globl hardware_interrupt_hv;
769hardware_interrupt_hv: 681hardware_interrupt_hv:
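The rewritten 0x380/0x480 SLB miss paths above drop the hand-rolled slb_miss_common assembly and instead build a regular exception frame and call into C: r3 is pointed at the saved pt_regs (r1 + STACK_FRAME_OVERHEAD), r4 carries the faulting address (DAR for the data miss, NIP for the instruction miss), and a non-zero return value in r3 routes to the do_bad_slb_fault() error path. The sketch below is only an inference from that calling sequence -- the names do_slb_fault/do_bad_slb_fault come from the hunk, but the prototypes, helpers and error handling shown here are assumptions, not the real implementation.

long do_slb_fault(struct pt_regs *regs, unsigned long ea)
{
	long err;

	/* Try to build and insert an SLB entry covering 'ea'. */
	err = insert_slb_entry_for(ea);		/* hypothetical helper */

	/* 0 lets the assembly take fast_exception_return; anything else is
	 * stashed in RESULT(r1) and handed on to do_bad_slb_fault(). */
	return err;
}

void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
{
	/* Report the failed access against the saved register state. */
	if (user_mode(regs))
		_exception(SIGSEGV, regs, SEGV_BNDERR, ea);	/* assumption */
	else
		bad_page_fault(regs, ea, SIGSEGV);		/* assumption */
}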
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index a711d22339ea..761b28b1427d 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1444,8 +1444,8 @@ static ssize_t fadump_register_store(struct kobject *kobj,
1444 break; 1444 break;
1445 case 1: 1445 case 1:
1446 if (fw_dump.dump_registered == 1) { 1446 if (fw_dump.dump_registered == 1) {
1447 ret = -EEXIST; 1447 /* Un-register Firmware-assisted dump */
1448 goto unlock_out; 1448 fadump_unregister_dump(&fdm);
1449 } 1449 }
1450 /* Register Firmware-assisted dump */ 1450 /* Register Firmware-assisted dump */
1451 ret = register_fadump(); 1451 ret = register_fadump();
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 6582f824d620..134a573a9f2d 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -642,7 +642,7 @@ DTLBMissIMMR:
642 mtspr SPRN_MD_TWC, r10 642 mtspr SPRN_MD_TWC, r10
643 mfspr r10, SPRN_IMMR /* Get current IMMR */ 643 mfspr r10, SPRN_IMMR /* Get current IMMR */
644 rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ 644 rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
645 ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ 645 ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
646 _PAGE_PRESENT | _PAGE_NO_CACHE 646 _PAGE_PRESENT | _PAGE_NO_CACHE
647 mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ 647 mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
648 648
@@ -660,7 +660,7 @@ DTLBMissLinear:
660 li r11, MD_PS8MEG | MD_SVALID | M_APG2 660 li r11, MD_PS8MEG | MD_SVALID | M_APG2
661 mtspr SPRN_MD_TWC, r11 661 mtspr SPRN_MD_TWC, r11
662 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ 662 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
663 ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ 663 ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
664 _PAGE_PRESENT 664 _PAGE_PRESENT
665 mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ 665 mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
666 666
@@ -679,7 +679,7 @@ ITLBMissLinear:
679 li r11, MI_PS8MEG | MI_SVALID | M_APG2 679 li r11, MI_PS8MEG | MI_SVALID | M_APG2
680 mtspr SPRN_MI_TWC, r11 680 mtspr SPRN_MI_TWC, r11
681 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ 681 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
682 ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ 682 ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
683 _PAGE_PRESENT 683 _PAGE_PRESENT
684 mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ 684 mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
685 685
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index aa9f1b8261db..7e89d02a84e1 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -153,10 +153,10 @@ static const struct ppc_pci_io iowa_pci_io = {
153 153
154#ifdef CONFIG_PPC_INDIRECT_MMIO 154#ifdef CONFIG_PPC_INDIRECT_MMIO
155static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, 155static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
156 unsigned long flags, void *caller) 156 pgprot_t prot, void *caller)
157{ 157{
158 struct iowa_bus *bus; 158 struct iowa_bus *bus;
159 void __iomem *res = __ioremap_caller(addr, size, flags, caller); 159 void __iomem *res = __ioremap_caller(addr, size, prot, caller);
160 int busno; 160 int busno;
161 161
162 bus = iowa_pci_find(0, (unsigned long)addr); 162 bus = iowa_pci_find(0, (unsigned long)addr);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 19b4c628f3be..f0dc680e659a 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -785,9 +785,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
785 785
786 vaddr = page_address(page) + offset; 786 vaddr = page_address(page) + offset;
787 uaddr = (unsigned long)vaddr; 787 uaddr = (unsigned long)vaddr;
788 npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
789 788
790 if (tbl) { 789 if (tbl) {
790 npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
791 align = 0; 791 align = 0;
792 if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE && 792 if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
793 ((unsigned long)vaddr & ~PAGE_MASK) == 0) 793 ((unsigned long)vaddr & ~PAGE_MASK) == 0)
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 1df6c74aa731..fda3ae48480c 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -110,14 +110,14 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
110 size = 0x10000; 110 size = 0x10000;
111 111
112 __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, 112 __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
113 size, pgprot_val(pgprot_noncached(__pgprot(0)))); 113 size, pgprot_noncached(PAGE_KERNEL));
114 return; 114 return;
115 115
116inval_range: 116inval_range:
117 printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " 117 printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
118 "mapping 64k\n"); 118 "mapping 64k\n");
119 __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, 119 __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
120 0x10000, pgprot_val(pgprot_noncached(__pgprot(0)))); 120 0x10000, pgprot_noncached(PAGE_KERNEL));
121} 121}
122 122
123 123
@@ -253,7 +253,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
253 */ 253 */
254 isa_io_base = ISA_IO_BASE; 254 isa_io_base = ISA_IO_BASE;
255 __ioremap_at(pbase, (void *)ISA_IO_BASE, 255 __ioremap_at(pbase, (void *)ISA_IO_BASE,
256 size, pgprot_val(pgprot_noncached(__pgprot(0)))); 256 size, pgprot_noncached(PAGE_KERNEL));
257 257
258 pr_debug("ISA: Non-PCI bridge is %pOF\n", np); 258 pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
259} 259}
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 35e240a0a408..59c578f865aa 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -24,6 +24,7 @@
24#include <asm/processor.h> 24#include <asm/processor.h>
25#include <asm/machdep.h> 25#include <asm/machdep.h>
26#include <asm/debug.h> 26#include <asm/debug.h>
27#include <asm/code-patching.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
28 29
29/* 30/*
@@ -144,7 +145,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
144 if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0) 145 if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
145 return 0; 146 return 0;
146 147
147 if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) 148 if (*(u32 *)regs->nip == BREAK_INSTR)
148 regs->nip += BREAK_INSTR_SIZE; 149 regs->nip += BREAK_INSTR_SIZE;
149 150
150 return 1; 151 return 1;
@@ -441,16 +442,42 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
441 return -1; 442 return -1;
442} 443}
443 444
445int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
446{
447 int err;
448 unsigned int instr;
449 unsigned int *addr = (unsigned int *)bpt->bpt_addr;
450
451 err = probe_kernel_address(addr, instr);
452 if (err)
453 return err;
454
455 err = patch_instruction(addr, BREAK_INSTR);
456 if (err)
457 return -EFAULT;
458
459 *(unsigned int *)bpt->saved_instr = instr;
460
461 return 0;
462}
463
464int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
465{
466 int err;
467 unsigned int instr = *(unsigned int *)bpt->saved_instr;
468 unsigned int *addr = (unsigned int *)bpt->bpt_addr;
469
470 err = patch_instruction(addr, instr);
471 if (err)
472 return -EFAULT;
473
474 return 0;
475}
476
444/* 477/*
445 * Global data 478 * Global data
446 */ 479 */
447struct kgdb_arch arch_kgdb_ops = { 480struct kgdb_arch arch_kgdb_ops;
448#ifdef __LITTLE_ENDIAN__
449 .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d},
450#else
451 .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
452#endif
453};
454 481
455static int kgdb_not_implemented(struct pt_regs *regs) 482static int kgdb_not_implemented(struct pt_regs *regs)
456{ 483{
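The kgdb hunk above drops the static per-endianness gdb_bpt_instr byte pattern and instead implements kgdb_arch_set_breakpoint()/kgdb_arch_remove_breakpoint() on top of patch_instruction(). Going through patch_instruction() rather than poking the address directly matters when kernel text is mapped read-only (CONFIG_STRICT_KERNEL_RWX), since it writes through a temporary mapping in that case. A minimal sketch of the same save-and-patch pattern, assuming only the probe_kernel_address()/patch_instruction() semantics visible in the hunk:

#include <linux/uaccess.h>		/* probe_kernel_address() */
#include <asm/code-patching.h>		/* patch_instruction() */

/* Sketch: install 'insn' at 'addr', remembering the original instruction. */
static int swap_insn(unsigned int *addr, unsigned int insn, unsigned int *old)
{
	int err;

	/* Read the current instruction safely; the text page may fault. */
	err = probe_kernel_address(addr, *old);
	if (err)
		return err;

	/* patch_instruction() copes with write-protected kernel text. */
	return patch_instruction(addr, insn) ? -EFAULT : 0;
}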
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index efdd16a79075..bd933a75f0bc 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -488,10 +488,11 @@ long machine_check_early(struct pt_regs *regs)
488{ 488{
489 long handled = 0; 489 long handled = 0;
490 490
491 __this_cpu_inc(irq_stat.mce_exceptions); 491 /*
492 492 * See if platform is capable of handling machine check.
493 if (cur_cpu_spec && cur_cpu_spec->machine_check_early) 493 */
494 handled = cur_cpu_spec->machine_check_early(regs); 494 if (ppc_md.machine_check_early)
495 handled = ppc_md.machine_check_early(regs);
495 return handled; 496 return handled;
496} 497}
497 498
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 3497c8329c1d..6b800eec31f2 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -60,7 +60,7 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
60 60
61/* flush SLBs and reload */ 61/* flush SLBs and reload */
62#ifdef CONFIG_PPC_BOOK3S_64 62#ifdef CONFIG_PPC_BOOK3S_64
63static void flush_and_reload_slb(void) 63void flush_and_reload_slb(void)
64{ 64{
65 /* Invalidate all SLBs */ 65 /* Invalidate all SLBs */
66 slb_flush_all_realmode(); 66 slb_flush_all_realmode();
@@ -89,6 +89,13 @@ static void flush_and_reload_slb(void)
89 89
90static void flush_erat(void) 90static void flush_erat(void)
91{ 91{
92#ifdef CONFIG_PPC_BOOK3S_64
93 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
94 flush_and_reload_slb();
95 return;
96 }
97#endif
98 /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
92 asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); 99 asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
93} 100}
94 101
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 77371c9ef3d8..2d861a36662e 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -74,6 +74,14 @@ int module_finalize(const Elf_Ehdr *hdr,
74 (void *)sect->sh_addr + sect->sh_size); 74 (void *)sect->sh_addr + sect->sh_size);
75#endif /* CONFIG_PPC64 */ 75#endif /* CONFIG_PPC64 */
76 76
77#ifdef PPC64_ELF_ABI_v1
78 sect = find_section(hdr, sechdrs, ".opd");
79 if (sect != NULL) {
80 me->arch.start_opd = sect->sh_addr;
81 me->arch.end_opd = sect->sh_addr + sect->sh_size;
82 }
83#endif /* PPC64_ELF_ABI_v1 */
84
77#ifdef CONFIG_PPC_BARRIER_NOSPEC 85#ifdef CONFIG_PPC_BARRIER_NOSPEC
78 sect = find_section(hdr, sechdrs, "__spec_barrier_fixup"); 86 sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
79 if (sect != NULL) 87 if (sect != NULL)
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index b8d61e019d06..8661eea78503 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -360,11 +360,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
360 else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) 360 else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
361 dedotify_versions((void *)hdr + sechdrs[i].sh_offset, 361 dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
362 sechdrs[i].sh_size); 362 sechdrs[i].sh_size);
363 else if (!strcmp(secstrings + sechdrs[i].sh_name, ".opd")) {
364 me->arch.start_opd = sechdrs[i].sh_addr;
365 me->arch.end_opd = sechdrs[i].sh_addr +
366 sechdrs[i].sh_size;
367 }
368 363
369 /* We don't handle .init for the moment: rename to _init */ 364 /* We don't handle .init for the moment: rename to _init */
370 while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) 365 while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
@@ -685,7 +680,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
685 680
686 case R_PPC64_REL32: 681 case R_PPC64_REL32:
687 /* 32 bits relative (used by relative exception tables) */ 682 /* 32 bits relative (used by relative exception tables) */
688 *(u32 *)location = value - (unsigned long)location; 683 /* Convert value to relative */
684 value -= (unsigned long)location;
685 if (value + 0x80000000 > 0xffffffff) {
686 pr_err("%s: REL32 %li out of range!\n",
687 me->name, (long int)value);
688 return -ENOEXEC;
689 }
690 *(u32 *)location = value;
689 break; 691 break;
690 692
691 case R_PPC64_TOCSAVE: 693 case R_PPC64_TOCSAVE:
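The new R_PPC64_REL32 handling above refuses displacements that do not fit in the 32-bit relocation field instead of silently truncating them; "value + 0x80000000 > 0xffffffff" is the usual unsigned formulation of "(s64)value < INT_MIN || (s64)value > INT_MAX". A tiny self-contained check of that idiom (ordinary userspace C, not kernel code):

#include <assert.h>
#include <stdint.h>

/* Does a 64-bit displacement fit in a signed 32-bit relocation field? */
static int fits_in_s32(uint64_t value)
{
	return value + 0x80000000ull <= 0xffffffffull;
}

int main(void)
{
	assert(fits_in_s32(0x7fffffffull));			/* INT32_MAX: fits */
	assert(fits_in_s32((uint64_t)-(int64_t)0x80000000ll));	/* INT32_MIN: fits */
	assert(!fits_in_s32(0x80000000ull));			/* one past: rejected */
	return 0;
}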
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0ee3e6d50f28..913bfca09c4f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -198,7 +198,7 @@ void __init allocate_paca_ptrs(void)
198 paca_nr_cpu_ids = nr_cpu_ids; 198 paca_nr_cpu_ids = nr_cpu_ids;
199 199
200 paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; 200 paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
201 paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0)); 201 paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, SMP_CACHE_BYTES));
202 memset(paca_ptrs, 0x88, paca_ptrs_size); 202 memset(paca_ptrs, 0x88, paca_ptrs_size);
203} 203}
204 204
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d63b488d34d7..d3f04f2d8249 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -10,14 +10,13 @@
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/errno.h> 12#include <linux/errno.h>
13#include <linux/bootmem.h> 13#include <linux/memblock.h>
14#include <linux/syscalls.h> 14#include <linux/syscalls.h>
15#include <linux/irq.h> 15#include <linux/irq.h>
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/of.h> 17#include <linux/of.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/export.h> 19#include <linux/export.h>
20#include <linux/syscalls.h>
21 20
22#include <asm/processor.h> 21#include <asm/processor.h>
23#include <asm/io.h> 22#include <asm/io.h>
@@ -204,7 +203,8 @@ pci_create_OF_bus_map(void)
204 struct property* of_prop; 203 struct property* of_prop;
205 struct device_node *dn; 204 struct device_node *dn;
206 205
207 of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0); 206 of_prop = memblock_alloc(sizeof(struct property) + 256,
207 SMP_CACHE_BYTES);
208 dn = of_find_node_by_path("/"); 208 dn = of_find_node_by_path("/");
209 if (dn) { 209 if (dn) {
210 memset(of_prop, -1, sizeof(struct property) + 256); 210 memset(of_prop, -1, sizeof(struct property) + 256);
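The allocation changes in the paca.c and pci_32.c hunks above (and the prom.c one further down) track a memblock interface rename in this cycle: the helper that hands back a physical address is now memblock_phys_alloc(), which is why those callers keep their explicit __va(), while plain memblock_alloc() takes over from memblock_virt_alloc() and returns an already-mapped pointer. Roughly, with prototypes recalled from include/linux/memblock.h and therefore to be treated as an approximation:

#include <linux/memblock.h>

void memblock_alloc_examples(void)
{
	phys_addr_t pa;
	void *p;

	/* Physical flavour: the caller translates to a virtual address itself. */
	pa = memblock_phys_alloc(4096, SMP_CACHE_BYTES);
	p = __va(pa);

	/* Virtual flavour: returns a pointer that is usable directly. */
	p = memblock_alloc(4096, SMP_CACHE_BYTES);
	(void)p;
}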
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index dff28f903512..9d8c10d55407 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -159,7 +159,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
159 159
160 /* Establish the mapping */ 160 /* Establish the mapping */
161 if (__ioremap_at(phys_page, area->addr, size_page, 161 if (__ioremap_at(phys_page, area->addr, size_page,
162 pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL) 162 pgprot_noncached(PAGE_KERNEL)) == NULL)
163 return -ENOMEM; 163 return -ENOMEM;
164 164
165 /* Fixup hose IO resource */ 165 /* Fixup hose IO resource */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bb6ac471a784..4d5322cfad25 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -43,6 +43,7 @@
43#include <linux/uaccess.h> 43#include <linux/uaccess.h>
44#include <linux/elf-randomize.h> 44#include <linux/elf-randomize.h>
45#include <linux/pkeys.h> 45#include <linux/pkeys.h>
46#include <linux/seq_buf.h>
46 47
47#include <asm/pgtable.h> 48#include <asm/pgtable.h>
48#include <asm/io.h> 49#include <asm/io.h>
@@ -65,6 +66,7 @@
65#include <asm/livepatch.h> 66#include <asm/livepatch.h>
66#include <asm/cpu_has_feature.h> 67#include <asm/cpu_has_feature.h>
67#include <asm/asm-prototypes.h> 68#include <asm/asm-prototypes.h>
69#include <asm/stacktrace.h>
68 70
69#include <linux/kprobes.h> 71#include <linux/kprobes.h>
70#include <linux/kdebug.h> 72#include <linux/kdebug.h>
@@ -102,24 +104,18 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
102 } 104 }
103} 105}
104 106
105static inline bool msr_tm_active(unsigned long msr)
106{
107 return MSR_TM_ACTIVE(msr);
108}
109
110static bool tm_active_with_fp(struct task_struct *tsk) 107static bool tm_active_with_fp(struct task_struct *tsk)
111{ 108{
112 return msr_tm_active(tsk->thread.regs->msr) && 109 return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
113 (tsk->thread.ckpt_regs.msr & MSR_FP); 110 (tsk->thread.ckpt_regs.msr & MSR_FP);
114} 111}
115 112
116static bool tm_active_with_altivec(struct task_struct *tsk) 113static bool tm_active_with_altivec(struct task_struct *tsk)
117{ 114{
118 return msr_tm_active(tsk->thread.regs->msr) && 115 return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
119 (tsk->thread.ckpt_regs.msr & MSR_VEC); 116 (tsk->thread.ckpt_regs.msr & MSR_VEC);
120} 117}
121#else 118#else
122static inline bool msr_tm_active(unsigned long msr) { return false; }
123static inline void check_if_tm_restore_required(struct task_struct *tsk) { } 119static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
124static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; } 120static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
125static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; } 121static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
@@ -247,7 +243,8 @@ void enable_kernel_fp(void)
247 * giveup as this would save to the 'live' structure not the 243 * giveup as this would save to the 'live' structure not the
248 * checkpointed structure. 244 * checkpointed structure.
249 */ 245 */
250 if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) 246 if (!MSR_TM_ACTIVE(cpumsr) &&
247 MSR_TM_ACTIVE(current->thread.regs->msr))
251 return; 248 return;
252 __giveup_fpu(current); 249 __giveup_fpu(current);
253 } 250 }
@@ -311,7 +308,8 @@ void enable_kernel_altivec(void)
311 * giveup as this would save to the 'live' structure not the 308 * giveup as this would save to the 'live' structure not the
312 * checkpointed structure. 309 * checkpointed structure.
313 */ 310 */
314 if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) 311 if (!MSR_TM_ACTIVE(cpumsr) &&
312 MSR_TM_ACTIVE(current->thread.regs->msr))
315 return; 313 return;
316 __giveup_altivec(current); 314 __giveup_altivec(current);
317 } 315 }
@@ -397,7 +395,8 @@ void enable_kernel_vsx(void)
397 * giveup as this would save to the 'live' structure not the 395 * giveup as this would save to the 'live' structure not the
398 * checkpointed structure. 396 * checkpointed structure.
399 */ 397 */
400 if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) 398 if (!MSR_TM_ACTIVE(cpumsr) &&
399 MSR_TM_ACTIVE(current->thread.regs->msr))
401 return; 400 return;
402 __giveup_vsx(current); 401 __giveup_vsx(current);
403 } 402 }
@@ -530,7 +529,7 @@ void restore_math(struct pt_regs *regs)
530{ 529{
531 unsigned long msr; 530 unsigned long msr;
532 531
533 if (!msr_tm_active(regs->msr) && 532 if (!MSR_TM_ACTIVE(regs->msr) &&
534 !current->thread.load_fp && !loadvec(current->thread)) 533 !current->thread.load_fp && !loadvec(current->thread))
535 return; 534 return;
536 535
@@ -620,8 +619,6 @@ void do_send_trap(struct pt_regs *regs, unsigned long address,
620void do_break (struct pt_regs *regs, unsigned long address, 619void do_break (struct pt_regs *regs, unsigned long address,
621 unsigned long error_code) 620 unsigned long error_code)
622{ 621{
623 siginfo_t info;
624
625 current->thread.trap_nr = TRAP_HWBKPT; 622 current->thread.trap_nr = TRAP_HWBKPT;
626 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, 623 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
627 11, SIGSEGV) == NOTIFY_STOP) 624 11, SIGSEGV) == NOTIFY_STOP)
@@ -634,12 +631,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
634 hw_breakpoint_disable(); 631 hw_breakpoint_disable();
635 632
636 /* Deliver the signal to userspace */ 633 /* Deliver the signal to userspace */
637 clear_siginfo(&info); 634 force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address, current);
638 info.si_signo = SIGTRAP;
639 info.si_errno = 0;
640 info.si_code = TRAP_HWBKPT;
641 info.si_addr = (void __user *)address;
642 force_sig_info(SIGTRAP, &info, current);
643} 635}
644#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ 636#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
645 637
@@ -1259,17 +1251,16 @@ struct task_struct *__switch_to(struct task_struct *prev,
1259 return last; 1251 return last;
1260} 1252}
1261 1253
1262static int instructions_to_print = 16; 1254#define NR_INSN_TO_PRINT 16
1263 1255
1264static void show_instructions(struct pt_regs *regs) 1256static void show_instructions(struct pt_regs *regs)
1265{ 1257{
1266 int i; 1258 int i;
1267 unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 * 1259 unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
1268 sizeof(int));
1269 1260
1270 printk("Instruction dump:"); 1261 printk("Instruction dump:");
1271 1262
1272 for (i = 0; i < instructions_to_print; i++) { 1263 for (i = 0; i < NR_INSN_TO_PRINT; i++) {
1273 int instr; 1264 int instr;
1274 1265
1275 if (!(i % 8)) 1266 if (!(i % 8))
@@ -1284,7 +1275,7 @@ static void show_instructions(struct pt_regs *regs)
1284#endif 1275#endif
1285 1276
1286 if (!__kernel_text_address(pc) || 1277 if (!__kernel_text_address(pc) ||
1287 probe_kernel_address((unsigned int __user *)pc, instr)) { 1278 probe_kernel_address((const void *)pc, instr)) {
1288 pr_cont("XXXXXXXX "); 1279 pr_cont("XXXXXXXX ");
1289 } else { 1280 } else {
1290 if (regs->nip == pc) 1281 if (regs->nip == pc)
@@ -1302,43 +1293,43 @@ static void show_instructions(struct pt_regs *regs)
1302void show_user_instructions(struct pt_regs *regs) 1293void show_user_instructions(struct pt_regs *regs)
1303{ 1294{
1304 unsigned long pc; 1295 unsigned long pc;
1305 int i; 1296 int n = NR_INSN_TO_PRINT;
1297 struct seq_buf s;
1298 char buf[96]; /* enough for 8 times 9 + 2 chars */
1306 1299
1307 pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); 1300 pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
1308 1301
1309 /* 1302 /*
1310 * Make sure the NIP points at userspace, not kernel text/data or 1303 * Make sure the NIP points at userspace, not kernel text/data or
1311 * elsewhere. 1304 * elsewhere.
1312 */ 1305 */
1313 if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) { 1306 if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
1314 pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", 1307 pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
1315 current->comm, current->pid); 1308 current->comm, current->pid);
1316 return; 1309 return;
1317 } 1310 }
1318 1311
1319 pr_info("%s[%d]: code: ", current->comm, current->pid); 1312 seq_buf_init(&s, buf, sizeof(buf));
1320 1313
1321 for (i = 0; i < instructions_to_print; i++) { 1314 while (n) {
1322 int instr; 1315 int i;
1323 1316
1324 if (!(i % 8) && (i > 0)) { 1317 seq_buf_clear(&s);
1325 pr_cont("\n");
1326 pr_info("%s[%d]: code: ", current->comm, current->pid);
1327 }
1328 1318
1329 if (probe_kernel_address((unsigned int __user *)pc, instr)) { 1319 for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
1330 pr_cont("XXXXXXXX "); 1320 int instr;
1331 } else { 1321
1332 if (regs->nip == pc) 1322 if (probe_kernel_address((const void *)pc, instr)) {
1333 pr_cont("<%08x> ", instr); 1323 seq_buf_printf(&s, "XXXXXXXX ");
1334 else 1324 continue;
1335 pr_cont("%08x ", instr); 1325 }
1326 seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
1336 } 1327 }
1337 1328
1338 pc += sizeof(int); 1329 if (!seq_buf_has_overflowed(&s))
1330 pr_info("%s[%d]: code: %s\n", current->comm,
1331 current->pid, s.buffer);
1339 } 1332 }
1340
1341 pr_cont("\n");
1342} 1333}
1343 1334
1344struct regbit { 1335struct regbit {
@@ -1492,6 +1483,15 @@ void flush_thread(void)
1492#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 1483#endif /* CONFIG_HAVE_HW_BREAKPOINT */
1493} 1484}
1494 1485
1486#ifdef CONFIG_PPC_BOOK3S_64
1487void arch_setup_new_exec(void)
1488{
1489 if (radix_enabled())
1490 return;
1491 hash__setup_new_exec();
1492}
1493#endif
1494
1495int set_thread_uses_vas(void) 1495int set_thread_uses_vas(void)
1496{ 1496{
1497#ifdef CONFIG_PPC_BOOK3S_64 1497#ifdef CONFIG_PPC_BOOK3S_64
@@ -1712,7 +1712,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
1712 p->thread.dscr = mfspr(SPRN_DSCR); 1712 p->thread.dscr = mfspr(SPRN_DSCR);
1713 } 1713 }
1714 if (cpu_has_feature(CPU_FTR_HAS_PPR)) 1714 if (cpu_has_feature(CPU_FTR_HAS_PPR))
1715 p->thread.ppr = INIT_PPR; 1715 childregs->ppr = DEFAULT_PPR;
1716 1716
1717 p->thread.tidr = 0; 1717 p->thread.tidr = 0;
1718#endif 1718#endif
@@ -1720,6 +1720,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
1720 return 0; 1720 return 0;
1721} 1721}
1722 1722
1723void preload_new_slb_context(unsigned long start, unsigned long sp);
1724
1723/* 1725/*
1724 * Set up a thread for executing a new program 1726 * Set up a thread for executing a new program
1725 */ 1727 */
@@ -1727,6 +1729,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
1727{ 1729{
1728#ifdef CONFIG_PPC64 1730#ifdef CONFIG_PPC64
1729 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ 1731 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
1732
1733#ifdef CONFIG_PPC_BOOK3S_64
1734 preload_new_slb_context(start, sp);
1735#endif
1730#endif 1736#endif
1731 1737
1732 /* 1738 /*
@@ -1817,6 +1823,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
1817#ifdef CONFIG_VSX 1823#ifdef CONFIG_VSX
1818 current->thread.used_vsr = 0; 1824 current->thread.used_vsr = 0;
1819#endif 1825#endif
1826 current->thread.load_slb = 0;
1820 current->thread.load_fp = 0; 1827 current->thread.load_fp = 0;
1821 memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state)); 1828 memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
1822 current->thread.fp_save_area = NULL; 1829 current->thread.fp_save_area = NULL;
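show_user_instructions() above now batches each row of eight instruction words into an on-stack buffer with the seq_buf helpers and prints it with a single pr_info() call, rather than assembling the line from many pr_cont() pieces that can interleave with concurrent output from other CPUs. A minimal sketch of that pattern, assuming the seq_buf API from include/linux/seq_buf.h behaves as used in the hunk:

#include <linux/kernel.h>
#include <linux/seq_buf.h>

/* Sketch: emit up to eight words as one non-interleaved log line. */
static void log_words(const unsigned int *words, int n)
{
	struct seq_buf s;
	char buf[96];			/* room for 8 * "%08x " plus a NUL */
	int i;

	seq_buf_init(&s, buf, sizeof(buf));
	for (i = 0; i < n && i < 8; i++)
		seq_buf_printf(&s, "%08x ", words[i]);

	if (!seq_buf_has_overflowed(&s))
		pr_info("code: %s\n", s.buffer);
}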
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index c4d7078e5295..fe758cedb93f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -126,7 +126,7 @@ static void __init move_device_tree(void)
126 if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || 126 if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
127 overlaps_crashkernel(start, size) || 127 overlaps_crashkernel(start, size) ||
128 overlaps_initrd(start, size)) { 128 overlaps_initrd(start, size)) {
129 p = __va(memblock_alloc(size, PAGE_SIZE)); 129 p = __va(memblock_phys_alloc(size, PAGE_SIZE));
130 memcpy(p, initial_boot_params, size); 130 memcpy(p, initial_boot_params, size);
131 initial_boot_params = p; 131 initial_boot_params = p;
132 DBG("Moved device tree to 0x%p\n", p); 132 DBG("Moved device tree to 0x%p\n", p);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 9b38a2e5dd35..f33ff4163a51 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -43,11 +43,13 @@
43#include <asm/btext.h> 43#include <asm/btext.h>
44#include <asm/sections.h> 44#include <asm/sections.h>
45#include <asm/machdep.h> 45#include <asm/machdep.h>
46#include <asm/opal.h>
47#include <asm/asm-prototypes.h> 46#include <asm/asm-prototypes.h>
48 47
49#include <linux/linux_logo.h> 48#include <linux/linux_logo.h>
50 49
50/* All of prom_init bss lives here */
51#define __prombss __section(.bss.prominit)
52
51/* 53/*
52 * Eventually bump that one up 54 * Eventually bump that one up
53 */ 55 */
@@ -87,7 +89,7 @@
87#define OF_WORKAROUNDS 0 89#define OF_WORKAROUNDS 0
88#else 90#else
89#define OF_WORKAROUNDS of_workarounds 91#define OF_WORKAROUNDS of_workarounds
90int of_workarounds; 92static int of_workarounds __prombss;
91#endif 93#endif
92 94
93#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */ 95#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
@@ -148,29 +150,31 @@ extern void copy_and_flush(unsigned long dest, unsigned long src,
148 unsigned long size, unsigned long offset); 150 unsigned long size, unsigned long offset);
149 151
150/* prom structure */ 152/* prom structure */
151static struct prom_t __initdata prom; 153static struct prom_t __prombss prom;
152 154
153static unsigned long prom_entry __initdata; 155static unsigned long __prombss prom_entry;
154 156
155#define PROM_SCRATCH_SIZE 256 157#define PROM_SCRATCH_SIZE 256
156 158
157static char __initdata of_stdout_device[256]; 159static char __prombss of_stdout_device[256];
158static char __initdata prom_scratch[PROM_SCRATCH_SIZE]; 160static char __prombss prom_scratch[PROM_SCRATCH_SIZE];
159 161
160static unsigned long __initdata dt_header_start; 162static unsigned long __prombss dt_header_start;
161static unsigned long __initdata dt_struct_start, dt_struct_end; 163static unsigned long __prombss dt_struct_start, dt_struct_end;
162static unsigned long __initdata dt_string_start, dt_string_end; 164static unsigned long __prombss dt_string_start, dt_string_end;
163 165
164static unsigned long __initdata prom_initrd_start, prom_initrd_end; 166static unsigned long __prombss prom_initrd_start, prom_initrd_end;
165 167
166#ifdef CONFIG_PPC64 168#ifdef CONFIG_PPC64
167static int __initdata prom_iommu_force_on; 169static int __prombss prom_iommu_force_on;
168static int __initdata prom_iommu_off; 170static int __prombss prom_iommu_off;
169static unsigned long __initdata prom_tce_alloc_start; 171static unsigned long __prombss prom_tce_alloc_start;
170static unsigned long __initdata prom_tce_alloc_end; 172static unsigned long __prombss prom_tce_alloc_end;
171#endif 173#endif
172 174
173static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); 175#ifdef CONFIG_PPC_PSERIES
176static bool __prombss prom_radix_disable;
177#endif
174 178
175struct platform_support { 179struct platform_support {
176 bool hash_mmu; 180 bool hash_mmu;
@@ -188,26 +192,25 @@ struct platform_support {
188#define PLATFORM_LPAR 0x0001 192#define PLATFORM_LPAR 0x0001
189#define PLATFORM_POWERMAC 0x0400 193#define PLATFORM_POWERMAC 0x0400
190#define PLATFORM_GENERIC 0x0500 194#define PLATFORM_GENERIC 0x0500
191#define PLATFORM_OPAL 0x0600
192 195
193static int __initdata of_platform; 196static int __prombss of_platform;
194 197
195static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; 198static char __prombss prom_cmd_line[COMMAND_LINE_SIZE];
196 199
197static unsigned long __initdata prom_memory_limit; 200static unsigned long __prombss prom_memory_limit;
198 201
199static unsigned long __initdata alloc_top; 202static unsigned long __prombss alloc_top;
200static unsigned long __initdata alloc_top_high; 203static unsigned long __prombss alloc_top_high;
201static unsigned long __initdata alloc_bottom; 204static unsigned long __prombss alloc_bottom;
202static unsigned long __initdata rmo_top; 205static unsigned long __prombss rmo_top;
203static unsigned long __initdata ram_top; 206static unsigned long __prombss ram_top;
204 207
205static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE]; 208static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE];
206static int __initdata mem_reserve_cnt; 209static int __prombss mem_reserve_cnt;
207 210
208static cell_t __initdata regbuf[1024]; 211static cell_t __prombss regbuf[1024];
209 212
210static bool rtas_has_query_cpu_stopped; 213static bool __prombss rtas_has_query_cpu_stopped;
211 214
212 215
213/* 216/*
@@ -522,8 +525,8 @@ static void add_string(char **str, const char *q)
522 525
523static char *tohex(unsigned int x) 526static char *tohex(unsigned int x)
524{ 527{
525 static char digits[] = "0123456789abcdef"; 528 static const char digits[] __initconst = "0123456789abcdef";
526 static char result[9]; 529 static char result[9] __prombss;
527 int i; 530 int i;
528 531
529 result[8] = 0; 532 result[8] = 0;
@@ -664,6 +667,8 @@ static void __init early_cmdline_parse(void)
664#endif 667#endif
665 } 668 }
666 669
670#ifdef CONFIG_PPC_PSERIES
671 prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
667 opt = strstr(prom_cmd_line, "disable_radix"); 672 opt = strstr(prom_cmd_line, "disable_radix");
668 if (opt) { 673 if (opt) {
669 opt += 13; 674 opt += 13;
@@ -679,9 +684,10 @@ static void __init early_cmdline_parse(void)
679 } 684 }
680 if (prom_radix_disable) 685 if (prom_radix_disable)
681 prom_debug("Radix disabled from cmdline\n"); 686 prom_debug("Radix disabled from cmdline\n");
687#endif /* CONFIG_PPC_PSERIES */
682} 688}
683 689
684#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) 690#ifdef CONFIG_PPC_PSERIES
685/* 691/*
686 * The architecture vector has an array of PVR mask/value pairs, 692 * The architecture vector has an array of PVR mask/value pairs,
687 * followed by # option vectors - 1, followed by the option vectors. 693 * followed by # option vectors - 1, followed by the option vectors.
@@ -782,7 +788,7 @@ struct ibm_arch_vec {
782 struct option_vector6 vec6; 788 struct option_vector6 vec6;
783} __packed; 789} __packed;
784 790
785struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { 791static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
786 .pvrs = { 792 .pvrs = {
787 { 793 {
788 .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */ 794 .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
@@ -920,9 +926,11 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
920 }, 926 },
921}; 927};
922 928
929static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned;
930
923/* Old method - ELF header with PT_NOTE sections only works on BE */ 931/* Old method - ELF header with PT_NOTE sections only works on BE */
924#ifdef __BIG_ENDIAN__ 932#ifdef __BIG_ENDIAN__
925static struct fake_elf { 933static const struct fake_elf {
926 Elf32_Ehdr elfhdr; 934 Elf32_Ehdr elfhdr;
927 Elf32_Phdr phdr[2]; 935 Elf32_Phdr phdr[2];
928 struct chrpnote { 936 struct chrpnote {
@@ -955,7 +963,7 @@ static struct fake_elf {
955 u32 ignore_me; 963 u32 ignore_me;
956 } rpadesc; 964 } rpadesc;
957 } rpanote; 965 } rpanote;
958} fake_elf = { 966} fake_elf __initconst = {
959 .elfhdr = { 967 .elfhdr = {
960 .e_ident = { 0x7f, 'E', 'L', 'F', 968 .e_ident = { 0x7f, 'E', 'L', 'F',
961 ELFCLASS32, ELFDATA2MSB, EV_CURRENT }, 969 ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
@@ -1129,14 +1137,21 @@ static void __init prom_check_platform_support(void)
1129 }; 1137 };
1130 int prop_len = prom_getproplen(prom.chosen, 1138 int prop_len = prom_getproplen(prom.chosen,
1131 "ibm,arch-vec-5-platform-support"); 1139 "ibm,arch-vec-5-platform-support");
1140
1141 /* First copy the architecture vec template */
1142 ibm_architecture_vec = ibm_architecture_vec_template;
1143
1132 if (prop_len > 1) { 1144 if (prop_len > 1) {
1133 int i; 1145 int i;
1134 u8 vec[prop_len]; 1146 u8 vec[8];
1135 prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", 1147 prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
1136 prop_len); 1148 prop_len);
1149 if (prop_len > sizeof(vec))
1150 prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
1151 prop_len);
1137 prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", 1152 prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
1138 &vec, sizeof(vec)); 1153 &vec, sizeof(vec));
1139 for (i = 0; i < prop_len; i += 2) { 1154 for (i = 0; i < sizeof(vec); i += 2) {
1140 prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 1155 prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
1141 , vec[i] 1156 , vec[i]
1142 , vec[i + 1]); 1157 , vec[i + 1]);
@@ -1225,7 +1240,7 @@ static void __init prom_send_capabilities(void)
1225 } 1240 }
1226#endif /* __BIG_ENDIAN__ */ 1241#endif /* __BIG_ENDIAN__ */
1227} 1242}
1228#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ 1243#endif /* CONFIG_PPC_PSERIES */
1229 1244
1230/* 1245/*
1231 * Memory allocation strategy... our layout is normally: 1246 * Memory allocation strategy... our layout is normally:
@@ -1562,88 +1577,6 @@ static void __init prom_close_stdin(void)
1562 } 1577 }
1563} 1578}
1564 1579
1565#ifdef CONFIG_PPC_POWERNV
1566
1567#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
1568static u64 __initdata prom_opal_base;
1569static u64 __initdata prom_opal_entry;
1570#endif
1571
1572/*
1573 * Allocate room for and instantiate OPAL
1574 */
1575static void __init prom_instantiate_opal(void)
1576{
1577 phandle opal_node;
1578 ihandle opal_inst;
1579 u64 base, entry;
1580 u64 size = 0, align = 0x10000;
1581 __be64 val64;
1582 u32 rets[2];
1583
1584 prom_debug("prom_instantiate_opal: start...\n");
1585
1586 opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal"));
1587 prom_debug("opal_node: %x\n", opal_node);
1588 if (!PHANDLE_VALID(opal_node))
1589 return;
1590
1591 val64 = 0;
1592 prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64));
1593 size = be64_to_cpu(val64);
1594 if (size == 0)
1595 return;
1596 val64 = 0;
1597 prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64));
1598 align = be64_to_cpu(val64);
1599
1600 base = alloc_down(size, align, 0);
1601 if (base == 0) {
1602 prom_printf("OPAL allocation failed !\n");
1603 return;
1604 }
1605
1606 opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal"));
1607 if (!IHANDLE_VALID(opal_inst)) {
1608 prom_printf("opening opal package failed (%x)\n", opal_inst);
1609 return;
1610 }
1611
1612 prom_printf("instantiating opal at 0x%llx...", base);
1613
1614 if (call_prom_ret("call-method", 4, 3, rets,
1615 ADDR("load-opal-runtime"),
1616 opal_inst,
1617 base >> 32, base & 0xffffffff) != 0
1618 || (rets[0] == 0 && rets[1] == 0)) {
1619 prom_printf(" failed\n");
1620 return;
1621 }
1622 entry = (((u64)rets[0]) << 32) | rets[1];
1623
1624 prom_printf(" done\n");
1625
1626 reserve_mem(base, size);
1627
1628 prom_debug("opal base = 0x%llx\n", base);
1629 prom_debug("opal align = 0x%llx\n", align);
1630 prom_debug("opal entry = 0x%llx\n", entry);
1631 prom_debug("opal size = 0x%llx\n", size);
1632
1633 prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
1634 &base, sizeof(base));
1635 prom_setprop(opal_node, "/ibm,opal", "opal-entry-address",
1636 &entry, sizeof(entry));
1637
1638#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
1639 prom_opal_base = base;
1640 prom_opal_entry = entry;
1641#endif
1642 prom_debug("prom_instantiate_opal: end...\n");
1643}
1644
1645#endif /* CONFIG_PPC_POWERNV */
1646
1647/* 1580/*
1648 * Allocate room for and instantiate RTAS 1581 * Allocate room for and instantiate RTAS
1649 */ 1582 */
@@ -2150,10 +2083,6 @@ static int __init prom_find_machine_type(void)
2150 } 2083 }
2151 } 2084 }
2152#ifdef CONFIG_PPC64 2085#ifdef CONFIG_PPC64
2153 /* Try to detect OPAL */
2154 if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal"))))
2155 return PLATFORM_OPAL;
2156
2157 /* Try to figure out if it's an IBM pSeries or any other 2086 /* Try to figure out if it's an IBM pSeries or any other
2158 * PAPR compliant platform. We assume it is if : 2087 * PAPR compliant platform. We assume it is if :
2159 * - /device_type is "chrp" (please, do NOT use that for future 2088 * - /device_type is "chrp" (please, do NOT use that for future
@@ -2202,7 +2131,7 @@ static void __init prom_check_displays(void)
2202 ihandle ih; 2131 ihandle ih;
2203 int i; 2132 int i;
2204 2133
2205 static unsigned char default_colors[] = { 2134 static const unsigned char default_colors[] __initconst = {
2206 0x00, 0x00, 0x00, 2135 0x00, 0x00, 0x00,
2207 0x00, 0x00, 0xaa, 2136 0x00, 0x00, 0xaa,
2208 0x00, 0xaa, 0x00, 2137 0x00, 0xaa, 0x00,
@@ -2398,7 +2327,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
2398 char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; 2327 char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
2399 unsigned long soff; 2328 unsigned long soff;
2400 unsigned char *valp; 2329 unsigned char *valp;
2401 static char pname[MAX_PROPERTY_NAME]; 2330 static char pname[MAX_PROPERTY_NAME] __prombss;
2402 int l, room, has_phandle = 0; 2331 int l, room, has_phandle = 0;
2403 2332
2404 dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); 2333 dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
@@ -2481,14 +2410,11 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
2481 has_phandle = 1; 2410 has_phandle = 1;
2482 } 2411 }
2483 2412
2484 /* Add a "linux,phandle" property if no "phandle" property already 2413 /* Add a "phandle" property if none already exist */
2485 * existed (can happen with OPAL)
2486 */
2487 if (!has_phandle) { 2414 if (!has_phandle) {
2488 soff = dt_find_string("linux,phandle"); 2415 soff = dt_find_string("phandle");
2489 if (soff == 0) 2416 if (soff == 0)
2490 prom_printf("WARNING: Can't find string index for" 2417 prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path);
2491 " <linux-phandle> node %s\n", path);
2492 else { 2418 else {
2493 dt_push_token(OF_DT_PROP, mem_start, mem_end); 2419 dt_push_token(OF_DT_PROP, mem_start, mem_end);
2494 dt_push_token(4, mem_start, mem_end); 2420 dt_push_token(4, mem_start, mem_end);
@@ -2548,9 +2474,9 @@ static void __init flatten_device_tree(void)
2548 dt_string_start = mem_start; 2474 dt_string_start = mem_start;
2549 mem_start += 4; /* hole */ 2475 mem_start += 4; /* hole */
2550 2476
2551 /* Add "linux,phandle" in there, we'll need it */ 2477 /* Add "phandle" in there, we'll need it */
2552 namep = make_room(&mem_start, &mem_end, 16, 1); 2478 namep = make_room(&mem_start, &mem_end, 16, 1);
2553 strcpy(namep, "linux,phandle"); 2479 strcpy(namep, "phandle");
2554 mem_start = (unsigned long)namep + strlen(namep) + 1; 2480 mem_start = (unsigned long)namep + strlen(namep) + 1;
2555 2481
2556 /* Build string array */ 2482 /* Build string array */
@@ -3172,7 +3098,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
3172 */ 3098 */
3173 early_cmdline_parse(); 3099 early_cmdline_parse();
3174 3100
3175#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) 3101#ifdef CONFIG_PPC_PSERIES
3176 /* 3102 /*
3177 * On pSeries, inform the firmware about our capabilities 3103 * On pSeries, inform the firmware about our capabilities
3178 */ 3104 */
@@ -3216,15 +3142,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
3216 * On non-powermacs, try to instantiate RTAS. PowerMacs don't 3142 * On non-powermacs, try to instantiate RTAS. PowerMacs don't
3217 * have a usable RTAS implementation. 3143 * have a usable RTAS implementation.
3218 */ 3144 */
3219 if (of_platform != PLATFORM_POWERMAC && 3145 if (of_platform != PLATFORM_POWERMAC)
3220 of_platform != PLATFORM_OPAL)
3221 prom_instantiate_rtas(); 3146 prom_instantiate_rtas();
3222 3147
3223#ifdef CONFIG_PPC_POWERNV
3224 if (of_platform == PLATFORM_OPAL)
3225 prom_instantiate_opal();
3226#endif /* CONFIG_PPC_POWERNV */
3227
3228#ifdef CONFIG_PPC64 3148#ifdef CONFIG_PPC64
3229 /* instantiate sml */ 3149 /* instantiate sml */
3230 prom_instantiate_sml(); 3150 prom_instantiate_sml();
@@ -3237,8 +3157,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
3237 * 3157 *
 3238	 * (This must be done after instantiating RTAS)		 3158	 * (This must be done after instantiating RTAS)
3239 */ 3159 */
3240 if (of_platform != PLATFORM_POWERMAC && 3160 if (of_platform != PLATFORM_POWERMAC)
3241 of_platform != PLATFORM_OPAL)
3242 prom_hold_cpus(); 3161 prom_hold_cpus();
3243 3162
3244 /* 3163 /*
@@ -3282,11 +3201,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
3282 /* 3201 /*
3283 * in case stdin is USB and still active on IBM machines... 3202 * in case stdin is USB and still active on IBM machines...
3284 * Unfortunately quiesce crashes on some powermacs if we have 3203 * Unfortunately quiesce crashes on some powermacs if we have
3285 * closed stdin already (in particular the powerbook 101). It 3204 * closed stdin already (in particular the powerbook 101).
3286 * appears that the OPAL version of OFW doesn't like it either.
3287 */ 3205 */
3288 if (of_platform != PLATFORM_POWERMAC && 3206 if (of_platform != PLATFORM_POWERMAC)
3289 of_platform != PLATFORM_OPAL)
3290 prom_close_stdin(); 3207 prom_close_stdin();
3291 3208
3292 /* 3209 /*
@@ -3304,10 +3221,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
3304 hdr = dt_header_start; 3221 hdr = dt_header_start;
3305 3222
3306 /* Don't print anything after quiesce under OPAL, it crashes OFW */ 3223 /* Don't print anything after quiesce under OPAL, it crashes OFW */
3307 if (of_platform != PLATFORM_OPAL) { 3224 prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
3308 prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); 3225 prom_debug("->dt_header_start=0x%lx\n", hdr);
3309 prom_debug("->dt_header_start=0x%lx\n", hdr);
3310 }
3311 3226
3312#ifdef CONFIG_PPC32 3227#ifdef CONFIG_PPC32
3313 reloc_got2(-offset); 3228 reloc_got2(-offset);
@@ -3315,13 +3230,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
3315 unreloc_toc(); 3230 unreloc_toc();
3316#endif 3231#endif
3317 3232
3318#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
3319 /* OPAL early debug gets the OPAL base & entry in r8 and r9 */
3320 __start(hdr, kbase, 0, 0, 0,
3321 prom_opal_base, prom_opal_entry);
3322#else
3323 __start(hdr, kbase, 0, 0, 0, 0, 0); 3233 __start(hdr, kbase, 0, 0, 0, 0, 0);
3324#endif
3325 3234
3326 return 0; 3235 return 0;
3327} 3236}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index acb6b9226352..667df97d2595 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -28,6 +28,18 @@ OBJ="$2"
28 28
29ERROR=0 29ERROR=0
30 30
31function check_section()
32{
33 file=$1
34 section=$2
35 size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}")
36 size=${size:-0}
37 if [ $size -ne 0 ]; then
38 ERROR=1
39 echo "Error: Section $section not empty in prom_init.c" >&2
40 fi
41}
42
31for UNDEF in $($NM -u $OBJ | awk '{print $2}') 43for UNDEF in $($NM -u $OBJ | awk '{print $2}')
32do 44do
33 # On 64-bit nm gives us the function descriptors, which have 45 # On 64-bit nm gives us the function descriptors, which have
@@ -66,4 +78,8 @@ do
66 fi 78 fi
67done 79done
68 80
81check_section $OBJ .data
82check_section $OBJ .bss
83check_section $OBJ .init.data
84
69exit $ERROR 85exit $ERROR
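The check_section() calls added above enforce the same rule that motivates the __prombss annotation seen earlier in the prom_init.c hunks (e.g. the pname[] buffer): prom_init must keep its writable state out of the generic .data, .bss and .init.data sections. As a rough sketch only (the section name and exact attribute spelling are assumptions, not taken from this patch), such a marker could look like:

	/* Hypothetical sketch of a __prombss-style marker: route prom_init's
	 * writable globals into a dedicated section so that the generic
	 * .data/.bss stay empty and the check_section() tests above pass.
	 */
	#define __prombss __attribute__((__section__(".bss.prominit")))

	static char pname[32] __prombss;	/* analogous to the pname[] buffer above */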
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 9667666eb18e..afb819f4ca68 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -297,7 +297,7 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
297 } 297 }
298#endif 298#endif
299 299
300 if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { 300 if (regno < (sizeof(struct user_pt_regs) / sizeof(unsigned long))) {
301 *data = ((unsigned long *)task->thread.regs)[regno]; 301 *data = ((unsigned long *)task->thread.regs)[regno];
302 return 0; 302 return 0;
303 } 303 }
@@ -360,10 +360,10 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
360 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, 360 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
361 &target->thread.regs->orig_gpr3, 361 &target->thread.regs->orig_gpr3,
362 offsetof(struct pt_regs, orig_gpr3), 362 offsetof(struct pt_regs, orig_gpr3),
363 sizeof(struct pt_regs)); 363 sizeof(struct user_pt_regs));
364 if (!ret) 364 if (!ret)
365 ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, 365 ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
366 sizeof(struct pt_regs), -1); 366 sizeof(struct user_pt_regs), -1);
367 367
368 return ret; 368 return ret;
369} 369}
@@ -853,10 +853,10 @@ static int tm_cgpr_get(struct task_struct *target,
853 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, 853 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
854 &target->thread.ckpt_regs.orig_gpr3, 854 &target->thread.ckpt_regs.orig_gpr3,
855 offsetof(struct pt_regs, orig_gpr3), 855 offsetof(struct pt_regs, orig_gpr3),
856 sizeof(struct pt_regs)); 856 sizeof(struct user_pt_regs));
857 if (!ret) 857 if (!ret)
858 ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, 858 ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
859 sizeof(struct pt_regs), -1); 859 sizeof(struct user_pt_regs), -1);
860 860
861 return ret; 861 return ret;
862} 862}
@@ -1609,7 +1609,7 @@ static int ppr_get(struct task_struct *target,
1609 void *kbuf, void __user *ubuf) 1609 void *kbuf, void __user *ubuf)
1610{ 1610{
1611 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 1611 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
1612 &target->thread.ppr, 0, sizeof(u64)); 1612 &target->thread.regs->ppr, 0, sizeof(u64));
1613} 1613}
1614 1614
1615static int ppr_set(struct task_struct *target, 1615static int ppr_set(struct task_struct *target,
@@ -1618,7 +1618,7 @@ static int ppr_set(struct task_struct *target,
1618 const void *kbuf, const void __user *ubuf) 1618 const void *kbuf, const void __user *ubuf)
1619{ 1619{
1620 return user_regset_copyin(&pos, &count, &kbuf, &ubuf, 1620 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
1621 &target->thread.ppr, 0, sizeof(u64)); 1621 &target->thread.regs->ppr, 0, sizeof(u64));
1622} 1622}
1623 1623
1624static int dscr_get(struct task_struct *target, 1624static int dscr_get(struct task_struct *target,
@@ -2508,6 +2508,7 @@ void ptrace_disable(struct task_struct *child)
2508{ 2508{
2509 /* make sure the single step bit is not set. */ 2509 /* make sure the single step bit is not set. */
2510 user_disable_single_step(child); 2510 user_disable_single_step(child);
2511 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
2511} 2512}
2512 2513
2513#ifdef CONFIG_PPC_ADV_DEBUG_REGS 2514#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -3130,7 +3131,7 @@ long arch_ptrace(struct task_struct *child, long request,
3130 case PTRACE_GETREGS: /* Get all pt_regs from the child. */ 3131 case PTRACE_GETREGS: /* Get all pt_regs from the child. */
3131 return copy_regset_to_user(child, &user_ppc_native_view, 3132 return copy_regset_to_user(child, &user_ppc_native_view,
3132 REGSET_GPR, 3133 REGSET_GPR,
3133 0, sizeof(struct pt_regs), 3134 0, sizeof(struct user_pt_regs),
3134 datavp); 3135 datavp);
3135 3136
3136#ifdef CONFIG_PPC64 3137#ifdef CONFIG_PPC64
@@ -3139,7 +3140,7 @@ long arch_ptrace(struct task_struct *child, long request,
3139 case PTRACE_SETREGS: /* Set all gp regs in the child. */ 3140 case PTRACE_SETREGS: /* Set all gp regs in the child. */
3140 return copy_regset_from_user(child, &user_ppc_native_view, 3141 return copy_regset_from_user(child, &user_ppc_native_view,
3141 REGSET_GPR, 3142 REGSET_GPR,
3142 0, sizeof(struct pt_regs), 3143 0, sizeof(struct user_pt_regs),
3143 datavp); 3144 datavp);
3144 3145
3145 case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ 3146 case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
@@ -3264,6 +3265,16 @@ long do_syscall_trace_enter(struct pt_regs *regs)
3264{ 3265{
3265 user_exit(); 3266 user_exit();
3266 3267
3268 if (test_thread_flag(TIF_SYSCALL_EMU)) {
3269 ptrace_report_syscall(regs);
3270 /*
 3271		 * Returning -1 will skip the syscall execution. We also want to
 3272		 * avoid clobbering any registers, so we do not 'goto' the
 3273		 * skip label.
3274 */
3275 return -1;
3276 }
3277
3267 /* 3278 /*
3268 * The tracer may decide to abort the syscall, if so tracehook 3279 * The tracer may decide to abort the syscall, if so tracehook
3269 * will return !0. Note that the tracer may also just change 3280 * will return !0. Note that the tracer may also just change
@@ -3324,3 +3335,42 @@ void do_syscall_trace_leave(struct pt_regs *regs)
3324 3335
3325 user_enter(); 3336 user_enter();
3326} 3337}
3338
3339void __init pt_regs_check(void)
3340{
3341 BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
3342 offsetof(struct user_pt_regs, gpr));
3343 BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
3344 offsetof(struct user_pt_regs, nip));
3345 BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
3346 offsetof(struct user_pt_regs, msr));
3347 BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
3348 offsetof(struct user_pt_regs, msr));
3349 BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
3350 offsetof(struct user_pt_regs, orig_gpr3));
3351 BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
3352 offsetof(struct user_pt_regs, ctr));
3353 BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
3354 offsetof(struct user_pt_regs, link));
3355 BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
3356 offsetof(struct user_pt_regs, xer));
3357 BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
3358 offsetof(struct user_pt_regs, ccr));
3359#ifdef __powerpc64__
3360 BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
3361 offsetof(struct user_pt_regs, softe));
3362#else
3363 BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
3364 offsetof(struct user_pt_regs, mq));
3365#endif
3366 BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
3367 offsetof(struct user_pt_regs, trap));
3368 BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
3369 offsetof(struct user_pt_regs, dar));
3370 BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
3371 offsetof(struct user_pt_regs, dsisr));
3372 BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
3373 offsetof(struct user_pt_regs, result));
3374
3375 BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
3376}
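The sizeof(struct user_pt_regs) conversions and the pt_regs_check() assertions above rely on the kernel's pt_regs starting with the layout that is exported to userspace. An abridged sketch of that relationship (field list shortened, not the full definition from the patch):

	/* Abridged sketch: user_pt_regs is the ABI-visible prefix of the
	 * kernel's pt_regs. The BUILD_BUG_ON() checks keep the offsets in
	 * sync, and regsets copy only sizeof(struct user_pt_regs) out.
	 */
	struct user_pt_regs {
		unsigned long gpr[32];
		unsigned long nip;
		unsigned long msr;
		/* ... orig_gpr3, ctr, link, xer, ccr, softe/mq, trap, dar, dsisr, result */
	};

	struct pt_regs {
		union {
			struct user_pt_regs user_regs;
			struct {
				unsigned long gpr[32];
				unsigned long nip;
				unsigned long msr;
				/* ... same fields as user_pt_regs ... */
			};
		};
		/* Kernel-only state follows and is never exported, e.g. ppr,
		 * which is why ppr_get()/ppr_set() now go through
		 * thread.regs->ppr.
		 */
		unsigned long ppr;
	};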
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8afd146bc9c7..de35bd8f047f 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -981,7 +981,15 @@ int rtas_ibm_suspend_me(u64 handle)
981 goto out; 981 goto out;
982 } 982 }
983 983
984 stop_topology_update(); 984 cpu_hotplug_disable();
985
986 /* Check if we raced with a CPU-Offline Operation */
987 if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
988 pr_err("%s: Raced against a concurrent CPU-Offline\n",
989 __func__);
990 atomic_set(&data.error, -EBUSY);
991 goto out_hotplug_enable;
992 }
985 993
986 /* Call function on all CPUs. One of us will make the 994 /* Call function on all CPUs. One of us will make the
987 * rtas call 995 * rtas call
@@ -994,7 +1002,8 @@ int rtas_ibm_suspend_me(u64 handle)
994 if (atomic_read(&data.error) != 0) 1002 if (atomic_read(&data.error) != 0)
995 printk(KERN_ERR "Error doing global join\n"); 1003 printk(KERN_ERR "Error doing global join\n");
996 1004
997 start_topology_update(); 1005out_hotplug_enable:
1006 cpu_hotplug_enable();
998 1007
999 /* Take down CPUs not online prior to suspend */ 1008 /* Take down CPUs not online prior to suspend */
1000 cpuret = rtas_offline_cpus_mask(offline_mask); 1009 cpuret = rtas_offline_cpus_mask(offline_mask);
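The rtas_ibm_suspend_me() change above swaps the topology-update pause for a CPU-hotplug guard and refuses to start the global join if a CPU went offline in the meantime. A condensed sketch of that pattern (not the literal patch code; error reporting simplified):

	static int suspend_guard_sketch(void)
	{
		int ret = 0;

		cpu_hotplug_disable();
		/* Refuse the rendezvous if any present CPU is already offline */
		if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) {
			ret = -EBUSY;	/* raced with a CPU-offline operation */
			goto out_hotplug_enable;
		}
		/* ... on_each_cpu() rendezvous and the RTAS call happen here ... */
	out_hotplug_enable:
		cpu_hotplug_enable();
		return ret;
	}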
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 44d66c33d59d..38cadae4ca4f 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -91,6 +91,8 @@ static char *rtas_event_type(int type)
91 return "Dump Notification Event"; 91 return "Dump Notification Event";
92 case RTAS_TYPE_PRRN: 92 case RTAS_TYPE_PRRN:
93 return "Platform Resource Reassignment Event"; 93 return "Platform Resource Reassignment Event";
94 case RTAS_TYPE_HOTPLUG:
95 return "Hotplug Event";
94 } 96 }
95 97
96 return rtas_type[0]; 98 return rtas_type[0];
@@ -150,8 +152,10 @@ static void printk_log_rtas(char *buf, int len)
150 } else { 152 } else {
151 struct rtas_error_log *errlog = (struct rtas_error_log *)buf; 153 struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
152 154
153 printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", 155 printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
154 error_log_cnt, rtas_event_type(rtas_error_type(errlog)), 156 error_log_cnt,
157 rtas_event_type(rtas_error_type(errlog)),
158 rtas_error_type(errlog),
155 rtas_error_severity(errlog)); 159 rtas_error_severity(errlog));
156 } 160 }
157} 161}
@@ -274,27 +278,16 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
274} 278}
275 279
276#ifdef CONFIG_PPC_PSERIES 280#ifdef CONFIG_PPC_PSERIES
277static s32 prrn_update_scope; 281static void handle_prrn_event(s32 scope)
278
279static void prrn_work_fn(struct work_struct *work)
280{ 282{
281 /* 283 /*
282 * For PRRN, we must pass the negative of the scope value in 284 * For PRRN, we must pass the negative of the scope value in
283 * the RTAS event. 285 * the RTAS event.
284 */ 286 */
285 pseries_devicetree_update(-prrn_update_scope); 287 pseries_devicetree_update(-scope);
286 numa_update_cpu_topology(false); 288 numa_update_cpu_topology(false);
287} 289}
288 290
289static DECLARE_WORK(prrn_work, prrn_work_fn);
290
291static void prrn_schedule_update(u32 scope)
292{
293 flush_work(&prrn_work);
294 prrn_update_scope = scope;
295 schedule_work(&prrn_work);
296}
297
298static void handle_rtas_event(const struct rtas_error_log *log) 291static void handle_rtas_event(const struct rtas_error_log *log)
299{ 292{
300 if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) 293 if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
@@ -303,7 +296,7 @@ static void handle_rtas_event(const struct rtas_error_log *log)
303 /* For PRRN Events the extended log length is used to denote 296 /* For PRRN Events the extended log length is used to denote
304 * the scope for calling rtas update-nodes. 297 * the scope for calling rtas update-nodes.
305 */ 298 */
306 prrn_schedule_update(rtas_error_extended_log_length(log)); 299 handle_prrn_event(rtas_error_extended_log_length(log));
307} 300}
308 301
309#else 302#else
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 93fa0c99681e..93ee3703b42f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -459,8 +459,7 @@ void __init smp_setup_cpu_maps(void)
459 459
460 DBG("smp_setup_cpu_maps()\n"); 460 DBG("smp_setup_cpu_maps()\n");
461 461
462 cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32), 462 cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32)));
463 __alignof__(u32)));
464 memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32)); 463 memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
465 464
466 for_each_node_by_type(dn, "cpu") { 465 for_each_node_by_type(dn, "cpu") {
@@ -966,6 +965,8 @@ void __init setup_arch(char **cmdline_p)
966 965
967 initmem_init(); 966 initmem_init();
968 967
968 early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
969
969#ifdef CONFIG_DUMMY_CONSOLE 970#ifdef CONFIG_DUMMY_CONSOLE
970 conswitchp = &dummy_con; 971 conswitchp = &dummy_con;
971#endif 972#endif
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 8c507be12c3c..81909600013a 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -206,9 +206,9 @@ void __init irqstack_early_init(void)
206 * as the memblock is limited to lowmem by default */ 206 * as the memblock is limited to lowmem by default */
207 for_each_possible_cpu(i) { 207 for_each_possible_cpu(i) {
208 softirq_ctx[i] = (struct thread_info *) 208 softirq_ctx[i] = (struct thread_info *)
209 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 209 __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
210 hardirq_ctx[i] = (struct thread_info *) 210 hardirq_ctx[i] = (struct thread_info *)
211 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 211 __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
212 } 212 }
213} 213}
214 214
@@ -227,12 +227,12 @@ void __init exc_lvl_early_init(void)
227#endif 227#endif
228 228
229 critirq_ctx[hw_cpu] = (struct thread_info *) 229 critirq_ctx[hw_cpu] = (struct thread_info *)
230 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 230 __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
231#ifdef CONFIG_BOOKE 231#ifdef CONFIG_BOOKE
232 dbgirq_ctx[hw_cpu] = (struct thread_info *) 232 dbgirq_ctx[hw_cpu] = (struct thread_info *)
233 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 233 __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
234 mcheckirq_ctx[hw_cpu] = (struct thread_info *) 234 mcheckirq_ctx[hw_cpu] = (struct thread_info *)
235 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 235 __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
236#endif 236#endif
237 } 237 }
238} 238}
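The memblock_alloc() to memblock_phys_alloc() renames in the two hunks above are behaviour-preserving: the helper still returns a physical address, which is why every call remains wrapped in __va(). For orientation, with the signatures assumed here:

	/* Assumed signatures, for orientation only:
	 *   phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align);
	 *       returns a physical address, hence the __va() conversion
	 *   void *memblock_alloc(phys_addr_t size, phys_addr_t align);
	 *       the newer interface that hands back a mapped virtual pointer
	 */
	softirq_ctx[i] = (struct thread_info *)
		__va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));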
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6a501b25dd85..2a51e4cc8246 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -29,10 +29,9 @@
29#include <linux/unistd.h> 29#include <linux/unistd.h>
30#include <linux/serial.h> 30#include <linux/serial.h>
31#include <linux/serial_8250.h> 31#include <linux/serial_8250.h>
32#include <linux/bootmem.h> 32#include <linux/memblock.h>
33#include <linux/pci.h> 33#include <linux/pci.h>
34#include <linux/lockdep.h> 34#include <linux/lockdep.h>
35#include <linux/memblock.h>
36#include <linux/memory.h> 35#include <linux/memory.h>
37#include <linux/nmi.h> 36#include <linux/nmi.h>
38 37
@@ -243,13 +242,19 @@ static void cpu_ready_for_interrupts(void)
243 } 242 }
244 243
245 /* 244 /*
246 * Fixup HFSCR:TM based on CPU features. The bit is set by our 245 * Set HFSCR:TM based on CPU features:
247 * early asm init because at that point we haven't updated our 246 * In the special case of TM no suspend (P9N DD2.1), Linux is
248 * CPU features from firmware and device-tree. Here we have, 247 * told TM is off via the dt-ftrs but told to (partially) use
249 * so let's do it. 248 * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
249 * will be off from dt-ftrs but we need to turn it on for the
250 * no suspend case.
250 */ 251 */
251 if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP)) 252 if (cpu_has_feature(CPU_FTR_HVMODE)) {
252 mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); 253 if (cpu_has_feature(CPU_FTR_TM_COMP))
254 mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
255 else
256 mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
257 }
253 258
254 /* Set IR and DR in PACA MSR */ 259 /* Set IR and DR in PACA MSR */
255 get_paca()->kernel_msr = MSR_KERNEL; 260 get_paca()->kernel_msr = MSR_KERNEL;
@@ -757,13 +762,15 @@ void __init emergency_stack_init(void)
757 762
758static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) 763static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
759{ 764{
760 return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align, 765 return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
761 __pa(MAX_DMA_ADDRESS)); 766 MEMBLOCK_ALLOC_ACCESSIBLE,
767 early_cpu_to_node(cpu));
768
762} 769}
763 770
764static void __init pcpu_fc_free(void *ptr, size_t size) 771static void __init pcpu_fc_free(void *ptr, size_t size)
765{ 772{
766 free_bootmem(__pa(ptr), size); 773 memblock_free(__pa(ptr), size);
767} 774}
768 775
769static int pcpu_cpu_distance(unsigned int from, unsigned int to) 776static int pcpu_cpu_distance(unsigned int from, unsigned int to)
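The bootmem-to-memblock conversion in pcpu_fc_alloc()/pcpu_fc_free() above keeps the old placement policy; only the API changes. Assuming the usual memblock prototype, the arguments map as follows:

	/* Sketch of how the old bootmem call maps onto memblock (assumed
	 * prototype: memblock_alloc_try_nid(size, align, min_addr, max_addr, nid)):
	 * the old "goal" __pa(MAX_DMA_ADDRESS) becomes min_addr,
	 * MEMBLOCK_ALLOC_ACCESSIBLE leaves max_addr unbounded, and the NUMA
	 * node is passed directly instead of via NODE_DATA().
	 */
	static void * __init pcpu_fc_alloc_sketch(unsigned int cpu, size_t size,
						  size_t align)
	{
		return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
					      MEMBLOCK_ALLOC_ACCESSIBLE,
					      early_cpu_to_node(cpu));
	}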
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 61c1fadbc644..3f15edf25a0d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -34,6 +34,8 @@
34#include <linux/topology.h> 34#include <linux/topology.h>
35#include <linux/profile.h> 35#include <linux/profile.h>
36#include <linux/processor.h> 36#include <linux/processor.h>
37#include <linux/random.h>
38#include <linux/stackprotector.h>
37 39
38#include <asm/ptrace.h> 40#include <asm/ptrace.h>
39#include <linux/atomic.h> 41#include <linux/atomic.h>
@@ -74,14 +76,32 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
74#endif 76#endif
75 77
76struct thread_info *secondary_ti; 78struct thread_info *secondary_ti;
79bool has_big_cores;
77 80
78DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); 81DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
82DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
79DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); 83DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
80DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); 84DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
81 85
82EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 86EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
83EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); 87EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
84EXPORT_PER_CPU_SYMBOL(cpu_core_map); 88EXPORT_PER_CPU_SYMBOL(cpu_core_map);
89EXPORT_SYMBOL_GPL(has_big_cores);
90
91#define MAX_THREAD_LIST_SIZE 8
92#define THREAD_GROUP_SHARE_L1 1
93struct thread_groups {
94 unsigned int property;
95 unsigned int nr_groups;
96 unsigned int threads_per_group;
97 unsigned int thread_list[MAX_THREAD_LIST_SIZE];
98};
99
100/*
 101 * On big-core systems, cpu_l1_cache_map for each CPU corresponds to
 102 * the set of its siblings that share the L1-cache.
103 */
104DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
85 105
86/* SMP operations for this machine */ 106/* SMP operations for this machine */
87struct smp_ops_t *smp_ops; 107struct smp_ops_t *smp_ops;
@@ -674,6 +694,185 @@ static void set_cpus_unrelated(int i, int j,
674} 694}
675#endif 695#endif
676 696
697/*
698 * parse_thread_groups: Parses the "ibm,thread-groups" device tree
699 * property for the CPU device node @dn and stores
700 * the parsed output in the thread_groups
701 * structure @tg if the ibm,thread-groups[0]
702 * matches @property.
703 *
704 * @dn: The device node of the CPU device.
705 * @tg: Pointer to a thread group structure into which the parsed
706 * output of "ibm,thread-groups" is stored.
707 * @property: The property of the thread-group that the caller is
708 * interested in.
709 *
710 * ibm,thread-groups[0..N-1] array defines which group of threads in
711 * the CPU-device node can be grouped together based on the property.
712 *
713 * ibm,thread-groups[0] tells us the property based on which the
714 * threads are being grouped together. If this value is 1, it implies
 715 * that the threads in the same group share the L1 and translation caches.
716 *
717 * ibm,thread-groups[1] tells us how many such thread groups exist.
718 *
719 * ibm,thread-groups[2] tells us the number of threads in each such
720 * group.
721 *
722 * ibm,thread-groups[3..N-1] is the list of threads identified by
723 * "ibm,ppc-interrupt-server#s" arranged as per their membership in
724 * the grouping.
725 *
726 * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
727 * implies that there are 2 groups of 4 threads each, where each group
 728 * of threads shares the L1 and translation caches.
729 *
730 * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
731 * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
 732 * 11, 12}.
733 *
734 * Returns 0 on success, -EINVAL if the property does not exist,
735 * -ENODATA if property does not have a value, and -EOVERFLOW if the
736 * property data isn't large enough.
737 */
738static int parse_thread_groups(struct device_node *dn,
739 struct thread_groups *tg,
740 unsigned int property)
741{
742 int i;
743 u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
744 u32 *thread_list;
745 size_t total_threads;
746 int ret;
747
748 ret = of_property_read_u32_array(dn, "ibm,thread-groups",
749 thread_group_array, 3);
750 if (ret)
751 return ret;
752
753 tg->property = thread_group_array[0];
754 tg->nr_groups = thread_group_array[1];
755 tg->threads_per_group = thread_group_array[2];
756 if (tg->property != property ||
757 tg->nr_groups < 1 ||
758 tg->threads_per_group < 1)
759 return -ENODATA;
760
761 total_threads = tg->nr_groups * tg->threads_per_group;
762
763 ret = of_property_read_u32_array(dn, "ibm,thread-groups",
764 thread_group_array,
765 3 + total_threads);
766 if (ret)
767 return ret;
768
769 thread_list = &thread_group_array[3];
770
771 for (i = 0 ; i < total_threads; i++)
772 tg->thread_list[i] = thread_list[i];
773
774 return 0;
775}
776
777/*
778 * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
779 * that @cpu belongs to.
780 *
781 * @cpu : The logical CPU whose thread group is being searched.
782 * @tg : The thread-group structure of the CPU node which @cpu belongs
783 * to.
784 *
 785 * Returns the index into tg->thread_list that points to the start
786 * of the thread_group that @cpu belongs to.
787 *
788 * Returns -1 if cpu doesn't belong to any of the groups pointed to by
789 * tg->thread_list.
790 */
791static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
792{
793 int hw_cpu_id = get_hard_smp_processor_id(cpu);
794 int i, j;
795
796 for (i = 0; i < tg->nr_groups; i++) {
797 int group_start = i * tg->threads_per_group;
798
799 for (j = 0; j < tg->threads_per_group; j++) {
800 int idx = group_start + j;
801
802 if (tg->thread_list[idx] == hw_cpu_id)
803 return group_start;
804 }
805 }
806
807 return -1;
808}
809
810static int init_cpu_l1_cache_map(int cpu)
811
812{
813 struct device_node *dn = of_get_cpu_node(cpu, NULL);
814 struct thread_groups tg = {.property = 0,
815 .nr_groups = 0,
816 .threads_per_group = 0};
817 int first_thread = cpu_first_thread_sibling(cpu);
818 int i, cpu_group_start = -1, err = 0;
819
820 if (!dn)
821 return -ENODATA;
822
823 err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
824 if (err)
825 goto out;
826
827 zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
828 GFP_KERNEL,
829 cpu_to_node(cpu));
830
831 cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
832
833 if (unlikely(cpu_group_start == -1)) {
834 WARN_ON_ONCE(1);
835 err = -ENODATA;
836 goto out;
837 }
838
839 for (i = first_thread; i < first_thread + threads_per_core; i++) {
840 int i_group_start = get_cpu_thread_group_start(i, &tg);
841
842 if (unlikely(i_group_start == -1)) {
843 WARN_ON_ONCE(1);
844 err = -ENODATA;
845 goto out;
846 }
847
848 if (i_group_start == cpu_group_start)
849 cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
850 }
851
852out:
853 of_node_put(dn);
854 return err;
855}
856
857static int init_big_cores(void)
858{
859 int cpu;
860
861 for_each_possible_cpu(cpu) {
862 int err = init_cpu_l1_cache_map(cpu);
863
864 if (err)
865 return err;
866
867 zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
868 GFP_KERNEL,
869 cpu_to_node(cpu));
870 }
871
872 has_big_cores = true;
873 return 0;
874}
875
677void __init smp_prepare_cpus(unsigned int max_cpus) 876void __init smp_prepare_cpus(unsigned int max_cpus)
678{ 877{
679 unsigned int cpu; 878 unsigned int cpu;
@@ -712,6 +911,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
712 cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); 911 cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
713 cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); 912 cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
714 913
914 init_big_cores();
915 if (has_big_cores) {
916 cpumask_set_cpu(boot_cpuid,
917 cpu_smallcore_mask(boot_cpuid));
918 }
919
715 if (smp_ops && smp_ops->probe) 920 if (smp_ops && smp_ops->probe)
716 smp_ops->probe(); 921 smp_ops->probe();
717} 922}
@@ -995,10 +1200,28 @@ static void remove_cpu_from_masks(int cpu)
995 set_cpus_unrelated(cpu, i, cpu_core_mask); 1200 set_cpus_unrelated(cpu, i, cpu_core_mask);
996 set_cpus_unrelated(cpu, i, cpu_l2_cache_mask); 1201 set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
997 set_cpus_unrelated(cpu, i, cpu_sibling_mask); 1202 set_cpus_unrelated(cpu, i, cpu_sibling_mask);
1203 if (has_big_cores)
1204 set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
998 } 1205 }
999} 1206}
1000#endif 1207#endif
1001 1208
1209static inline void add_cpu_to_smallcore_masks(int cpu)
1210{
1211 struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
1212 int i, first_thread = cpu_first_thread_sibling(cpu);
1213
1214 if (!has_big_cores)
1215 return;
1216
1217 cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
1218
1219 for (i = first_thread; i < first_thread + threads_per_core; i++) {
1220 if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
1221 set_cpus_related(i, cpu, cpu_smallcore_mask);
1222 }
1223}
1224
1002static void add_cpu_to_masks(int cpu) 1225static void add_cpu_to_masks(int cpu)
1003{ 1226{
1004 int first_thread = cpu_first_thread_sibling(cpu); 1227 int first_thread = cpu_first_thread_sibling(cpu);
@@ -1015,6 +1238,7 @@ static void add_cpu_to_masks(int cpu)
1015 if (cpu_online(i)) 1238 if (cpu_online(i))
1016 set_cpus_related(i, cpu, cpu_sibling_mask); 1239 set_cpus_related(i, cpu, cpu_sibling_mask);
1017 1240
1241 add_cpu_to_smallcore_masks(cpu);
1018 /* 1242 /*
1019 * Copy the thread sibling mask into the cache sibling mask 1243 * Copy the thread sibling mask into the cache sibling mask
1020 * and mark any CPUs that share an L2 with this CPU. 1244 * and mark any CPUs that share an L2 with this CPU.
@@ -1044,6 +1268,7 @@ static bool shared_caches;
1044void start_secondary(void *unused) 1268void start_secondary(void *unused)
1045{ 1269{
1046 unsigned int cpu = smp_processor_id(); 1270 unsigned int cpu = smp_processor_id();
1271 struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
1047 1272
1048 mmgrab(&init_mm); 1273 mmgrab(&init_mm);
1049 current->active_mm = &init_mm; 1274 current->active_mm = &init_mm;
@@ -1069,11 +1294,13 @@ void start_secondary(void *unused)
1069 /* Update topology CPU masks */ 1294 /* Update topology CPU masks */
1070 add_cpu_to_masks(cpu); 1295 add_cpu_to_masks(cpu);
1071 1296
1297 if (has_big_cores)
1298 sibling_mask = cpu_smallcore_mask;
1072 /* 1299 /*
1073 * Check for any shared caches. Note that this must be done on a 1300 * Check for any shared caches. Note that this must be done on a
1074 * per-core basis because one core in the pair might be disabled. 1301 * per-core basis because one core in the pair might be disabled.
1075 */ 1302 */
1076 if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu))) 1303 if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
1077 shared_caches = true; 1304 shared_caches = true;
1078 1305
1079 set_numa_node(numa_cpu_lookup_table[cpu]); 1306 set_numa_node(numa_cpu_lookup_table[cpu]);
@@ -1083,6 +1310,8 @@ void start_secondary(void *unused)
1083 notify_cpu_starting(cpu); 1310 notify_cpu_starting(cpu);
1084 set_cpu_online(cpu, true); 1311 set_cpu_online(cpu, true);
1085 1312
1313 boot_init_stack_canary();
1314
1086 local_irq_enable(); 1315 local_irq_enable();
1087 1316
1088 /* We can enable ftrace for secondary cpus now */ 1317 /* We can enable ftrace for secondary cpus now */
@@ -1140,6 +1369,13 @@ static const struct cpumask *shared_cache_mask(int cpu)
1140 return cpu_l2_cache_mask(cpu); 1369 return cpu_l2_cache_mask(cpu);
1141} 1370}
1142 1371
1372#ifdef CONFIG_SCHED_SMT
1373static const struct cpumask *smallcore_smt_mask(int cpu)
1374{
1375 return cpu_smallcore_mask(cpu);
1376}
1377#endif
1378
1143static struct sched_domain_topology_level power9_topology[] = { 1379static struct sched_domain_topology_level power9_topology[] = {
1144#ifdef CONFIG_SCHED_SMT 1380#ifdef CONFIG_SCHED_SMT
1145 { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, 1381 { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
@@ -1167,6 +1403,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
1167 shared_proc_topology_init(); 1403 shared_proc_topology_init();
1168 dump_numa_cpu_topology(); 1404 dump_numa_cpu_topology();
1169 1405
1406#ifdef CONFIG_SCHED_SMT
1407 if (has_big_cores) {
1408 pr_info("Using small cores at SMT level\n");
1409 power9_topology[0].mask = smallcore_smt_mask;
1410 powerpc_topology[0].mask = smallcore_smt_mask;
1411 }
1412#endif
1170 /* 1413 /*
1171 * If any CPU detects that it's sharing a cache with another CPU then 1414 * If any CPU detects that it's sharing a cache with another CPU then
1172 * use the deeper topology that is aware of this sharing. 1415 * use the deeper topology that is aware of this sharing.
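As a cross-check of the layout described in the parse_thread_groups() comment above, a small standalone program (plain userspace C, not kernel code; the property values are copied from that comment's example) decodes the two groups:

	#include <stdio.h>

	/* Decodes ibm,thread-groups = <1 2 4  5 6 7 8  9 10 11 12>:
	 *   [0] = 1 -> grouped by shared L1/translation cache
	 *   [1] = 2 -> two thread groups
	 *   [2] = 4 -> four threads per group
	 *   [3..]   -> the ibm,ppc-interrupt-server#s of the members
	 */
	int main(void)
	{
		unsigned int prop[] = { 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
		unsigned int nr_groups = prop[1], per_group = prop[2];
		unsigned int g, t;

		for (g = 0; g < nr_groups; g++) {
			printf("group %u:", g);
			for (t = 0; t < per_group; t++)
				printf(" %u", prop[3 + g * per_group + t]);
			printf("\n");	/* group 0: 5 6 7 8, group 1: 9 10 11 12 */
		}
		return 0;
	}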
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index f83bf6f72cb0..185216becb8b 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -262,7 +262,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
262 262
263 addi r1,r1,-128 263 addi r1,r1,-128
264#ifdef CONFIG_PPC_BOOK3S_64 264#ifdef CONFIG_PPC_BOOK3S_64
265 bl slb_flush_and_rebolt 265 bl slb_flush_and_restore_bolted
266#endif 266#endif
267 bl do_after_copyback 267 bl do_after_copyback
268 addi r1,r1,128 268 addi r1,r1,128
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 70f145e02487..3646affae963 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -111,6 +111,7 @@ struct clock_event_device decrementer_clockevent = {
111 .rating = 200, 111 .rating = 200,
112 .irq = 0, 112 .irq = 0,
113 .set_next_event = decrementer_set_next_event, 113 .set_next_event = decrementer_set_next_event,
114 .set_state_oneshot_stopped = decrementer_shutdown,
114 .set_state_shutdown = decrementer_shutdown, 115 .set_state_shutdown = decrementer_shutdown,
115 .tick_resume = decrementer_shutdown, 116 .tick_resume = decrementer_shutdown,
116 .features = CLOCK_EVT_FEAT_ONESHOT | 117 .features = CLOCK_EVT_FEAT_ONESHOT |
@@ -175,7 +176,7 @@ static void calc_cputime_factors(void)
175 * Read the SPURR on systems that have it, otherwise the PURR, 176 * Read the SPURR on systems that have it, otherwise the PURR,
176 * or if that doesn't exist return the timebase value passed in. 177 * or if that doesn't exist return the timebase value passed in.
177 */ 178 */
178static unsigned long read_spurr(unsigned long tb) 179static inline unsigned long read_spurr(unsigned long tb)
179{ 180{
180 if (cpu_has_feature(CPU_FTR_SPURR)) 181 if (cpu_has_feature(CPU_FTR_SPURR))
181 return mfspr(SPRN_SPURR); 182 return mfspr(SPRN_SPURR);
@@ -281,26 +282,17 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
281 * Account time for a transition between system, hard irq 282 * Account time for a transition between system, hard irq
282 * or soft irq state. 283 * or soft irq state.
283 */ 284 */
284static unsigned long vtime_delta(struct task_struct *tsk, 285static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
285 unsigned long *stime_scaled, 286 unsigned long now, unsigned long stime)
286 unsigned long *steal_time)
287{ 287{
288 unsigned long now, nowscaled, deltascaled; 288 unsigned long stime_scaled = 0;
289 unsigned long stime; 289#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
290 unsigned long nowscaled, deltascaled;
290 unsigned long utime, utime_scaled; 291 unsigned long utime, utime_scaled;
291 struct cpu_accounting_data *acct = get_accounting(tsk);
292 292
293 WARN_ON_ONCE(!irqs_disabled());
294
295 now = mftb();
296 nowscaled = read_spurr(now); 293 nowscaled = read_spurr(now);
297 stime = now - acct->starttime;
298 acct->starttime = now;
299 deltascaled = nowscaled - acct->startspurr; 294 deltascaled = nowscaled - acct->startspurr;
300 acct->startspurr = nowscaled; 295 acct->startspurr = nowscaled;
301
302 *steal_time = calculate_stolen_time(now);
303
304 utime = acct->utime - acct->utime_sspurr; 296 utime = acct->utime - acct->utime_sspurr;
305 acct->utime_sspurr = acct->utime; 297 acct->utime_sspurr = acct->utime;
306 298
@@ -314,17 +306,38 @@ static unsigned long vtime_delta(struct task_struct *tsk,
314 * the user ticks get saved up in paca->user_time_scaled to be 306 * the user ticks get saved up in paca->user_time_scaled to be
315 * used by account_process_tick. 307 * used by account_process_tick.
316 */ 308 */
317 *stime_scaled = stime; 309 stime_scaled = stime;
318 utime_scaled = utime; 310 utime_scaled = utime;
319 if (deltascaled != stime + utime) { 311 if (deltascaled != stime + utime) {
320 if (utime) { 312 if (utime) {
321 *stime_scaled = deltascaled * stime / (stime + utime); 313 stime_scaled = deltascaled * stime / (stime + utime);
322 utime_scaled = deltascaled - *stime_scaled; 314 utime_scaled = deltascaled - stime_scaled;
323 } else { 315 } else {
324 *stime_scaled = deltascaled; 316 stime_scaled = deltascaled;
325 } 317 }
326 } 318 }
327 acct->utime_scaled += utime_scaled; 319 acct->utime_scaled += utime_scaled;
320#endif
321
322 return stime_scaled;
323}
324
325static unsigned long vtime_delta(struct task_struct *tsk,
326 unsigned long *stime_scaled,
327 unsigned long *steal_time)
328{
329 unsigned long now, stime;
330 struct cpu_accounting_data *acct = get_accounting(tsk);
331
332 WARN_ON_ONCE(!irqs_disabled());
333
334 now = mftb();
335 stime = now - acct->starttime;
336 acct->starttime = now;
337
338 *stime_scaled = vtime_delta_scaled(acct, now, stime);
339
340 *steal_time = calculate_stolen_time(now);
328 341
329 return stime; 342 return stime;
330} 343}
@@ -341,7 +354,9 @@ void vtime_account_system(struct task_struct *tsk)
341 354
342 if ((tsk->flags & PF_VCPU) && !irq_count()) { 355 if ((tsk->flags & PF_VCPU) && !irq_count()) {
343 acct->gtime += stime; 356 acct->gtime += stime;
357#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
344 acct->utime_scaled += stime_scaled; 358 acct->utime_scaled += stime_scaled;
359#endif
345 } else { 360 } else {
346 if (hardirq_count()) 361 if (hardirq_count())
347 acct->hardirq_time += stime; 362 acct->hardirq_time += stime;
@@ -350,7 +365,9 @@ void vtime_account_system(struct task_struct *tsk)
350 else 365 else
351 acct->stime += stime; 366 acct->stime += stime;
352 367
368#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
353 acct->stime_scaled += stime_scaled; 369 acct->stime_scaled += stime_scaled;
370#endif
354 } 371 }
355} 372}
356EXPORT_SYMBOL_GPL(vtime_account_system); 373EXPORT_SYMBOL_GPL(vtime_account_system);
@@ -364,6 +381,21 @@ void vtime_account_idle(struct task_struct *tsk)
364 acct->idle_time += stime + steal_time; 381 acct->idle_time += stime + steal_time;
365} 382}
366 383
384static void vtime_flush_scaled(struct task_struct *tsk,
385 struct cpu_accounting_data *acct)
386{
387#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
388 if (acct->utime_scaled)
389 tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
390 if (acct->stime_scaled)
391 tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
392
393 acct->utime_scaled = 0;
394 acct->utime_sspurr = 0;
395 acct->stime_scaled = 0;
396#endif
397}
398
367/* 399/*
368 * Account the whole cputime accumulated in the paca 400 * Account the whole cputime accumulated in the paca
369 * Must be called with interrupts disabled. 401 * Must be called with interrupts disabled.
@@ -378,14 +410,13 @@ void vtime_flush(struct task_struct *tsk)
378 if (acct->utime) 410 if (acct->utime)
379 account_user_time(tsk, cputime_to_nsecs(acct->utime)); 411 account_user_time(tsk, cputime_to_nsecs(acct->utime));
380 412
381 if (acct->utime_scaled)
382 tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
383
384 if (acct->gtime) 413 if (acct->gtime)
385 account_guest_time(tsk, cputime_to_nsecs(acct->gtime)); 414 account_guest_time(tsk, cputime_to_nsecs(acct->gtime));
386 415
387 if (acct->steal_time) 416 if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
388 account_steal_time(cputime_to_nsecs(acct->steal_time)); 417 account_steal_time(cputime_to_nsecs(acct->steal_time));
418 acct->steal_time = 0;
419 }
389 420
390 if (acct->idle_time) 421 if (acct->idle_time)
391 account_idle_time(cputime_to_nsecs(acct->idle_time)); 422 account_idle_time(cputime_to_nsecs(acct->idle_time));
@@ -393,8 +424,6 @@ void vtime_flush(struct task_struct *tsk)
393 if (acct->stime) 424 if (acct->stime)
394 account_system_index_time(tsk, cputime_to_nsecs(acct->stime), 425 account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
395 CPUTIME_SYSTEM); 426 CPUTIME_SYSTEM);
396 if (acct->stime_scaled)
397 tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
398 427
399 if (acct->hardirq_time) 428 if (acct->hardirq_time)
400 account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time), 429 account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
@@ -403,14 +432,12 @@ void vtime_flush(struct task_struct *tsk)
403 account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time), 432 account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
404 CPUTIME_SOFTIRQ); 433 CPUTIME_SOFTIRQ);
405 434
435 vtime_flush_scaled(tsk, acct);
436
406 acct->utime = 0; 437 acct->utime = 0;
407 acct->utime_scaled = 0;
408 acct->utime_sspurr = 0;
409 acct->gtime = 0; 438 acct->gtime = 0;
410 acct->steal_time = 0;
411 acct->idle_time = 0; 439 acct->idle_time = 0;
412 acct->stime = 0; 440 acct->stime = 0;
413 acct->stime_scaled = 0;
414 acct->hardirq_time = 0; 441 acct->hardirq_time = 0;
415 acct->softirq_time = 0; 442 acct->softirq_time = 0;
416} 443}
@@ -984,10 +1011,14 @@ static void register_decrementer_clockevent(int cpu)
984 *dec = decrementer_clockevent; 1011 *dec = decrementer_clockevent;
985 dec->cpumask = cpumask_of(cpu); 1012 dec->cpumask = cpumask_of(cpu);
986 1013
1014 clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);
1015
987 printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", 1016 printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
988 dec->name, dec->mult, dec->shift, cpu); 1017 dec->name, dec->mult, dec->shift, cpu);
989 1018
990 clockevents_register_device(dec); 1019 /* Set values for KVM, see kvm_emulate_dec() */
1020 decrementer_clockevent.mult = dec->mult;
1021 decrementer_clockevent.shift = dec->shift;
991} 1022}
992 1023
993static void enable_large_decrementer(void) 1024static void enable_large_decrementer(void)
@@ -1035,18 +1066,7 @@ static void __init set_decrementer_max(void)
1035 1066
1036static void __init init_decrementer_clockevent(void) 1067static void __init init_decrementer_clockevent(void)
1037{ 1068{
1038 int cpu = smp_processor_id(); 1069 register_decrementer_clockevent(smp_processor_id());
1039
1040 clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
1041
1042 decrementer_clockevent.max_delta_ns =
1043 clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
1044 decrementer_clockevent.max_delta_ticks = decrementer_max;
1045 decrementer_clockevent.min_delta_ns =
1046 clockevent_delta2ns(2, &decrementer_clockevent);
1047 decrementer_clockevent.min_delta_ticks = 2;
1048
1049 register_decrementer_clockevent(cpu);
1050} 1070}
1051 1071
1052void secondary_cpu_time_init(void) 1072void secondary_cpu_time_init(void)
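The init_decrementer_clockevent() rework above folds the hand-rolled mult/shift and min/max delta setup into the clockevents core. As a sketch, relying on the generic clockevents helper, the single call now covers the whole removed sequence:

	/* clockevents_config_and_register(dev, freq, min_delta, max_delta)
	 * computes mult/shift from the timebase frequency, derives the
	 * min/max_delta_ns values from the 2..decrementer_max tick range and
	 * then registers the device, i.e. everything the removed lines did.
	 */
	clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);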
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 7716374786bd..9fabdce255cd 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -92,13 +92,14 @@ _GLOBAL(tm_abort)
92 blr 92 blr
93EXPORT_SYMBOL_GPL(tm_abort); 93EXPORT_SYMBOL_GPL(tm_abort);
94 94
95/* void tm_reclaim(struct thread_struct *thread, 95/*
96 * void tm_reclaim(struct thread_struct *thread,
96 * uint8_t cause) 97 * uint8_t cause)
97 * 98 *
98 * - Performs a full reclaim. This destroys outstanding 99 * - Performs a full reclaim. This destroys outstanding
99 * transactions and updates thread->regs.tm_ckpt_* with the 100 * transactions and updates thread.ckpt_regs, thread.ckfp_state and
100 * original checkpointed state. Note that thread->regs is 101 * thread.ckvr_state with the original checkpointed state. Note that
101 * unchanged. 102 * thread->regs is unchanged.
102 * 103 *
103 * Purpose is to both abort transactions of, and preserve the state of, 104 * Purpose is to both abort transactions of, and preserve the state of,
104 * a transactions at a context switch. We preserve/restore both sets of process 105 * a transactions at a context switch. We preserve/restore both sets of process
@@ -163,15 +164,16 @@ _GLOBAL(tm_reclaim)
163 */ 164 */
164 TRECLAIM(R4) /* Cause in r4 */ 165 TRECLAIM(R4) /* Cause in r4 */
165 166
166 /* ******************** GPRs ******************** */ 167 /*
167 /* Stash the checkpointed r13 away in the scratch SPR and get the real 168 * ******************** GPRs ********************
168 * paca 169 * Stash the checkpointed r13 in the scratch SPR and get the real paca.
169 */ 170 */
170 SET_SCRATCH0(r13) 171 SET_SCRATCH0(r13)
171 GET_PACA(r13) 172 GET_PACA(r13)
172 173
173 /* Stash the checkpointed r1 away in paca tm_scratch and get the real 174 /*
174 * stack pointer back 175 * Stash the checkpointed r1 away in paca->tm_scratch and get the real
176 * stack pointer back into r1.
175 */ 177 */
176 std r1, PACATMSCRATCH(r13) 178 std r1, PACATMSCRATCH(r13)
177 ld r1, PACAR1(r13) 179 ld r1, PACAR1(r13)
@@ -209,14 +211,15 @@ _GLOBAL(tm_reclaim)
209 211
210 addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ 212 addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */
211 213
212 /* Make r7 look like an exception frame so that we 214 /*
213 * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr! 215 * Make r7 look like an exception frame so that we can use the neat
216 * GPRx(n) macros. r7 is NOT a pt_regs ptr!
214 */ 217 */
215 subi r7, r7, STACK_FRAME_OVERHEAD 218 subi r7, r7, STACK_FRAME_OVERHEAD
216 219
217 /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ 220 /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
218 SAVE_GPR(0, r7) /* user r0 */ 221 SAVE_GPR(0, r7) /* user r0 */
219 SAVE_GPR(2, r7) /* user r2 */ 222 SAVE_GPR(2, r7) /* user r2 */
220 SAVE_4GPRS(3, r7) /* user r3-r6 */ 223 SAVE_4GPRS(3, r7) /* user r3-r6 */
221 SAVE_GPR(8, r7) /* user r8 */ 224 SAVE_GPR(8, r7) /* user r8 */
222 SAVE_GPR(9, r7) /* user r9 */ 225 SAVE_GPR(9, r7) /* user r9 */
@@ -237,7 +240,8 @@ _GLOBAL(tm_reclaim)
237 /* ******************** NIP ******************** */ 240 /* ******************** NIP ******************** */
238 mfspr r3, SPRN_TFHAR 241 mfspr r3, SPRN_TFHAR
239 std r3, _NIP(r7) /* Returns to failhandler */ 242 std r3, _NIP(r7) /* Returns to failhandler */
240 /* The checkpointed NIP is ignored when rescheduling/rechkpting, 243 /*
244 * The checkpointed NIP is ignored when rescheduling/rechkpting,
241 * but is used in signal return to 'wind back' to the abort handler. 245 * but is used in signal return to 'wind back' to the abort handler.
242 */ 246 */
243 247
@@ -260,12 +264,13 @@ _GLOBAL(tm_reclaim)
260 std r3, THREAD_TM_TAR(r12) 264 std r3, THREAD_TM_TAR(r12)
261 std r4, THREAD_TM_DSCR(r12) 265 std r4, THREAD_TM_DSCR(r12)
262 266
263 /* MSR and flags: We don't change CRs, and we don't need to alter 267 /*
264 * MSR. 268 * MSR and flags: We don't change CRs, and we don't need to alter MSR.
265 */ 269 */
266 270
267 271
268 /* ******************** FPR/VR/VSRs ************ 272 /*
273 * ******************** FPR/VR/VSRs ************
269 * After reclaiming, capture the checkpointed FPRs/VRs. 274 * After reclaiming, capture the checkpointed FPRs/VRs.
270 * 275 *
271 * We enabled VEC/FP/VSX in the msr above, so we can execute these 276 * We enabled VEC/FP/VSX in the msr above, so we can execute these
@@ -275,7 +280,7 @@ _GLOBAL(tm_reclaim)
275 280
276 /* Altivec (VEC/VMX/VR)*/ 281 /* Altivec (VEC/VMX/VR)*/
277 addi r7, r3, THREAD_CKVRSTATE 282 addi r7, r3, THREAD_CKVRSTATE
278 SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ 283 SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 ckvr_state */
279 mfvscr v0 284 mfvscr v0
280 li r6, VRSTATE_VSCR 285 li r6, VRSTATE_VSCR
281 stvx v0, r7, r6 286 stvx v0, r7, r6
@@ -286,12 +291,13 @@ _GLOBAL(tm_reclaim)
286 291
287 /* Floating Point (FP) */ 292 /* Floating Point (FP) */
288 addi r7, r3, THREAD_CKFPSTATE 293 addi r7, r3, THREAD_CKFPSTATE
289 SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ 294 SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 ckfp_state */
290 mffs fr0 295 mffs fr0
291 stfd fr0,FPSTATE_FPSCR(r7) 296 stfd fr0,FPSTATE_FPSCR(r7)
292 297
293 298
294 /* TM regs, incl TEXASR -- these live in thread_struct. Note they've 299 /*
300 * TM regs, incl TEXASR -- these live in thread_struct. Note they've
295 * been updated by the treclaim, to explain to userland the failure 301 * been updated by the treclaim, to explain to userland the failure
296 * cause (aborted). 302 * cause (aborted).
297 */ 303 */
@@ -327,7 +333,7 @@ _GLOBAL(tm_reclaim)
327 blr 333 blr
328 334
329 335
330 /* 336 /*
331 * void __tm_recheckpoint(struct thread_struct *thread) 337 * void __tm_recheckpoint(struct thread_struct *thread)
332 * - Restore the checkpointed register state saved by tm_reclaim 338 * - Restore the checkpointed register state saved by tm_reclaim
333 * when we switch_to a process. 339 * when we switch_to a process.
@@ -343,7 +349,8 @@ _GLOBAL(__tm_recheckpoint)
343 std r2, STK_GOT(r1) 349 std r2, STK_GOT(r1)
344 stdu r1, -TM_FRAME_SIZE(r1) 350 stdu r1, -TM_FRAME_SIZE(r1)
345 351
346 /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. 352 /*
353 * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
347 * This is used for backing up the NVGPRs: 354 * This is used for backing up the NVGPRs:
348 */ 355 */
349 SAVE_NVGPRS(r1) 356 SAVE_NVGPRS(r1)
@@ -352,8 +359,9 @@ _GLOBAL(__tm_recheckpoint)
352 359
353 addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ 360 addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
354 361
355 /* Make r7 look like an exception frame so that we 362 /*
356 * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr! 363 * Make r7 look like an exception frame so that we can use the neat
364 * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
357 */ 365 */
358 subi r7, r7, STACK_FRAME_OVERHEAD 366 subi r7, r7, STACK_FRAME_OVERHEAD
359 367
@@ -421,14 +429,15 @@ restore_gprs:
421 429
422 REST_NVGPRS(r7) /* GPR14-31 */ 430 REST_NVGPRS(r7) /* GPR14-31 */
423 431
424 /* Load up PPR and DSCR here so we don't run with user values for long 432 /* Load up PPR and DSCR here so we don't run with user values for long */
425 */
426 mtspr SPRN_DSCR, r5 433 mtspr SPRN_DSCR, r5
427 mtspr SPRN_PPR, r6 434 mtspr SPRN_PPR, r6
428 435
429 /* Do final sanity check on TEXASR to make sure FS is set. Do this 436 /*
437 * Do final sanity check on TEXASR to make sure FS is set. Do this
430 * here before we load up the userspace r1 so any bugs we hit will get 438 * here before we load up the userspace r1 so any bugs we hit will get
431 * a call chain */ 439 * a call chain.
440 */
432 mfspr r5, SPRN_TEXASR 441 mfspr r5, SPRN_TEXASR
433 srdi r5, r5, 16 442 srdi r5, r5, 16
434 li r6, (TEXASR_FS)@h 443 li r6, (TEXASR_FS)@h
@@ -436,8 +445,9 @@ restore_gprs:
4361: tdeqi r6, 0 4451: tdeqi r6, 0
437 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 446 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
438 447
439 /* Do final sanity check on MSR to make sure we are not transactional 448 /*
440 * or suspended 449 * Do final sanity check on MSR to make sure we are not transactional
450 * or suspended.
441 */ 451 */
442 mfmsr r6 452 mfmsr r6
443 li r5, (MSR_TS_MASK)@higher 453 li r5, (MSR_TS_MASK)@higher
@@ -453,8 +463,8 @@ restore_gprs:
453 REST_GPR(6, r7) 463 REST_GPR(6, r7)
454 464
455 /* 465 /*
456 * Store r1 and r5 on the stack so that we can access them 466 * Store r1 and r5 on the stack so that we can access them after we
457 * after we clear MSR RI. 467 * clear MSR RI.
458 */ 468 */
459 469
460 REST_GPR(5, r7) 470 REST_GPR(5, r7)
@@ -484,7 +494,8 @@ restore_gprs:
484 494
485 HMT_MEDIUM 495 HMT_MEDIUM
486 496
487 /* Our transactional state has now changed. 497 /*
498 * Our transactional state has now changed.
488 * 499 *
489 * Now just get out of here. Transactional (current) state will be 500 * Now just get out of here. Transactional (current) state will be
490 * updated once restore is called on the return path in the _switch-ed 501 * updated once restore is called on the return path in the _switch-ed
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index d22d8bafb643..b1725ad3e13d 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -3,11 +3,9 @@
3# Makefile for the powerpc trace subsystem 3# Makefile for the powerpc trace subsystem
4# 4#
5 5
6subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
7
8ifdef CONFIG_FUNCTION_TRACER 6ifdef CONFIG_FUNCTION_TRACER
9# do not trace tracer code 7# do not trace tracer code
10CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 8CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
11endif 9endif
12 10
13obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o 11obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 4bfbb54dee51..4bf051d3e21e 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -30,6 +30,16 @@
30 30
31 31
32#ifdef CONFIG_DYNAMIC_FTRACE 32#ifdef CONFIG_DYNAMIC_FTRACE
33
34/*
35 * We generally only have a single long_branch tramp and at most 2 or 3 plt
36 * tramps generated. But we don't use the plt tramps currently. We also allot
37 * 2 tramps after .text and .init.text. So we only end up with around 3 usable
38 * tramps in total. Set aside 8 just to be sure.
39 */
40#define NUM_FTRACE_TRAMPS 8
41static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
42
33static unsigned int 43static unsigned int
34ftrace_call_replace(unsigned long ip, unsigned long addr, int link) 44ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
35{ 45{
@@ -85,13 +95,16 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr)
85 return create_branch((unsigned int *)ip, addr, 0); 95 return create_branch((unsigned int *)ip, addr, 0);
86} 96}
87 97
88#ifdef CONFIG_MODULES
89
90static int is_bl_op(unsigned int op) 98static int is_bl_op(unsigned int op)
91{ 99{
92 return (op & 0xfc000003) == 0x48000001; 100 return (op & 0xfc000003) == 0x48000001;
93} 101}
94 102
103static int is_b_op(unsigned int op)
104{
105 return (op & 0xfc000003) == 0x48000000;
106}
107
95static unsigned long find_bl_target(unsigned long ip, unsigned int op) 108static unsigned long find_bl_target(unsigned long ip, unsigned int op)
96{ 109{
97 static int offset; 110 static int offset;
@@ -104,6 +117,7 @@ static unsigned long find_bl_target(unsigned long ip, unsigned int op)
104 return ip + (long)offset; 117 return ip + (long)offset;
105} 118}
106 119
120#ifdef CONFIG_MODULES
107#ifdef CONFIG_PPC64 121#ifdef CONFIG_PPC64
108static int 122static int
109__ftrace_make_nop(struct module *mod, 123__ftrace_make_nop(struct module *mod,
@@ -270,6 +284,146 @@ __ftrace_make_nop(struct module *mod,
270#endif /* PPC64 */ 284#endif /* PPC64 */
271#endif /* CONFIG_MODULES */ 285#endif /* CONFIG_MODULES */
272 286
287static unsigned long find_ftrace_tramp(unsigned long ip)
288{
289 int i;
290
291 /*
292 * We have the compiler-generated long_branch tramps at the end,
293 * and we prefer those.
294 */
295 for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
296 if (!ftrace_tramps[i])
297 continue;
298 else if (create_branch((void *)ip, ftrace_tramps[i], 0))
299 return ftrace_tramps[i];
300
301 return 0;
302}
303
304static int add_ftrace_tramp(unsigned long tramp)
305{
306 int i;
307
308 for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
309 if (!ftrace_tramps[i]) {
310 ftrace_tramps[i] = tramp;
311 return 0;
312 }
313
314 return -1;
315}
316
317/*
318 * If this is a compiler generated long_branch trampoline (essentially, a
319 * trampoline that has a branch to _mcount()), we re-write the branch to
320 * instead go to ftrace_[regs_]caller() and note down the location of this
321 * trampoline.
322 */
323static int setup_mcount_compiler_tramp(unsigned long tramp)
324{
325 int i, op;
326 unsigned long ptr;
327 static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
328
329 /* Is this a known long jump tramp? */
330 for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
331 if (!ftrace_tramps[i])
332 break;
333 else if (ftrace_tramps[i] == tramp)
334 return 0;
335
336 /* Is this a known plt tramp? */
337 for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
338 if (!ftrace_plt_tramps[i])
339 break;
340 else if (ftrace_plt_tramps[i] == tramp)
341 return -1;
342
343 /* New trampoline -- read where this goes */
344 if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
345 pr_debug("Fetching opcode failed.\n");
346 return -1;
347 }
348
349 /* Is this a 24 bit branch? */
350 if (!is_b_op(op)) {
351 pr_debug("Trampoline is not a long branch tramp.\n");
352 return -1;
353 }
354
355 /* Let's find where the pointer goes */
356 ptr = find_bl_target(tramp, op);
357
358 if (ptr != ppc_global_function_entry((void *)_mcount)) {
359 pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
360 return -1;
361 }
362
363 /* Let's re-write the tramp to go to ftrace_[regs_]caller */
364#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
365 ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
366#else
367 ptr = ppc_global_function_entry((void *)ftrace_caller);
368#endif
369 if (!create_branch((void *)tramp, ptr, 0)) {
370 pr_debug("%ps is not reachable from existing mcount tramp\n",
371 (void *)ptr);
372 return -1;
373 }
374
375 if (patch_branch((unsigned int *)tramp, ptr, 0)) {
376 pr_debug("REL24 out of range!\n");
377 return -1;
378 }
379
380 if (add_ftrace_tramp(tramp)) {
381 pr_debug("No tramp locations left\n");
382 return -1;
383 }
384
385 return 0;
386}
387
388static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
389{
390 unsigned long tramp, ip = rec->ip;
391 unsigned int op;
392
393 /* Read where this goes */
394 if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
395 pr_err("Fetching opcode failed.\n");
396 return -EFAULT;
397 }
398
399 /* Make sure that this is still a 24-bit jump */
400 if (!is_bl_op(op)) {
401 pr_err("Not expected bl: opcode is %x\n", op);
402 return -EINVAL;
403 }
404
405 /* Let's find where the pointer goes */
406 tramp = find_bl_target(ip, op);
407
408 pr_devel("ip:%lx jumps to %lx", ip, tramp);
409
410 if (setup_mcount_compiler_tramp(tramp)) {
411 /* Are other trampolines reachable? */
412 if (!find_ftrace_tramp(ip)) {
413 pr_err("No ftrace trampolines reachable from %ps\n",
414 (void *)ip);
415 return -EINVAL;
416 }
417 }
418
419 if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
420 pr_err("Patching NOP failed.\n");
421 return -EPERM;
422 }
423
424 return 0;
425}
426
273int ftrace_make_nop(struct module *mod, 427int ftrace_make_nop(struct module *mod,
274 struct dyn_ftrace *rec, unsigned long addr) 428 struct dyn_ftrace *rec, unsigned long addr)
275{ 429{
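The new kernel-text handling above relies on two properties of PowerPC branch encodings: an I-form branch carries primary opcode 18 with AA and LK in the low two bits, which is exactly what the 0xfc000003 mask isolates in is_b_op()/is_bl_op(), and its LI displacement is a signed, word-aligned 26-bit value, so a relative branch only reaches about +/-32 MiB, which is why reachable trampolines have to be tracked at all. A small stand-alone sketch of the same decoding (the branch_target() helper is invented for illustration; only the masks mirror the kernel code, and it computes essentially what find_bl_target() does):

#include <stdint.h>
#include <stdio.h>

static int is_bl_op(uint32_t op) { return (op & 0xfc000003) == 0x48000001; }
static int is_b_op(uint32_t op)  { return (op & 0xfc000003) == 0x48000000; }

/* Decode the signed LI field (bits 6..29, a byte displacement) and apply it
 * to the instruction address, the same arithmetic a relative b/bl performs. */
static uint64_t branch_target(uint64_t ip, uint32_t op)
{
	int64_t off = op & 0x03fffffc;
	if (off & 0x02000000)		/* sign-extend the 26-bit offset */
		off -= 0x04000000;
	return ip + off;
}

int main(void)
{
	uint32_t op = 0x4bfffff5;	/* encodes "bl .-12" */

	printf("b? %d  bl? %d  target 0x%llx\n", is_b_op(op), is_bl_op(op),
	       (unsigned long long)branch_target(0x1000, op));
	return 0;
}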
@@ -286,7 +440,8 @@ int ftrace_make_nop(struct module *mod,
286 old = ftrace_call_replace(ip, addr, 1); 440 old = ftrace_call_replace(ip, addr, 1);
287 new = PPC_INST_NOP; 441 new = PPC_INST_NOP;
288 return ftrace_modify_code(ip, old, new); 442 return ftrace_modify_code(ip, old, new);
289 } 443 } else if (core_kernel_text(ip))
444 return __ftrace_make_nop_kernel(rec, addr);
290 445
291#ifdef CONFIG_MODULES 446#ifdef CONFIG_MODULES
292 /* 447 /*
@@ -456,6 +611,53 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
456#endif /* CONFIG_PPC64 */ 611#endif /* CONFIG_PPC64 */
457#endif /* CONFIG_MODULES */ 612#endif /* CONFIG_MODULES */
458 613
614static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
615{
616 unsigned int op;
617 void *ip = (void *)rec->ip;
618 unsigned long tramp, entry, ptr;
619
620 /* Make sure we're being asked to patch a branch to a known ftrace addr */
621 entry = ppc_global_function_entry((void *)ftrace_caller);
622 ptr = ppc_global_function_entry((void *)addr);
623
624 if (ptr != entry) {
625#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
626 entry = ppc_global_function_entry((void *)ftrace_regs_caller);
627 if (ptr != entry) {
628#endif
629 pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
630 return -EINVAL;
631#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
632 }
633#endif
634 }
635
636 /* Make sure we have a nop */
637 if (probe_kernel_read(&op, ip, sizeof(op))) {
638 pr_err("Unable to read ftrace location %p\n", ip);
639 return -EFAULT;
640 }
641
642 if (op != PPC_INST_NOP) {
643 pr_err("Unexpected call sequence at %p: %x\n", ip, op);
644 return -EINVAL;
645 }
646
647 tramp = find_ftrace_tramp((unsigned long)ip);
648 if (!tramp) {
649 pr_err("No ftrace trampolines reachable from %ps\n", ip);
650 return -EINVAL;
651 }
652
653 if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
654 pr_err("Error patching branch to ftrace tramp!\n");
655 return -EINVAL;
656 }
657
658 return 0;
659}
660
459int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 661int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
460{ 662{
461 unsigned long ip = rec->ip; 663 unsigned long ip = rec->ip;
@@ -471,7 +673,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
471 old = PPC_INST_NOP; 673 old = PPC_INST_NOP;
472 new = ftrace_call_replace(ip, addr, 1); 674 new = ftrace_call_replace(ip, addr, 1);
473 return ftrace_modify_code(ip, old, new); 675 return ftrace_modify_code(ip, old, new);
474 } 676 } else if (core_kernel_text(ip))
677 return __ftrace_make_call_kernel(rec, addr);
475 678
476#ifdef CONFIG_MODULES 679#ifdef CONFIG_MODULES
477 /* 680 /*
@@ -603,6 +806,12 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
603 old = ftrace_call_replace(ip, old_addr, 1); 806 old = ftrace_call_replace(ip, old_addr, 1);
604 new = ftrace_call_replace(ip, addr, 1); 807 new = ftrace_call_replace(ip, addr, 1);
605 return ftrace_modify_code(ip, old, new); 808 return ftrace_modify_code(ip, old, new);
809 } else if (core_kernel_text(ip)) {
810 /*
811 * We always patch out-of-range locations to go to the regs
812 * variant, so there is nothing to do here.
813 */
814 return 0;
606 } 815 }
607 816
608#ifdef CONFIG_MODULES 817#ifdef CONFIG_MODULES
@@ -654,10 +863,54 @@ void arch_ftrace_update_code(int command)
654 ftrace_modify_all_code(command); 863 ftrace_modify_all_code(command);
655} 864}
656 865
866#ifdef CONFIG_PPC64
867#define PACATOC offsetof(struct paca_struct, kernel_toc)
868
869#define PPC_LO(v) ((v) & 0xffff)
870#define PPC_HI(v) (((v) >> 16) & 0xffff)
871#define PPC_HA(v) PPC_HI ((v) + 0x8000)
872
873extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
874
875int __init ftrace_dyn_arch_init(void)
876{
877 int i;
878 unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
879 u32 stub_insns[] = {
880 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
881 0x3d8c0000, /* addis r12,r12,<high> */
882 0x398c0000, /* addi r12,r12,<low> */
883 0x7d8903a6, /* mtctr r12 */
884 0x4e800420, /* bctr */
885 };
886#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
887 unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller);
888#else
889 unsigned long addr = ppc_global_function_entry((void *)ftrace_caller);
890#endif
891 long reladdr = addr - kernel_toc_addr();
892
893 if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
894 pr_err("Address of %ps out of range of kernel_toc.\n",
895 (void *)addr);
896 return -1;
897 }
898
899 for (i = 0; i < 2; i++) {
900 memcpy(tramp[i], stub_insns, sizeof(stub_insns));
901 tramp[i][1] |= PPC_HA(reladdr);
902 tramp[i][2] |= PPC_LO(reladdr);
903 add_ftrace_tramp((unsigned long)tramp[i]);
904 }
905
906 return 0;
907}
908#else
657int __init ftrace_dyn_arch_init(void) 909int __init ftrace_dyn_arch_init(void)
658{ 910{
659 return 0; 911 return 0;
660} 912}
913#endif
661#endif /* CONFIG_DYNAMIC_FTRACE */ 914#endif /* CONFIG_DYNAMIC_FTRACE */
662 915
663#ifdef CONFIG_FUNCTION_GRAPH_TRACER 916#ifdef CONFIG_FUNCTION_GRAPH_TRACER
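ftrace_dyn_arch_init() above fills each reserved trampoline with a five-instruction stub that rebuilds a TOC-relative offset from two 16-bit immediates. Because addis and addi both sign-extend their immediates, the high half has to be biased through PPC_HA() rather than taken from PPC_HI() directly. The following stand-alone sketch (compose() and sext16() are invented for illustration; the three macros mirror the ones in the hunk) checks that the HA/LO split round-trips for offsets inside the signed 32-bit range the code accepts:

#include <assert.h>
#include <stdint.h>

#define PPC_LO(v) ((v) & 0xffff)
#define PPC_HI(v) (((v) >> 16) & 0xffff)
#define PPC_HA(v) PPC_HI((v) + 0x8000)

static int64_t sext16(int64_t v)
{
	return (v & 0x8000) ? v - 0x10000 : v;
}

/* What "addis r12,r12,HA(rel); addi r12,r12,LO(rel)" leaves in r12. */
static int64_t compose(int64_t reladdr)
{
	return sext16(PPC_HA(reladdr)) * 65536 + sext16(PPC_LO(reladdr));
}

int main(void)
{
	int64_t samples[] = { 0x12348000, 0x12347fff, -0x8000, -0x12340010 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		assert(compose(samples[i]) == samples[i]);
	return 0;
}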
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S
index e25f77c10a72..1782af2d1496 100644
--- a/arch/powerpc/kernel/trace/ftrace_64.S
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -14,6 +14,18 @@
14#include <asm/ppc-opcode.h> 14#include <asm/ppc-opcode.h>
15#include <asm/export.h> 15#include <asm/export.h>
16 16
17.pushsection ".tramp.ftrace.text","aw",@progbits;
18.globl ftrace_tramp_text
19ftrace_tramp_text:
20 .space 64
21.popsection
22
23.pushsection ".tramp.ftrace.init","aw",@progbits;
24.globl ftrace_tramp_init
25ftrace_tramp_init:
26 .space 64
27.popsection
28
17_GLOBAL(mcount) 29_GLOBAL(mcount)
18_GLOBAL(_mcount) 30_GLOBAL(_mcount)
19EXPORT_SYMBOL(_mcount) 31EXPORT_SYMBOL(_mcount)
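Each of the two new sections reserves 64 bytes, comfortably more than the 20 bytes (five 32-bit instructions) that the stub written into them by ftrace_dyn_arch_init() actually needs. A trivial compile-time statement of that relationship (both macro names are invented for illustration):

#include <stdint.h>

#define TRAMP_STUB_INSNS	5	/* instructions in stub_insns[] above */
#define TRAMP_SECTION_SIZE	64	/* the ".space 64" reservation */

_Static_assert(TRAMP_STUB_INSNS * sizeof(uint32_t) <= TRAMP_SECTION_SIZE,
	       "ftrace stub must fit in the reserved trampoline space");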
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c85adb858271..9a86572db1ef 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -247,8 +247,6 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
247 mdelay(MSEC_PER_SEC); 247 mdelay(MSEC_PER_SEC);
248 } 248 }
249 249
250 if (in_interrupt())
251 panic("Fatal exception in interrupt");
252 if (panic_on_oops) 250 if (panic_on_oops)
253 panic("Fatal exception"); 251 panic("Fatal exception");
254 do_exit(signr); 252 do_exit(signr);
@@ -307,12 +305,9 @@ void die(const char *str, struct pt_regs *regs, long err)
307} 305}
308NOKPROBE_SYMBOL(die); 306NOKPROBE_SYMBOL(die);
309 307
310void user_single_step_siginfo(struct task_struct *tsk, 308void user_single_step_report(struct pt_regs *regs)
311 struct pt_regs *regs, siginfo_t *info)
312{ 309{
313 info->si_signo = SIGTRAP; 310 force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current);
314 info->si_code = TRAP_TRACE;
315 info->si_addr = (void __user *)regs->nip;
316} 311}
317 312
318static void show_signal_msg(int signr, struct pt_regs *regs, int code, 313static void show_signal_msg(int signr, struct pt_regs *regs, int code,
@@ -341,14 +336,12 @@ static void show_signal_msg(int signr, struct pt_regs *regs, int code,
341 show_user_instructions(regs); 336 show_user_instructions(regs);
342} 337}
343 338
344void _exception_pkey(int signr, struct pt_regs *regs, int code, 339static bool exception_common(int signr, struct pt_regs *regs, int code,
345 unsigned long addr, int key) 340 unsigned long addr)
346{ 341{
347 siginfo_t info;
348
349 if (!user_mode(regs)) { 342 if (!user_mode(regs)) {
350 die("Exception in kernel mode", regs, signr); 343 die("Exception in kernel mode", regs, signr);
351 return; 344 return false;
352 } 345 }
353 346
354 show_signal_msg(signr, regs, code, addr); 347 show_signal_msg(signr, regs, code, addr);
@@ -364,18 +357,23 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code,
364 */ 357 */
365 thread_pkey_regs_save(&current->thread); 358 thread_pkey_regs_save(&current->thread);
366 359
367 clear_siginfo(&info); 360 return true;
368 info.si_signo = signr; 361}
369 info.si_code = code;
370 info.si_addr = (void __user *) addr;
371 info.si_pkey = key;
372 362
373 force_sig_info(signr, &info, current); 363void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
364{
365 if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
366 return;
367
368 force_sig_pkuerr((void __user *) addr, key);
374} 369}
375 370
376void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) 371void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
377{ 372{
378 _exception_pkey(signr, regs, code, addr, 0); 373 if (!exception_common(signr, regs, code, addr))
374 return;
375
376 force_sig_fault(signr, code, (void __user *)addr, current);
379} 377}
380 378
381void system_reset_exception(struct pt_regs *regs) 379void system_reset_exception(struct pt_regs *regs)
@@ -535,10 +533,10 @@ int machine_check_e500mc(struct pt_regs *regs)
535 printk("Caused by (from MCSR=%lx): ", reason); 533 printk("Caused by (from MCSR=%lx): ", reason);
536 534
537 if (reason & MCSR_MCP) 535 if (reason & MCSR_MCP)
538 printk("Machine Check Signal\n"); 536 pr_cont("Machine Check Signal\n");
539 537
540 if (reason & MCSR_ICPERR) { 538 if (reason & MCSR_ICPERR) {
541 printk("Instruction Cache Parity Error\n"); 539 pr_cont("Instruction Cache Parity Error\n");
542 540
543 /* 541 /*
544 * This is recoverable by invalidating the i-cache. 542 * This is recoverable by invalidating the i-cache.
@@ -556,7 +554,7 @@ int machine_check_e500mc(struct pt_regs *regs)
556 } 554 }
557 555
558 if (reason & MCSR_DCPERR_MC) { 556 if (reason & MCSR_DCPERR_MC) {
559 printk("Data Cache Parity Error\n"); 557 pr_cont("Data Cache Parity Error\n");
560 558
561 /* 559 /*
562 * In write shadow mode we auto-recover from the error, but it 560 * In write shadow mode we auto-recover from the error, but it
@@ -575,38 +573,38 @@ int machine_check_e500mc(struct pt_regs *regs)
575 } 573 }
576 574
577 if (reason & MCSR_L2MMU_MHIT) { 575 if (reason & MCSR_L2MMU_MHIT) {
578 printk("Hit on multiple TLB entries\n"); 576 pr_cont("Hit on multiple TLB entries\n");
579 recoverable = 0; 577 recoverable = 0;
580 } 578 }
581 579
582 if (reason & MCSR_NMI) 580 if (reason & MCSR_NMI)
583 printk("Non-maskable interrupt\n"); 581 pr_cont("Non-maskable interrupt\n");
584 582
585 if (reason & MCSR_IF) { 583 if (reason & MCSR_IF) {
586 printk("Instruction Fetch Error Report\n"); 584 pr_cont("Instruction Fetch Error Report\n");
587 recoverable = 0; 585 recoverable = 0;
588 } 586 }
589 587
590 if (reason & MCSR_LD) { 588 if (reason & MCSR_LD) {
591 printk("Load Error Report\n"); 589 pr_cont("Load Error Report\n");
592 recoverable = 0; 590 recoverable = 0;
593 } 591 }
594 592
595 if (reason & MCSR_ST) { 593 if (reason & MCSR_ST) {
596 printk("Store Error Report\n"); 594 pr_cont("Store Error Report\n");
597 recoverable = 0; 595 recoverable = 0;
598 } 596 }
599 597
600 if (reason & MCSR_LDG) { 598 if (reason & MCSR_LDG) {
601 printk("Guarded Load Error Report\n"); 599 pr_cont("Guarded Load Error Report\n");
602 recoverable = 0; 600 recoverable = 0;
603 } 601 }
604 602
605 if (reason & MCSR_TLBSYNC) 603 if (reason & MCSR_TLBSYNC)
606 printk("Simultaneous tlbsync operations\n"); 604 pr_cont("Simultaneous tlbsync operations\n");
607 605
608 if (reason & MCSR_BSL2_ERR) { 606 if (reason & MCSR_BSL2_ERR) {
609 printk("Level 2 Cache Error\n"); 607 pr_cont("Level 2 Cache Error\n");
610 recoverable = 0; 608 recoverable = 0;
611 } 609 }
612 610
@@ -616,7 +614,7 @@ int machine_check_e500mc(struct pt_regs *regs)
616 addr = mfspr(SPRN_MCAR); 614 addr = mfspr(SPRN_MCAR);
617 addr |= (u64)mfspr(SPRN_MCARU) << 32; 615 addr |= (u64)mfspr(SPRN_MCARU) << 32;
618 616
619 printk("Machine Check %s Address: %#llx\n", 617 pr_cont("Machine Check %s Address: %#llx\n",
620 reason & MCSR_MEA ? "Effective" : "Physical", addr); 618 reason & MCSR_MEA ? "Effective" : "Physical", addr);
621 } 619 }
622 620
@@ -640,29 +638,29 @@ int machine_check_e500(struct pt_regs *regs)
640 printk("Caused by (from MCSR=%lx): ", reason); 638 printk("Caused by (from MCSR=%lx): ", reason);
641 639
642 if (reason & MCSR_MCP) 640 if (reason & MCSR_MCP)
643 printk("Machine Check Signal\n"); 641 pr_cont("Machine Check Signal\n");
644 if (reason & MCSR_ICPERR) 642 if (reason & MCSR_ICPERR)
645 printk("Instruction Cache Parity Error\n"); 643 pr_cont("Instruction Cache Parity Error\n");
646 if (reason & MCSR_DCP_PERR) 644 if (reason & MCSR_DCP_PERR)
647 printk("Data Cache Push Parity Error\n"); 645 pr_cont("Data Cache Push Parity Error\n");
648 if (reason & MCSR_DCPERR) 646 if (reason & MCSR_DCPERR)
649 printk("Data Cache Parity Error\n"); 647 pr_cont("Data Cache Parity Error\n");
650 if (reason & MCSR_BUS_IAERR) 648 if (reason & MCSR_BUS_IAERR)
651 printk("Bus - Instruction Address Error\n"); 649 pr_cont("Bus - Instruction Address Error\n");
652 if (reason & MCSR_BUS_RAERR) 650 if (reason & MCSR_BUS_RAERR)
653 printk("Bus - Read Address Error\n"); 651 pr_cont("Bus - Read Address Error\n");
654 if (reason & MCSR_BUS_WAERR) 652 if (reason & MCSR_BUS_WAERR)
655 printk("Bus - Write Address Error\n"); 653 pr_cont("Bus - Write Address Error\n");
656 if (reason & MCSR_BUS_IBERR) 654 if (reason & MCSR_BUS_IBERR)
657 printk("Bus - Instruction Data Error\n"); 655 pr_cont("Bus - Instruction Data Error\n");
658 if (reason & MCSR_BUS_RBERR) 656 if (reason & MCSR_BUS_RBERR)
659 printk("Bus - Read Data Bus Error\n"); 657 pr_cont("Bus - Read Data Bus Error\n");
660 if (reason & MCSR_BUS_WBERR) 658 if (reason & MCSR_BUS_WBERR)
661 printk("Bus - Write Data Bus Error\n"); 659 pr_cont("Bus - Write Data Bus Error\n");
662 if (reason & MCSR_BUS_IPERR) 660 if (reason & MCSR_BUS_IPERR)
663 printk("Bus - Instruction Parity Error\n"); 661 pr_cont("Bus - Instruction Parity Error\n");
664 if (reason & MCSR_BUS_RPERR) 662 if (reason & MCSR_BUS_RPERR)
665 printk("Bus - Read Parity Error\n"); 663 pr_cont("Bus - Read Parity Error\n");
666 664
667 return 0; 665 return 0;
668} 666}
@@ -680,19 +678,19 @@ int machine_check_e200(struct pt_regs *regs)
680 printk("Caused by (from MCSR=%lx): ", reason); 678 printk("Caused by (from MCSR=%lx): ", reason);
681 679
682 if (reason & MCSR_MCP) 680 if (reason & MCSR_MCP)
683 printk("Machine Check Signal\n"); 681 pr_cont("Machine Check Signal\n");
684 if (reason & MCSR_CP_PERR) 682 if (reason & MCSR_CP_PERR)
685 printk("Cache Push Parity Error\n"); 683 pr_cont("Cache Push Parity Error\n");
686 if (reason & MCSR_CPERR) 684 if (reason & MCSR_CPERR)
687 printk("Cache Parity Error\n"); 685 pr_cont("Cache Parity Error\n");
688 if (reason & MCSR_EXCP_ERR) 686 if (reason & MCSR_EXCP_ERR)
689 printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); 687 pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
690 if (reason & MCSR_BUS_IRERR) 688 if (reason & MCSR_BUS_IRERR)
691 printk("Bus - Read Bus Error on instruction fetch\n"); 689 pr_cont("Bus - Read Bus Error on instruction fetch\n");
692 if (reason & MCSR_BUS_DRERR) 690 if (reason & MCSR_BUS_DRERR)
693 printk("Bus - Read Bus Error on data load\n"); 691 pr_cont("Bus - Read Bus Error on data load\n");
694 if (reason & MCSR_BUS_WRERR) 692 if (reason & MCSR_BUS_WRERR)
695 printk("Bus - Write Bus Error on buffered store or cache line push\n"); 693 pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");
696 694
697 return 0; 695 return 0;
698} 696}
@@ -705,30 +703,30 @@ int machine_check_generic(struct pt_regs *regs)
705 printk("Caused by (from SRR1=%lx): ", reason); 703 printk("Caused by (from SRR1=%lx): ", reason);
706 switch (reason & 0x601F0000) { 704 switch (reason & 0x601F0000) {
707 case 0x80000: 705 case 0x80000:
708 printk("Machine check signal\n"); 706 pr_cont("Machine check signal\n");
709 break; 707 break;
710 case 0: /* for 601 */ 708 case 0: /* for 601 */
711 case 0x40000: 709 case 0x40000:
712 case 0x140000: /* 7450 MSS error and TEA */ 710 case 0x140000: /* 7450 MSS error and TEA */
713 printk("Transfer error ack signal\n"); 711 pr_cont("Transfer error ack signal\n");
714 break; 712 break;
715 case 0x20000: 713 case 0x20000:
716 printk("Data parity error signal\n"); 714 pr_cont("Data parity error signal\n");
717 break; 715 break;
718 case 0x10000: 716 case 0x10000:
719 printk("Address parity error signal\n"); 717 pr_cont("Address parity error signal\n");
720 break; 718 break;
721 case 0x20000000: 719 case 0x20000000:
722 printk("L1 Data Cache error\n"); 720 pr_cont("L1 Data Cache error\n");
723 break; 721 break;
724 case 0x40000000: 722 case 0x40000000:
725 printk("L1 Instruction Cache error\n"); 723 pr_cont("L1 Instruction Cache error\n");
726 break; 724 break;
727 case 0x00100000: 725 case 0x00100000:
728 printk("L2 data cache parity error\n"); 726 pr_cont("L2 data cache parity error\n");
729 break; 727 break;
730 default: 728 default:
731 printk("Unknown values in msr\n"); 729 pr_cont("Unknown values in msr\n");
732 } 730 }
733 return 0; 731 return 0;
734} 732}
@@ -741,9 +739,7 @@ void machine_check_exception(struct pt_regs *regs)
741 if (!nested) 739 if (!nested)
742 nmi_enter(); 740 nmi_enter();
743 741
744 /* 64s accounts the mce in machine_check_early when in HVMODE */ 742 __this_cpu_inc(irq_stat.mce_exceptions);
745 if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
746 __this_cpu_inc(irq_stat.mce_exceptions);
747 743
748 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 744 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
749 745
@@ -767,12 +763,17 @@ void machine_check_exception(struct pt_regs *regs)
767 if (check_io_access(regs)) 763 if (check_io_access(regs))
768 goto bail; 764 goto bail;
769 765
770 die("Machine check", regs, SIGBUS);
771
772 /* Must die if the interrupt is not recoverable */ 766 /* Must die if the interrupt is not recoverable */
773 if (!(regs->msr & MSR_RI)) 767 if (!(regs->msr & MSR_RI))
774 nmi_panic(regs, "Unrecoverable Machine check"); 768 nmi_panic(regs, "Unrecoverable Machine check");
775 769
770 if (!nested)
771 nmi_exit();
772
773 die("Machine check", regs, SIGBUS);
774
775 return;
776
776bail: 777bail:
777 if (!nested) 778 if (!nested)
778 nmi_exit(); 779 nmi_exit();
@@ -1433,7 +1434,7 @@ void program_check_exception(struct pt_regs *regs)
1433 goto bail; 1434 goto bail;
1434 } else { 1435 } else {
1435 printk(KERN_EMERG "Unexpected TM Bad Thing exception " 1436 printk(KERN_EMERG "Unexpected TM Bad Thing exception "
1436 "at %lx (msr 0x%x)\n", regs->nip, reason); 1437 "at %lx (msr 0x%lx)\n", regs->nip, regs->msr);
1437 die("Unrecoverable exception", regs, SIGABRT); 1438 die("Unrecoverable exception", regs, SIGABRT);
1438 } 1439 }
1439 } 1440 }
@@ -1547,14 +1548,6 @@ void StackOverflow(struct pt_regs *regs)
1547 panic("kernel stack overflow"); 1548 panic("kernel stack overflow");
1548} 1549}
1549 1550
1550void nonrecoverable_exception(struct pt_regs *regs)
1551{
1552 printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
1553 regs->nip, regs->msr);
1554 debugger(regs);
1555 die("nonrecoverable exception", regs, SIGKILL);
1556}
1557
1558void kernel_fp_unavailable_exception(struct pt_regs *regs) 1551void kernel_fp_unavailable_exception(struct pt_regs *regs)
1559{ 1552{
1560 enum ctx_state prev_state = exception_enter(); 1553 enum ctx_state prev_state = exception_enter();
@@ -1750,16 +1743,20 @@ void fp_unavailable_tm(struct pt_regs *regs)
1750 * checkpointed FP registers need to be loaded. 1743 * checkpointed FP registers need to be loaded.
1751 */ 1744 */
1752 tm_reclaim_current(TM_CAUSE_FAC_UNAV); 1745 tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1753 /* Reclaim didn't save out any FPRs to transact_fprs. */ 1746
1747 /*
1748 * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state; these
1749 * were then overwritten with thr->fp_state by tm_reclaim_thread().
1750 *
1751 * At this point, ck{fp,vr}_state contains the exact values we want to
1752 * recheckpoint.
1753 */
1754 1754
1755 /* Enable FP for the task: */ 1755 /* Enable FP for the task: */
1756 current->thread.load_fp = 1; 1756 current->thread.load_fp = 1;
1757 1757
1758 /* This loads and recheckpoints the FP registers from 1758 /*
1759 * thread.fpr[]. They will remain in registers after the 1759 * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
1760 * checkpoint so we don't need to reload them after.
1761 * If VMX is in use, the VRs now hold checkpointed values,
1762 * so we don't want to load the VRs from the thread_struct.
1763 */ 1760 */
1764 tm_recheckpoint(&current->thread); 1761 tm_recheckpoint(&current->thread);
1765} 1762}
@@ -2086,8 +2083,8 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
2086 */ 2083 */
2087void unrecoverable_exception(struct pt_regs *regs) 2084void unrecoverable_exception(struct pt_regs *regs)
2088{ 2085{
2089 printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", 2086 pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
2090 regs->trap, regs->nip); 2087 regs->trap, regs->nip, regs->msr);
2091 die("Unrecoverable exception", regs, SIGABRT); 2088 die("Unrecoverable exception", regs, SIGABRT);
2092} 2089}
2093NOKPROBE_SYMBOL(unrecoverable_exception); 2090NOKPROBE_SYMBOL(unrecoverable_exception);
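Most of the traps.c churn above swaps follow-on printk() calls for pr_cont() so that a multi-part report such as the machine-check "Caused by ..." line is treated as one continued message rather than a series of fresh lines each getting its own prefix. A minimal, hypothetical module sketch of the pattern (the flag values stand in for the real MCSR_* bits and are not taken from the patch):

// Hypothetical demo module, not part of this series: shows the
// printk()/pr_cont() pairing used by the machine-check reporting above.
#include <linux/module.h>
#include <linux/printk.h>

static int __init pr_cont_demo_init(void)
{
	unsigned long reason = 0x3;	/* placeholder, not a real MCSR value */

	printk(KERN_ERR "Caused by (from MCSR=%lx): ", reason);
	if (reason & 0x1)
		pr_cont("Machine Check Signal\n");
	if (reason & 0x2)
		pr_cont("Instruction Cache Parity Error\n");
	return 0;
}

static void __exit pr_cont_demo_exit(void)
{
}

module_init(pr_cont_demo_init);
module_exit(pr_cont_demo_exit);
MODULE_LICENSE("GPL");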
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 3745113fcc65..2a7eb5452aba 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -37,6 +37,7 @@ data_page_branch:
37 mtlr r0 37 mtlr r0
38 addi r3, r3, __kernel_datapage_offset-data_page_branch 38 addi r3, r3, __kernel_datapage_offset-data_page_branch
39 lwz r0,0(r3) 39 lwz r0,0(r3)
40 .cfi_restore lr
40 add r3,r0,r3 41 add r3,r0,r3
41 blr 42 blr
42 .cfi_endproc 43 .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 769c2624e0a6..1e0bc5955a40 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -139,6 +139,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
139 */ 139 */
14099: 14099:
141 li r0,__NR_clock_gettime 141 li r0,__NR_clock_gettime
142 .cfi_restore lr
142 sc 143 sc
143 blr 144 blr
144 .cfi_endproc 145 .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index abf17feffe40..bf9668691511 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -37,6 +37,7 @@ data_page_branch:
37 mtlr r0 37 mtlr r0
38 addi r3, r3, __kernel_datapage_offset-data_page_branch 38 addi r3, r3, __kernel_datapage_offset-data_page_branch
39 lwz r0,0(r3) 39 lwz r0,0(r3)
40 .cfi_restore lr
40 add r3,r0,r3 41 add r3,r0,r3
41 blr 42 blr
42 .cfi_endproc 43 .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index c002adcc694c..a4ed9edfd5f0 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -169,6 +169,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
169 */ 169 */
17099: 17099:
171 li r0,__NR_clock_gettime 171 li r0,__NR_clock_gettime
172 .cfi_restore lr
172 sc 173 sc
173 blr 174 blr
174 .cfi_endproc 175 .cfi_endproc
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 07ae018e550e..434581bcd5b4 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -4,6 +4,9 @@
4#else 4#else
5#define PROVIDE32(x) PROVIDE(x) 5#define PROVIDE32(x) PROVIDE(x)
6#endif 6#endif
7
8#define BSS_FIRST_SECTIONS *(.bss.prominit)
9
7#include <asm/page.h> 10#include <asm/page.h>
8#include <asm-generic/vmlinux.lds.h> 11#include <asm-generic/vmlinux.lds.h>
9#include <asm/cache.h> 12#include <asm/cache.h>
@@ -99,6 +102,9 @@ SECTIONS
99#endif 102#endif
100 /* careful! __ftr_alt_* sections need to be close to .text */ 103 /* careful! __ftr_alt_* sections need to be close to .text */
101 *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); 104 *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
105#ifdef CONFIG_PPC64
106 *(.tramp.ftrace.text);
107#endif
102 SCHED_TEXT 108 SCHED_TEXT
103 CPUIDLE_TEXT 109 CPUIDLE_TEXT
104 LOCK_TEXT 110 LOCK_TEXT
@@ -181,7 +187,15 @@ SECTIONS
181 */ 187 */
182 . = ALIGN(STRICT_ALIGN_SIZE); 188 . = ALIGN(STRICT_ALIGN_SIZE);
183 __init_begin = .; 189 __init_begin = .;
184 INIT_TEXT_SECTION(PAGE_SIZE) :kernel 190 . = ALIGN(PAGE_SIZE);
191 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
192 _sinittext = .;
193 INIT_TEXT
194 _einittext = .;
195#ifdef CONFIG_PPC64
196 *(.tramp.ftrace.init);
197#endif
198 } :kernel
185 199
186 /* .exit.text is discarded at runtime, not link time, 200 /* .exit.text is discarded at runtime, not link time,
187 * to deal with references from __bug_table 201 * to deal with references from __bug_table
@@ -212,8 +226,6 @@ SECTIONS
212 CON_INITCALL 226 CON_INITCALL
213 } 227 }
214 228
215 SECURITY_INIT
216
217 . = ALIGN(8); 229 . = ALIGN(8);
218 __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { 230 __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
219 __start___ftr_fixup = .; 231 __start___ftr_fixup = .;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index f872c04bb5b1..64f1135e7732 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -3,8 +3,6 @@
3# Makefile for Kernel-based Virtual Machine module 3# Makefile for Kernel-based Virtual Machine module
4# 4#
5 5
6subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
7
8ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm 6ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
9KVM := ../../../virt/kvm 7KVM := ../../../virt/kvm
10 8
@@ -75,7 +73,8 @@ kvm-hv-y += \
75 book3s_hv.o \ 73 book3s_hv.o \
76 book3s_hv_interrupts.o \ 74 book3s_hv_interrupts.o \
77 book3s_64_mmu_hv.o \ 75 book3s_64_mmu_hv.o \
78 book3s_64_mmu_radix.o 76 book3s_64_mmu_radix.o \
77 book3s_hv_nested.o
79 78
80kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ 79kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
81 book3s_hv_tm.o 80 book3s_hv_tm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 87348e498c89..fd9893bc7aa1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -78,8 +78,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
78{ 78{
79 if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { 79 if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
80 ulong pc = kvmppc_get_pc(vcpu); 80 ulong pc = kvmppc_get_pc(vcpu);
81 ulong lr = kvmppc_get_lr(vcpu);
81 if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) 82 if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
82 kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); 83 kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
84 if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
85 kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
83 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; 86 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
84 } 87 }
85} 88}
@@ -150,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
150 case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; 153 case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
151 case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; 154 case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
152 case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; 155 case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
153 case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
154 case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; 156 case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
155 case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; 157 case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
156 case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; 158 case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
@@ -236,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
236void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 238void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
237 struct kvm_interrupt *irq) 239 struct kvm_interrupt *irq)
238{ 240{
239 unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; 241 /*
240 242 * This case (KVM_INTERRUPT_SET) should never actually arise for
241 if (irq->irq == KVM_INTERRUPT_SET_LEVEL) 243 * a pseries guest (because pseries guests expect their interrupt
242 vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; 244 * controllers to continue asserting an external interrupt request
245 * until it is acknowledged at the interrupt controller), but is
246 * included to avoid ABI breakage and potentially for other
247 * sorts of guest.
248 *
249 * There is a subtlety here: HV KVM does not test the
250 * external_oneshot flag in the code that synthesizes
251 * external interrupts for the guest just before entering
252 * the guest. That is OK even if userspace did do a
253 * KVM_INTERRUPT_SET on a pseries guest vcpu, because the
254 * caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
255 * which ends up doing a smp_send_reschedule(), which will
256 * pull the guest all the way out to the host, meaning that
257 * we will call kvmppc_core_prepare_to_enter() before entering
258 * the guest again, and that will handle the external_oneshot
259 * flag correctly.
260 */
261 if (irq->irq == KVM_INTERRUPT_SET)
262 vcpu->arch.external_oneshot = 1;
243 263
244 kvmppc_book3s_queue_irqprio(vcpu, vec); 264 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
245} 265}
246 266
247void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) 267void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
248{ 268{
249 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 269 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
250 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
251} 270}
252 271
253void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, 272void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
@@ -278,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
278 vec = BOOK3S_INTERRUPT_DECREMENTER; 297 vec = BOOK3S_INTERRUPT_DECREMENTER;
279 break; 298 break;
280 case BOOK3S_IRQPRIO_EXTERNAL: 299 case BOOK3S_IRQPRIO_EXTERNAL:
281 case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
282 deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; 300 deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
283 vec = BOOK3S_INTERRUPT_EXTERNAL; 301 vec = BOOK3S_INTERRUPT_EXTERNAL;
284 break; 302 break;
@@ -352,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
352 case BOOK3S_IRQPRIO_DECREMENTER: 370 case BOOK3S_IRQPRIO_DECREMENTER:
353 /* DEC interrupts get cleared by mtdec */ 371 /* DEC interrupts get cleared by mtdec */
354 return false; 372 return false;
355 case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: 373 case BOOK3S_IRQPRIO_EXTERNAL:
356 /* External interrupts get cleared by userspace */ 374 /*
375 * External interrupts get cleared by userspace
376 * except when set by the KVM_INTERRUPT ioctl with
377 * KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
378 */
379 if (vcpu->arch.external_oneshot) {
380 vcpu->arch.external_oneshot = 0;
381 return true;
382 }
357 return false; 383 return false;
358 } 384 }
359 385
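The long comment added above is about how userspace injects external interrupts: with this change KVM_INTERRUPT_SET behaves as a one-shot that clear_irqprio() drops once the interrupt has been presented, while KVM_INTERRUPT_SET_LEVEL keeps the source pending until userspace removes it. A hedged userspace sketch of the ioctl involved (raise_external() is an invented helper; error handling and vcpu setup are omitted, and this assumes a powerpc host's kvm.h):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Raise an external interrupt on an already-created vcpu file descriptor. */
static int raise_external(int vcpu_fd, int level_triggered)
{
	struct kvm_interrupt irq = {
		/* SET is now treated as a one-shot by the change above;
		 * SET_LEVEL stays pending until userspace clears it. */
		.irq = level_triggered ? KVM_INTERRUPT_SET_LEVEL
				       : KVM_INTERRUPT_SET,
	};

	return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
}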
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68e14afecac8..c615617e78ac 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -268,14 +268,13 @@ int kvmppc_mmu_hv_init(void)
268{ 268{
269 unsigned long host_lpid, rsvd_lpid; 269 unsigned long host_lpid, rsvd_lpid;
270 270
271 if (!cpu_has_feature(CPU_FTR_HVMODE))
272 return -EINVAL;
273
274 if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) 271 if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
275 return -EINVAL; 272 return -EINVAL;
276 273
277 /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ 274 /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
278 host_lpid = mfspr(SPRN_LPID); 275 host_lpid = 0;
276 if (cpu_has_feature(CPU_FTR_HVMODE))
277 host_lpid = mfspr(SPRN_LPID);
279 rsvd_lpid = LPID_RSVD; 278 rsvd_lpid = LPID_RSVD;
280 279
281 kvmppc_init_lpid(rsvd_lpid + 1); 280 kvmppc_init_lpid(rsvd_lpid + 1);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 998f8d089ac7..d68162ee159b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -10,6 +10,9 @@
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/kvm.h> 11#include <linux/kvm.h>
12#include <linux/kvm_host.h> 12#include <linux/kvm_host.h>
13#include <linux/anon_inodes.h>
14#include <linux/file.h>
15#include <linux/debugfs.h>
13 16
14#include <asm/kvm_ppc.h> 17#include <asm/kvm_ppc.h>
15#include <asm/kvm_book3s.h> 18#include <asm/kvm_book3s.h>
@@ -26,87 +29,74 @@
26 */ 29 */
27static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; 30static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
28 31
29int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 32int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
30 struct kvmppc_pte *gpte, bool data, bool iswrite) 33 struct kvmppc_pte *gpte, u64 root,
34 u64 *pte_ret_p)
31{ 35{
32 struct kvm *kvm = vcpu->kvm; 36 struct kvm *kvm = vcpu->kvm;
33 u32 pid;
34 int ret, level, ps; 37 int ret, level, ps;
35 __be64 prte, rpte; 38 unsigned long rts, bits, offset, index;
36 unsigned long ptbl; 39 u64 pte, base, gpa;
37 unsigned long root, pte, index; 40 __be64 rpte;
38 unsigned long rts, bits, offset;
39 unsigned long gpa;
40 unsigned long proc_tbl_size;
41
42 /* Work out effective PID */
43 switch (eaddr >> 62) {
44 case 0:
45 pid = vcpu->arch.pid;
46 break;
47 case 3:
48 pid = 0;
49 break;
50 default:
51 return -EINVAL;
52 }
53 proc_tbl_size = 1 << ((kvm->arch.process_table & PRTS_MASK) + 12);
54 if (pid * 16 >= proc_tbl_size)
55 return -EINVAL;
56
57 /* Read partition table to find root of tree for effective PID */
58 ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16);
59 ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte));
60 if (ret)
61 return ret;
62 41
63 root = be64_to_cpu(prte);
64 rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) | 42 rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
65 ((root & RTS2_MASK) >> RTS2_SHIFT); 43 ((root & RTS2_MASK) >> RTS2_SHIFT);
66 bits = root & RPDS_MASK; 44 bits = root & RPDS_MASK;
67 root = root & RPDB_MASK; 45 base = root & RPDB_MASK;
68 46
69 offset = rts + 31; 47 offset = rts + 31;
70 48
71 /* current implementations only support 52-bit space */ 49 /* Current implementations only support 52-bit space */
72 if (offset != 52) 50 if (offset != 52)
73 return -EINVAL; 51 return -EINVAL;
74 52
53 /* Walk each level of the radix tree */
75 for (level = 3; level >= 0; --level) { 54 for (level = 3; level >= 0; --level) {
55 u64 addr;
56 /* Check a valid size */
76 if (level && bits != p9_supported_radix_bits[level]) 57 if (level && bits != p9_supported_radix_bits[level])
77 return -EINVAL; 58 return -EINVAL;
78 if (level == 0 && !(bits == 5 || bits == 9)) 59 if (level == 0 && !(bits == 5 || bits == 9))
79 return -EINVAL; 60 return -EINVAL;
80 offset -= bits; 61 offset -= bits;
81 index = (eaddr >> offset) & ((1UL << bits) - 1); 62 index = (eaddr >> offset) & ((1UL << bits) - 1);
82 /* check that low bits of page table base are zero */ 63 /* Check that low bits of page table base are zero */
83 if (root & ((1UL << (bits + 3)) - 1)) 64 if (base & ((1UL << (bits + 3)) - 1))
84 return -EINVAL; 65 return -EINVAL;
85 ret = kvm_read_guest(kvm, root + index * 8, 66 /* Read the entry from guest memory */
86 &rpte, sizeof(rpte)); 67 addr = base + (index * sizeof(rpte));
87 if (ret) 68 ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
69 if (ret) {
70 if (pte_ret_p)
71 *pte_ret_p = addr;
88 return ret; 72 return ret;
73 }
89 pte = __be64_to_cpu(rpte); 74 pte = __be64_to_cpu(rpte);
90 if (!(pte & _PAGE_PRESENT)) 75 if (!(pte & _PAGE_PRESENT))
91 return -ENOENT; 76 return -ENOENT;
77 /* Check if a leaf entry */
92 if (pte & _PAGE_PTE) 78 if (pte & _PAGE_PTE)
93 break; 79 break;
94 bits = pte & 0x1f; 80 /* Get ready to walk the next level */
95 root = pte & 0x0fffffffffffff00ul; 81 base = pte & RPDB_MASK;
82 bits = pte & RPDS_MASK;
96 } 83 }
97 /* need a leaf at lowest level; 512GB pages not supported */ 84
85 /* Need a leaf at lowest level; 512GB pages not supported */
98 if (level < 0 || level == 3) 86 if (level < 0 || level == 3)
99 return -EINVAL; 87 return -EINVAL;
100 88
101 /* offset is now log base 2 of the page size */ 89 /* We found a valid leaf PTE */
90 /* Offset is now log base 2 of the page size */
102 gpa = pte & 0x01fffffffffff000ul; 91 gpa = pte & 0x01fffffffffff000ul;
103 if (gpa & ((1ul << offset) - 1)) 92 if (gpa & ((1ul << offset) - 1))
104 return -EINVAL; 93 return -EINVAL;
105 gpa += eaddr & ((1ul << offset) - 1); 94 gpa |= eaddr & ((1ul << offset) - 1);
106 for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps) 95 for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
107 if (offset == mmu_psize_defs[ps].shift) 96 if (offset == mmu_psize_defs[ps].shift)
108 break; 97 break;
109 gpte->page_size = ps; 98 gpte->page_size = ps;
99 gpte->page_shift = offset;
110 100
111 gpte->eaddr = eaddr; 101 gpte->eaddr = eaddr;
112 gpte->raddr = gpa; 102 gpte->raddr = gpa;
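The rewritten walk above peels index bits off the effective address one level at a time: offset starts at 52 for the only supported address-space size, each level subtracts its bits value, and whatever remains at the leaf is log2 of the page size. A toy stand-alone sketch of just that arithmetic, with no guest-memory reads (nine index bits are used at level 0 here, giving a 4K leaf; the table above also allows five bits for 64K leaves):

#include <stdint.h>
#include <stdio.h>

/* Per-level index widths, level 0..3, mirroring the values accepted above. */
static const unsigned int radix_bits[4] = { 9, 9, 9, 13 };

int main(void)
{
	uint64_t eaddr = 0x000123456789f000ULL;	/* example quadrant-0 address */
	unsigned int offset = 52;		/* 52-bit address space */
	int level;

	for (level = 3; level >= 0; level--) {
		unsigned int bits = radix_bits[level];
		uint64_t index;

		offset -= bits;
		index = (eaddr >> offset) & ((1ULL << bits) - 1);
		printf("level %d: index %llu, remaining offset %u\n",
		       level, (unsigned long long)index, offset);
	}
	/* offset is now 12, i.e. log2(4096): a 4K leaf page. */
	return 0;
}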
@@ -115,6 +105,77 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
115 gpte->may_read = !!(pte & _PAGE_READ); 105 gpte->may_read = !!(pte & _PAGE_READ);
116 gpte->may_write = !!(pte & _PAGE_WRITE); 106 gpte->may_write = !!(pte & _PAGE_WRITE);
117 gpte->may_execute = !!(pte & _PAGE_EXEC); 107 gpte->may_execute = !!(pte & _PAGE_EXEC);
108
109 gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);
110
111 if (pte_ret_p)
112 *pte_ret_p = pte;
113
114 return 0;
115}
116
117/*
118 * Used to walk a partition or process table radix tree in guest memory.
119 * Note: We exploit the fact that a partition table and a process
120 * table have the same layout, a partition-scoped page table and a
121 * process-scoped page table have the same layout, and the 2nd
122 * doubleword of a partition table entry has the same layout as
123 * the PTCR register.
124 */
125int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
126 struct kvmppc_pte *gpte, u64 table,
127 int table_index, u64 *pte_ret_p)
128{
129 struct kvm *kvm = vcpu->kvm;
130 int ret;
131 unsigned long size, ptbl, root;
132 struct prtb_entry entry;
133
134 if ((table & PRTS_MASK) > 24)
135 return -EINVAL;
136 size = 1ul << ((table & PRTS_MASK) + 12);
137
138 /* Is the table big enough to contain this entry? */
139 if ((table_index * sizeof(entry)) >= size)
140 return -EINVAL;
141
142 /* Read the table to find the root of the radix tree */
143 ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
144 ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
145 if (ret)
146 return ret;
147
148 /* Root is stored in the first double word */
149 root = be64_to_cpu(entry.prtb0);
150
151 return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p);
152}
153
154int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
155 struct kvmppc_pte *gpte, bool data, bool iswrite)
156{
157 u32 pid;
158 u64 pte;
159 int ret;
160
161 /* Work out effective PID */
162 switch (eaddr >> 62) {
163 case 0:
164 pid = vcpu->arch.pid;
165 break;
166 case 3:
167 pid = 0;
168 break;
169 default:
170 return -EINVAL;
171 }
172
173 ret = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte,
174 vcpu->kvm->arch.process_table, pid, &pte);
175 if (ret)
176 return ret;
177
178 /* Check privilege (applies only to process scoped translations) */
118 if (kvmppc_get_msr(vcpu) & MSR_PR) { 179 if (kvmppc_get_msr(vcpu) & MSR_PR) {
119 if (pte & _PAGE_PRIVILEGED) { 180 if (pte & _PAGE_PRIVILEGED) {
120 gpte->may_read = 0; 181 gpte->may_read = 0;
@@ -137,20 +198,46 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
137} 198}
138 199
139static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, 200static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
140 unsigned int pshift) 201 unsigned int pshift, unsigned int lpid)
141{ 202{
142 unsigned long psize = PAGE_SIZE; 203 unsigned long psize = PAGE_SIZE;
204 int psi;
205 long rc;
206 unsigned long rb;
143 207
144 if (pshift) 208 if (pshift)
145 psize = 1UL << pshift; 209 psize = 1UL << pshift;
210 else
211 pshift = PAGE_SHIFT;
146 212
147 addr &= ~(psize - 1); 213 addr &= ~(psize - 1);
148 radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize); 214
215 if (!kvmhv_on_pseries()) {
216 radix__flush_tlb_lpid_page(lpid, addr, psize);
217 return;
218 }
219
220 psi = shift_to_mmu_psize(pshift);
221 rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
222 rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
223 lpid, rb);
224 if (rc)
225 pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
149} 226}
150 227
151static void kvmppc_radix_flush_pwc(struct kvm *kvm) 228static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
152{ 229{
153 radix__flush_pwc_lpid(kvm->arch.lpid); 230 long rc;
231
232 if (!kvmhv_on_pseries()) {
233 radix__flush_pwc_lpid(lpid);
234 return;
235 }
236
237 rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
238 lpid, TLBIEL_INVAL_SET_LPID);
239 if (rc)
240 pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
154} 241}
155 242
156static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, 243static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -195,23 +282,38 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
195 kmem_cache_free(kvm_pmd_cache, pmdp); 282 kmem_cache_free(kvm_pmd_cache, pmdp);
196} 283}
197 284
198static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, 285/* Called with kvm->mmu_lock held */
199 unsigned long gpa, unsigned int shift) 286void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
287 unsigned int shift, struct kvm_memory_slot *memslot,
288 unsigned int lpid)
200 289
201{ 290{
202 unsigned long page_size = 1ul << shift;
203 unsigned long old; 291 unsigned long old;
292 unsigned long gfn = gpa >> PAGE_SHIFT;
293 unsigned long page_size = PAGE_SIZE;
294 unsigned long hpa;
204 295
205 old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift); 296 old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
206 kvmppc_radix_tlbie_page(kvm, gpa, shift); 297 kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
207 if (old & _PAGE_DIRTY) { 298
208 unsigned long gfn = gpa >> PAGE_SHIFT; 299 /* The following only applies to L1 entries */
209 struct kvm_memory_slot *memslot; 300 if (lpid != kvm->arch.lpid)
301 return;
210 302
303 if (!memslot) {
211 memslot = gfn_to_memslot(kvm, gfn); 304 memslot = gfn_to_memslot(kvm, gfn);
212 if (memslot && memslot->dirty_bitmap) 305 if (!memslot)
213 kvmppc_update_dirty_map(memslot, gfn, page_size); 306 return;
214 } 307 }
308 if (shift)
309 page_size = 1ul << shift;
310
311 gpa &= ~(page_size - 1);
312 hpa = old & PTE_RPN_MASK;
313 kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);
314
315 if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
316 kvmppc_update_dirty_map(memslot, gfn, page_size);
215} 317}
216 318
217/* 319/*
@@ -224,7 +326,8 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
224 * and emit a warning if encountered, but there may already be data 326 * and emit a warning if encountered, but there may already be data
225 * corruption due to the unexpected mappings. 327 * corruption due to the unexpected mappings.
226 */ 328 */
227static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full) 329static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
330 unsigned int lpid)
228{ 331{
229 if (full) { 332 if (full) {
230 memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); 333 memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
@@ -238,14 +341,15 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
238 WARN_ON_ONCE(1); 341 WARN_ON_ONCE(1);
239 kvmppc_unmap_pte(kvm, p, 342 kvmppc_unmap_pte(kvm, p,
240 pte_pfn(*p) << PAGE_SHIFT, 343 pte_pfn(*p) << PAGE_SHIFT,
241 PAGE_SHIFT); 344 PAGE_SHIFT, NULL, lpid);
242 } 345 }
243 } 346 }
244 347
245 kvmppc_pte_free(pte); 348 kvmppc_pte_free(pte);
246} 349}
247 350
248static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full) 351static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
352 unsigned int lpid)
249{ 353{
250 unsigned long im; 354 unsigned long im;
251 pmd_t *p = pmd; 355 pmd_t *p = pmd;
@@ -260,20 +364,21 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
260 WARN_ON_ONCE(1); 364 WARN_ON_ONCE(1);
261 kvmppc_unmap_pte(kvm, (pte_t *)p, 365 kvmppc_unmap_pte(kvm, (pte_t *)p,
262 pte_pfn(*(pte_t *)p) << PAGE_SHIFT, 366 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
263 PMD_SHIFT); 367 PMD_SHIFT, NULL, lpid);
264 } 368 }
265 } else { 369 } else {
266 pte_t *pte; 370 pte_t *pte;
267 371
268 pte = pte_offset_map(p, 0); 372 pte = pte_offset_map(p, 0);
269 kvmppc_unmap_free_pte(kvm, pte, full); 373 kvmppc_unmap_free_pte(kvm, pte, full, lpid);
270 pmd_clear(p); 374 pmd_clear(p);
271 } 375 }
272 } 376 }
273 kvmppc_pmd_free(pmd); 377 kvmppc_pmd_free(pmd);
274} 378}
275 379
276static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud) 380static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
381 unsigned int lpid)
277{ 382{
278 unsigned long iu; 383 unsigned long iu;
279 pud_t *p = pud; 384 pud_t *p = pud;
@@ -287,36 +392,40 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
287 pmd_t *pmd; 392 pmd_t *pmd;
288 393
289 pmd = pmd_offset(p, 0); 394 pmd = pmd_offset(p, 0);
290 kvmppc_unmap_free_pmd(kvm, pmd, true); 395 kvmppc_unmap_free_pmd(kvm, pmd, true, lpid);
291 pud_clear(p); 396 pud_clear(p);
292 } 397 }
293 } 398 }
294 pud_free(kvm->mm, pud); 399 pud_free(kvm->mm, pud);
295} 400}
296 401
297void kvmppc_free_radix(struct kvm *kvm) 402void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
298{ 403{
299 unsigned long ig; 404 unsigned long ig;
300 pgd_t *pgd;
301 405
302 if (!kvm->arch.pgtable)
303 return;
304 pgd = kvm->arch.pgtable;
305 for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { 406 for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
306 pud_t *pud; 407 pud_t *pud;
307 408
308 if (!pgd_present(*pgd)) 409 if (!pgd_present(*pgd))
309 continue; 410 continue;
310 pud = pud_offset(pgd, 0); 411 pud = pud_offset(pgd, 0);
311 kvmppc_unmap_free_pud(kvm, pud); 412 kvmppc_unmap_free_pud(kvm, pud, lpid);
312 pgd_clear(pgd); 413 pgd_clear(pgd);
313 } 414 }
314 pgd_free(kvm->mm, kvm->arch.pgtable); 415}
315 kvm->arch.pgtable = NULL; 416
417void kvmppc_free_radix(struct kvm *kvm)
418{
419 if (kvm->arch.pgtable) {
420 kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable,
421 kvm->arch.lpid);
422 pgd_free(kvm->mm, kvm->arch.pgtable);
423 kvm->arch.pgtable = NULL;
424 }
316} 425}
317 426
318static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd, 427static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
319 unsigned long gpa) 428 unsigned long gpa, unsigned int lpid)
320{ 429{
321 pte_t *pte = pte_offset_kernel(pmd, 0); 430 pte_t *pte = pte_offset_kernel(pmd, 0);
322 431
@@ -326,13 +435,13 @@ static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
326 * flushing the PWC again. 435 * flushing the PWC again.
327 */ 436 */
328 pmd_clear(pmd); 437 pmd_clear(pmd);
329 kvmppc_radix_flush_pwc(kvm); 438 kvmppc_radix_flush_pwc(kvm, lpid);
330 439
331 kvmppc_unmap_free_pte(kvm, pte, false); 440 kvmppc_unmap_free_pte(kvm, pte, false, lpid);
332} 441}
333 442
334static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, 443static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
335 unsigned long gpa) 444 unsigned long gpa, unsigned int lpid)
336{ 445{
337 pmd_t *pmd = pmd_offset(pud, 0); 446 pmd_t *pmd = pmd_offset(pud, 0);
338 447
@@ -342,9 +451,9 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
342 * so can be freed without flushing the PWC again. 451 * so can be freed without flushing the PWC again.
343 */ 452 */
344 pud_clear(pud); 453 pud_clear(pud);
345 kvmppc_radix_flush_pwc(kvm); 454 kvmppc_radix_flush_pwc(kvm, lpid);
346 455
347 kvmppc_unmap_free_pmd(kvm, pmd, false); 456 kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
348} 457}
349 458
350/* 459/*
@@ -356,8 +465,10 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
356 */ 465 */
357#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)) 466#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
358 467
359static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, 468int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
360 unsigned int level, unsigned long mmu_seq) 469 unsigned long gpa, unsigned int level,
470 unsigned long mmu_seq, unsigned int lpid,
471 unsigned long *rmapp, struct rmap_nested **n_rmap)
361{ 472{
362 pgd_t *pgd; 473 pgd_t *pgd;
363 pud_t *pud, *new_pud = NULL; 474 pud_t *pud, *new_pud = NULL;
@@ -366,7 +477,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
366 int ret; 477 int ret;
367 478
368 /* Traverse the guest's 2nd-level tree, allocate new levels needed */ 479 /* Traverse the guest's 2nd-level tree, allocate new levels needed */
369 pgd = kvm->arch.pgtable + pgd_index(gpa); 480 pgd = pgtable + pgd_index(gpa);
370 pud = NULL; 481 pud = NULL;
371 if (pgd_present(*pgd)) 482 if (pgd_present(*pgd))
372 pud = pud_offset(pgd, gpa); 483 pud = pud_offset(pgd, gpa);
@@ -423,7 +534,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
423 goto out_unlock; 534 goto out_unlock;
424 } 535 }
425 /* Valid 1GB page here already, remove it */ 536 /* Valid 1GB page here already, remove it */
426 kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT); 537 kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
538 lpid);
427 } 539 }
428 if (level == 2) { 540 if (level == 2) {
429 if (!pud_none(*pud)) { 541 if (!pud_none(*pud)) {
@@ -432,9 +544,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
432 * install a large page, so remove and free the page 544 * install a large page, so remove and free the page
433 * table page. 545 * table page.
434 */ 546 */
435 kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa); 547 kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
436 } 548 }
437 kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte); 549 kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
550 if (rmapp && n_rmap)
551 kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
438 ret = 0; 552 ret = 0;
439 goto out_unlock; 553 goto out_unlock;
440 } 554 }
@@ -458,7 +572,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
458 WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) & 572 WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
459 PTE_BITS_MUST_MATCH); 573 PTE_BITS_MUST_MATCH);
460 kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), 574 kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
461 0, pte_val(pte), lgpa, PMD_SHIFT); 575 0, pte_val(pte), lgpa, PMD_SHIFT);
462 ret = 0; 576 ret = 0;
463 goto out_unlock; 577 goto out_unlock;
464 } 578 }
@@ -472,7 +586,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
472 goto out_unlock; 586 goto out_unlock;
473 } 587 }
474 /* Valid 2MB page here already, remove it */ 588 /* Valid 2MB page here already, remove it */
475 kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT); 589 kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
590 lpid);
476 } 591 }
477 if (level == 1) { 592 if (level == 1) {
478 if (!pmd_none(*pmd)) { 593 if (!pmd_none(*pmd)) {
@@ -481,9 +596,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
481 * install a large page, so remove and free the page 596 * install a large page, so remove and free the page
482 * table page. 597 * table page.
483 */ 598 */
484 kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa); 599 kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
485 } 600 }
486 kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); 601 kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
602 if (rmapp && n_rmap)
603 kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
487 ret = 0; 604 ret = 0;
488 goto out_unlock; 605 goto out_unlock;
489 } 606 }
@@ -508,6 +625,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
508 goto out_unlock; 625 goto out_unlock;
509 } 626 }
510 kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); 627 kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
628 if (rmapp && n_rmap)
629 kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
511 ret = 0; 630 ret = 0;
512 631
513 out_unlock: 632 out_unlock:
@@ -521,95 +640,49 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
521 return ret; 640 return ret;
522} 641}
523 642
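kvmppc_create_pte() now takes the target page table, the LPID and an optional nested-rmap pair explicitly, so the same routine can populate either the L1 partition-scoped table or a nested guest's shadow table. Callers with no nested rmap to record pass NULL for the last two arguments; a minimal sketch of such a call, matching the updated L1 fault path later in this patch:

	/* Sketch: insert into the L1 partition-scoped table, no nested rmap. */
	ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
				mmu_seq, kvm->arch.lpid, NULL, NULL);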
524int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, 643bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
525 unsigned long ea, unsigned long dsisr) 644 unsigned long gpa, unsigned int lpid)
645{
646 unsigned long pgflags;
647 unsigned int shift;
648 pte_t *ptep;
649
650 /*
651 * Need to set an R or C bit in the 2nd-level tables;
652 * since we are just helping out the hardware here,
653 * it is sufficient to do what the hardware does.
654 */
655 pgflags = _PAGE_ACCESSED;
656 if (writing)
657 pgflags |= _PAGE_DIRTY;
658 /*
659 * We are walking the secondary (partition-scoped) page table here.
660 * We can do this without disabling irq because the Linux MM
661 * subsystem doesn't do THP splits and collapses on this tree.
662 */
663 ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
664 if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
665 kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
666 return true;
667 }
668 return false;
669}
670
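kvmppc_hv_handle_set_rc() is split out of the page fault handler so the DSISR_SET_RC case can be shared with the nested-guest fault path: it only sets the R/C bits, doing no more than the hardware itself would. A minimal sketch of a caller, assuming kvm->mmu_lock is held around the walk as the in-tree caller further down does:

	/* Sketch: mirrors the DSISR_SET_RC handling later in this file. */
	spin_lock(&kvm->mmu_lock);
	if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing, gpa,
				    kvm->arch.lpid))
		dsisr &= ~DSISR_SET_RC;
	spin_unlock(&kvm->mmu_lock);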
671int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
672 unsigned long gpa,
673 struct kvm_memory_slot *memslot,
674 bool writing, bool kvm_ro,
675 pte_t *inserted_pte, unsigned int *levelp)
526{ 676{
527 struct kvm *kvm = vcpu->kvm; 677 struct kvm *kvm = vcpu->kvm;
528 unsigned long mmu_seq;
529 unsigned long gpa, gfn, hva;
530 struct kvm_memory_slot *memslot;
531 struct page *page = NULL; 678 struct page *page = NULL;
532 long ret; 679 unsigned long mmu_seq;
533 bool writing; 680 unsigned long hva, gfn = gpa >> PAGE_SHIFT;
534 bool upgrade_write = false; 681 bool upgrade_write = false;
535 bool *upgrade_p = &upgrade_write; 682 bool *upgrade_p = &upgrade_write;
536 pte_t pte, *ptep; 683 pte_t pte, *ptep;
537 unsigned long pgflags;
538 unsigned int shift, level; 684 unsigned int shift, level;
539 685 int ret;
540 /* Check for unusual errors */
541 if (dsisr & DSISR_UNSUPP_MMU) {
542 pr_err("KVM: Got unsupported MMU fault\n");
543 return -EFAULT;
544 }
545 if (dsisr & DSISR_BADACCESS) {
546 /* Reflect to the guest as DSI */
547 pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
548 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
549 return RESUME_GUEST;
550 }
551
552 /* Translate the logical address and get the page */
553 gpa = vcpu->arch.fault_gpa & ~0xfffUL;
554 gpa &= ~0xF000000000000000ul;
555 gfn = gpa >> PAGE_SHIFT;
556 if (!(dsisr & DSISR_PRTABLE_FAULT))
557 gpa |= ea & 0xfff;
558 memslot = gfn_to_memslot(kvm, gfn);
559
560 /* No memslot means it's an emulated MMIO region */
561 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
562 if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
563 DSISR_SET_RC)) {
564 /*
565 * Bad address in guest page table tree, or other
566 * unusual error - reflect it to the guest as DSI.
567 */
568 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
569 return RESUME_GUEST;
570 }
571 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
572 dsisr & DSISR_ISSTORE);
573 }
574
575 writing = (dsisr & DSISR_ISSTORE) != 0;
576 if (memslot->flags & KVM_MEM_READONLY) {
577 if (writing) {
578 /* give the guest a DSI */
579 dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
580 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
581 return RESUME_GUEST;
582 }
583 upgrade_p = NULL;
584 }
585
586 if (dsisr & DSISR_SET_RC) {
587 /*
588 * Need to set an R or C bit in the 2nd-level tables;
589 * since we are just helping out the hardware here,
590 * it is sufficient to do what the hardware does.
591 */
592 pgflags = _PAGE_ACCESSED;
593 if (writing)
594 pgflags |= _PAGE_DIRTY;
595 /*
596 * We are walking the secondary page table here. We can do this
597 * without disabling irq.
598 */
599 spin_lock(&kvm->mmu_lock);
600 ptep = __find_linux_pte(kvm->arch.pgtable,
601 gpa, NULL, &shift);
602 if (ptep && pte_present(*ptep) &&
603 (!writing || pte_write(*ptep))) {
604 kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
605 gpa, shift);
606 dsisr &= ~DSISR_SET_RC;
607 }
608 spin_unlock(&kvm->mmu_lock);
609 if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
610 DSISR_PROTFAULT | DSISR_SET_RC)))
611 return RESUME_GUEST;
612 }
613 686
614 /* used to check for invalidations in progress */ 687 /* used to check for invalidations in progress */
615 mmu_seq = kvm->mmu_notifier_seq; 688 mmu_seq = kvm->mmu_notifier_seq;
@@ -622,7 +695,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
622 * is that the page is writable. 695 * is that the page is writable.
623 */ 696 */
624 hva = gfn_to_hva_memslot(memslot, gfn); 697 hva = gfn_to_hva_memslot(memslot, gfn);
625 if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { 698 if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
626 upgrade_write = true; 699 upgrade_write = true;
627 } else { 700 } else {
628 unsigned long pfn; 701 unsigned long pfn;
@@ -690,7 +763,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
690 } 763 }
691 764
692 /* Allocate space in the tree and write the PTE */ 765 /* Allocate space in the tree and write the PTE */
693 ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); 766 ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
767 mmu_seq, kvm->arch.lpid, NULL, NULL);
768 if (inserted_pte)
769 *inserted_pte = pte;
770 if (levelp)
771 *levelp = level;
694 772
695 if (page) { 773 if (page) {
696 if (!ret && (pte_val(pte) & _PAGE_WRITE)) 774 if (!ret && (pte_val(pte) & _PAGE_WRITE))
@@ -698,6 +776,82 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
698 put_page(page); 776 put_page(page);
699 } 777 }
700 778
779 return ret;
780}
781
782int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
783 unsigned long ea, unsigned long dsisr)
784{
785 struct kvm *kvm = vcpu->kvm;
786 unsigned long gpa, gfn;
787 struct kvm_memory_slot *memslot;
788 long ret;
789 bool writing = !!(dsisr & DSISR_ISSTORE);
790 bool kvm_ro = false;
791
792 /* Check for unusual errors */
793 if (dsisr & DSISR_UNSUPP_MMU) {
794 pr_err("KVM: Got unsupported MMU fault\n");
795 return -EFAULT;
796 }
797 if (dsisr & DSISR_BADACCESS) {
798 /* Reflect to the guest as DSI */
799 pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
800 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
801 return RESUME_GUEST;
802 }
803
804 /* Translate the logical address */
805 gpa = vcpu->arch.fault_gpa & ~0xfffUL;
806 gpa &= ~0xF000000000000000ul;
807 gfn = gpa >> PAGE_SHIFT;
808 if (!(dsisr & DSISR_PRTABLE_FAULT))
809 gpa |= ea & 0xfff;
810
811 /* Get the corresponding memslot */
812 memslot = gfn_to_memslot(kvm, gfn);
813
814 /* No memslot means it's an emulated MMIO region */
815 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
816 if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
817 DSISR_SET_RC)) {
818 /*
819 * Bad address in guest page table tree, or other
820 * unusual error - reflect it to the guest as DSI.
821 */
822 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
823 return RESUME_GUEST;
824 }
825 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
826 }
827
828 if (memslot->flags & KVM_MEM_READONLY) {
829 if (writing) {
830 /* give the guest a DSI */
831 kvmppc_core_queue_data_storage(vcpu, ea, DSISR_ISSTORE |
832 DSISR_PROTFAULT);
833 return RESUME_GUEST;
834 }
835 kvm_ro = true;
836 }
837
838 /* Failed to set the reference/change bits */
839 if (dsisr & DSISR_SET_RC) {
840 spin_lock(&kvm->mmu_lock);
841 if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
842 writing, gpa, kvm->arch.lpid))
843 dsisr &= ~DSISR_SET_RC;
844 spin_unlock(&kvm->mmu_lock);
845
846 if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
847 DSISR_PROTFAULT | DSISR_SET_RC)))
848 return RESUME_GUEST;
849 }
850
851 /* Try to insert a pte */
852 ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
853 kvm_ro, NULL, NULL);
854
701 if (ret == 0 || ret == -EAGAIN) 855 if (ret == 0 || ret == -EAGAIN)
702 ret = RESUME_GUEST; 856 ret = RESUME_GUEST;
703 return ret; 857 return ret;
@@ -710,20 +864,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
710 pte_t *ptep; 864 pte_t *ptep;
711 unsigned long gpa = gfn << PAGE_SHIFT; 865 unsigned long gpa = gfn << PAGE_SHIFT;
712 unsigned int shift; 866 unsigned int shift;
713 unsigned long old;
714 867
715 ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); 868 ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
716 if (ptep && pte_present(*ptep)) { 869 if (ptep && pte_present(*ptep))
717 old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0, 870 kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
718 gpa, shift); 871 kvm->arch.lpid);
719 kvmppc_radix_tlbie_page(kvm, gpa, shift);
720 if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
721 unsigned long psize = PAGE_SIZE;
722 if (shift)
723 psize = 1ul << shift;
724 kvmppc_update_dirty_map(memslot, gfn, psize);
725 }
726 }
727 return 0; 872 return 0;
728} 873}
729 874
@@ -778,7 +923,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
778 ret = 1 << (shift - PAGE_SHIFT); 923 ret = 1 << (shift - PAGE_SHIFT);
779 kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, 924 kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
780 gpa, shift); 925 gpa, shift);
781 kvmppc_radix_tlbie_page(kvm, gpa, shift); 926 kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
782 } 927 }
783 return ret; 928 return ret;
784} 929}
@@ -863,6 +1008,215 @@ static void pmd_ctor(void *addr)
863 memset(addr, 0, RADIX_PMD_TABLE_SIZE); 1008 memset(addr, 0, RADIX_PMD_TABLE_SIZE);
864} 1009}
865 1010
1011struct debugfs_radix_state {
1012 struct kvm *kvm;
1013 struct mutex mutex;
1014 unsigned long gpa;
1015 int lpid;
1016 int chars_left;
1017 int buf_index;
1018 char buf[128];
1019 u8 hdr;
1020};
1021
1022static int debugfs_radix_open(struct inode *inode, struct file *file)
1023{
1024 struct kvm *kvm = inode->i_private;
1025 struct debugfs_radix_state *p;
1026
1027 p = kzalloc(sizeof(*p), GFP_KERNEL);
1028 if (!p)
1029 return -ENOMEM;
1030
1031 kvm_get_kvm(kvm);
1032 p->kvm = kvm;
1033 mutex_init(&p->mutex);
1034 file->private_data = p;
1035
1036 return nonseekable_open(inode, file);
1037}
1038
1039static int debugfs_radix_release(struct inode *inode, struct file *file)
1040{
1041 struct debugfs_radix_state *p = file->private_data;
1042
1043 kvm_put_kvm(p->kvm);
1044 kfree(p);
1045 return 0;
1046}
1047
1048static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
1049 size_t len, loff_t *ppos)
1050{
1051 struct debugfs_radix_state *p = file->private_data;
1052 ssize_t ret, r;
1053 unsigned long n;
1054 struct kvm *kvm;
1055 unsigned long gpa;
1056 pgd_t *pgt;
1057 struct kvm_nested_guest *nested;
1058 pgd_t pgd, *pgdp;
1059 pud_t pud, *pudp;
1060 pmd_t pmd, *pmdp;
1061 pte_t *ptep;
1062 int shift;
1063 unsigned long pte;
1064
1065 kvm = p->kvm;
1066 if (!kvm_is_radix(kvm))
1067 return 0;
1068
1069 ret = mutex_lock_interruptible(&p->mutex);
1070 if (ret)
1071 return ret;
1072
1073 if (p->chars_left) {
1074 n = p->chars_left;
1075 if (n > len)
1076 n = len;
1077 r = copy_to_user(buf, p->buf + p->buf_index, n);
1078 n -= r;
1079 p->chars_left -= n;
1080 p->buf_index += n;
1081 buf += n;
1082 len -= n;
1083 ret = n;
1084 if (r) {
1085 if (!n)
1086 ret = -EFAULT;
1087 goto out;
1088 }
1089 }
1090
1091 gpa = p->gpa;
1092 nested = NULL;
1093 pgt = NULL;
1094 while (len != 0 && p->lpid >= 0) {
1095 if (gpa >= RADIX_PGTABLE_RANGE) {
1096 gpa = 0;
1097 pgt = NULL;
1098 if (nested) {
1099 kvmhv_put_nested(nested);
1100 nested = NULL;
1101 }
1102 p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid);
1103 p->hdr = 0;
1104 if (p->lpid < 0)
1105 break;
1106 }
1107 if (!pgt) {
1108 if (p->lpid == 0) {
1109 pgt = kvm->arch.pgtable;
1110 } else {
1111 nested = kvmhv_get_nested(kvm, p->lpid, false);
1112 if (!nested) {
1113 gpa = RADIX_PGTABLE_RANGE;
1114 continue;
1115 }
1116 pgt = nested->shadow_pgtable;
1117 }
1118 }
1119 n = 0;
1120 if (!p->hdr) {
1121 if (p->lpid > 0)
1122 n = scnprintf(p->buf, sizeof(p->buf),
1123 "\nNested LPID %d: ", p->lpid);
1124 n += scnprintf(p->buf + n, sizeof(p->buf) - n,
1125 "pgdir: %lx\n", (unsigned long)pgt);
1126 p->hdr = 1;
1127 goto copy;
1128 }
1129
1130 pgdp = pgt + pgd_index(gpa);
1131 pgd = READ_ONCE(*pgdp);
1132 if (!(pgd_val(pgd) & _PAGE_PRESENT)) {
1133 gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE;
1134 continue;
1135 }
1136
1137 pudp = pud_offset(&pgd, gpa);
1138 pud = READ_ONCE(*pudp);
1139 if (!(pud_val(pud) & _PAGE_PRESENT)) {
1140 gpa = (gpa & PUD_MASK) + PUD_SIZE;
1141 continue;
1142 }
1143 if (pud_val(pud) & _PAGE_PTE) {
1144 pte = pud_val(pud);
1145 shift = PUD_SHIFT;
1146 goto leaf;
1147 }
1148
1149 pmdp = pmd_offset(&pud, gpa);
1150 pmd = READ_ONCE(*pmdp);
1151 if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
1152 gpa = (gpa & PMD_MASK) + PMD_SIZE;
1153 continue;
1154 }
1155 if (pmd_val(pmd) & _PAGE_PTE) {
1156 pte = pmd_val(pmd);
1157 shift = PMD_SHIFT;
1158 goto leaf;
1159 }
1160
1161 ptep = pte_offset_kernel(&pmd, gpa);
1162 pte = pte_val(READ_ONCE(*ptep));
1163 if (!(pte & _PAGE_PRESENT)) {
1164 gpa += PAGE_SIZE;
1165 continue;
1166 }
1167 shift = PAGE_SHIFT;
1168 leaf:
1169 n = scnprintf(p->buf, sizeof(p->buf),
1170 " %lx: %lx %d\n", gpa, pte, shift);
1171 gpa += 1ul << shift;
1172 copy:
1173 p->chars_left = n;
1174 if (n > len)
1175 n = len;
1176 r = copy_to_user(buf, p->buf, n);
1177 n -= r;
1178 p->chars_left -= n;
1179 p->buf_index = n;
1180 buf += n;
1181 len -= n;
1182 ret += n;
1183 if (r) {
1184 if (!ret)
1185 ret = -EFAULT;
1186 break;
1187 }
1188 }
1189 p->gpa = gpa;
1190 if (nested)
1191 kvmhv_put_nested(nested);
1192
1193 out:
1194 mutex_unlock(&p->mutex);
1195 return ret;
1196}
1197
1198static ssize_t debugfs_radix_write(struct file *file, const char __user *buf,
1199 size_t len, loff_t *ppos)
1200{
1201 return -EACCES;
1202}
1203
1204static const struct file_operations debugfs_radix_fops = {
1205 .owner = THIS_MODULE,
1206 .open = debugfs_radix_open,
1207 .release = debugfs_radix_release,
1208 .read = debugfs_radix_read,
1209 .write = debugfs_radix_write,
1210 .llseek = generic_file_llseek,
1211};
1212
1213void kvmhv_radix_debugfs_init(struct kvm *kvm)
1214{
1215 kvm->arch.radix_dentry = debugfs_create_file("radix", 0400,
1216 kvm->arch.debugfs_dir, kvm,
1217 &debugfs_radix_fops);
1218}
1219
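kvmhv_radix_debugfs_init() exposes the partition-scoped tables (the L1 table and any nested shadow tables) through a read-only "radix" file in the VM's debugfs directory. The reader above prints one line per leaf as " <gpa>: <pte> <shift>", preceded by a "pgdir:" header and, for nested guests, a "Nested LPID <n>:" header. A hypothetical userspace reader is sketched below; the file's location is an assumption (it follows KVM's usual per-VM <pid>-<vm fd> debugfs directory), only the line format is taken from the code above:

	/* Hypothetical parser for the "radix" debugfs dump format shown above. */
	#include <stdio.h>

	int main(int argc, char **argv)
	{
		char line[160];
		unsigned long gpa, pte;
		int shift;
		FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f)) {
			if (sscanf(line, " %lx: %lx %d", &gpa, &pte, &shift) == 3)
				printf("gpa 0x%lx -> pte 0x%lx (leaf shift %d)\n",
				       gpa, pte, shift);
			else
				fputs(line, stdout);	/* "pgdir:" / "Nested LPID" headers */
		}
		fclose(f);
		return 0;
	}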
866int kvmppc_radix_init(void) 1220int kvmppc_radix_init(void)
867{ 1221{
868 unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE; 1222 unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 9a3f2646ecc7..62a8d03ba7e9 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
363 return ret; 363 return ret;
364} 364}
365 365
366static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
367 unsigned long tce)
368{
369 unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
370 enum dma_data_direction dir = iommu_tce_direction(tce);
371 struct kvmppc_spapr_tce_iommu_table *stit;
372 unsigned long ua = 0;
373
374 /* Allow userspace to poison TCE table */
375 if (dir == DMA_NONE)
376 return H_SUCCESS;
377
378 if (iommu_tce_check_gpa(stt->page_shift, gpa))
379 return H_TOO_HARD;
380
381 if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
382 return H_TOO_HARD;
383
384 list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
385 unsigned long hpa = 0;
386 struct mm_iommu_table_group_mem_t *mem;
387 long shift = stit->tbl->it_page_shift;
388
389 mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
390 if (!mem)
391 return H_TOO_HARD;
392
393 if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
394 return H_TOO_HARD;
395 }
396
397 return H_SUCCESS;
398}
399
366static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) 400static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
367{ 401{
368 unsigned long hpa = 0; 402 unsigned long hpa = 0;
@@ -376,11 +410,10 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
376{ 410{
377 struct mm_iommu_table_group_mem_t *mem = NULL; 411 struct mm_iommu_table_group_mem_t *mem = NULL;
378 const unsigned long pgsize = 1ULL << tbl->it_page_shift; 412 const unsigned long pgsize = 1ULL << tbl->it_page_shift;
379 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); 413 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
380 414
381 if (!pua) 415 if (!pua)
382 /* it_userspace allocation might be delayed */ 416 return H_SUCCESS;
383 return H_TOO_HARD;
384 417
385 mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize); 418 mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
386 if (!mem) 419 if (!mem)
@@ -401,7 +434,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
401 long ret; 434 long ret;
402 435
403 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) 436 if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
404 return H_HARDWARE; 437 return H_TOO_HARD;
405 438
406 if (dir == DMA_NONE) 439 if (dir == DMA_NONE)
407 return H_SUCCESS; 440 return H_SUCCESS;
@@ -449,15 +482,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
449 return H_TOO_HARD; 482 return H_TOO_HARD;
450 483
451 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) 484 if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
452 return H_HARDWARE; 485 return H_TOO_HARD;
453 486
454 if (mm_iommu_mapped_inc(mem)) 487 if (mm_iommu_mapped_inc(mem))
455 return H_CLOSED; 488 return H_TOO_HARD;
456 489
457 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); 490 ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
458 if (WARN_ON_ONCE(ret)) { 491 if (WARN_ON_ONCE(ret)) {
459 mm_iommu_mapped_dec(mem); 492 mm_iommu_mapped_dec(mem);
460 return H_HARDWARE; 493 return H_TOO_HARD;
461 } 494 }
462 495
463 if (dir != DMA_NONE) 496 if (dir != DMA_NONE)
@@ -517,8 +550,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
517 550
518 idx = srcu_read_lock(&vcpu->kvm->srcu); 551 idx = srcu_read_lock(&vcpu->kvm->srcu);
519 552
520 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, 553 if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
521 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) {
522 ret = H_PARAMETER; 554 ret = H_PARAMETER;
523 goto unlock_exit; 555 goto unlock_exit;
524 } 556 }
@@ -533,14 +565,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
533 ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, 565 ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
534 entry, ua, dir); 566 entry, ua, dir);
535 567
536 if (ret == H_SUCCESS) 568 if (ret != H_SUCCESS) {
537 continue; 569 kvmppc_clear_tce(stit->tbl, entry);
538
539 if (ret == H_TOO_HARD)
540 goto unlock_exit; 570 goto unlock_exit;
541 571 }
542 WARN_ON_ONCE(1);
543 kvmppc_clear_tce(stit->tbl, entry);
544 } 572 }
545 573
546 kvmppc_tce_put(stt, entry, tce); 574 kvmppc_tce_put(stt, entry, tce);
@@ -583,7 +611,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
583 return ret; 611 return ret;
584 612
585 idx = srcu_read_lock(&vcpu->kvm->srcu); 613 idx = srcu_read_lock(&vcpu->kvm->srcu);
586 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { 614 if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
587 ret = H_TOO_HARD; 615 ret = H_TOO_HARD;
588 goto unlock_exit; 616 goto unlock_exit;
589 } 617 }
@@ -599,10 +627,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
599 ret = kvmppc_tce_validate(stt, tce); 627 ret = kvmppc_tce_validate(stt, tce);
600 if (ret != H_SUCCESS) 628 if (ret != H_SUCCESS)
601 goto unlock_exit; 629 goto unlock_exit;
630 }
631
632 for (i = 0; i < npages; ++i) {
633 /*
634 * This looks unsafe, because we validate, then regrab
635 * the TCE from userspace which could have been changed by
636 * another thread.
637 *
638 * But it actually is safe, because the relevant checks will be
639 * re-executed in the following code. If userspace tries to
640 * change this dodgily it will result in a messier failure mode
641 * but won't threaten the host.
642 */
643 if (get_user(tce, tces + i)) {
644 ret = H_TOO_HARD;
645 goto unlock_exit;
646 }
647 tce = be64_to_cpu(tce);
602 648
603 if (kvmppc_gpa_to_ua(vcpu->kvm, 649 if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
604 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
605 &ua, NULL))
606 return H_PARAMETER; 650 return H_PARAMETER;
607 651
608 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 652 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -610,14 +654,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
610 stit->tbl, entry + i, ua, 654 stit->tbl, entry + i, ua,
611 iommu_tce_direction(tce)); 655 iommu_tce_direction(tce));
612 656
613 if (ret == H_SUCCESS) 657 if (ret != H_SUCCESS) {
614 continue; 658 kvmppc_clear_tce(stit->tbl, entry);
615
616 if (ret == H_TOO_HARD)
617 goto unlock_exit; 659 goto unlock_exit;
618 660 }
619 WARN_ON_ONCE(1);
620 kvmppc_clear_tce(stit->tbl, entry);
621 } 661 }
622 662
623 kvmppc_tce_put(stt, entry + i, tce); 663 kvmppc_tce_put(stt, entry + i, tce);
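The virtual-mode H_PUT_TCE_INDIRECT handler is reorganised into two passes over the guest's TCE list: the first pass only validates (now including the preregistered-memory lookup per attached IOMMU table), the second re-reads each TCE from userspace, translates it and performs the mapping, relying on the map path to re-check anything userspace may have changed in between. A condensed sketch of the resulting shape, with locking and error unwinding omitted (not a drop-in):

	for (i = 0; i < npages; ++i) {
		if (get_user(tce, tces + i))
			return H_TOO_HARD;
		ret = kvmppc_tce_validate(stt, be64_to_cpu(tce));
		if (ret != H_SUCCESS)
			return ret;
	}
	for (i = 0; i < npages; ++i) {
		if (get_user(tce, tces + i))	/* may have changed since pass 1 */
			return H_TOO_HARD;
		tce = be64_to_cpu(tce);
		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
			return H_PARAMETER;
		/* map into each attached IOMMU table, then record the guest view */
		kvmppc_tce_put(stt, entry + i, tce);
	}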
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 6821ead4b4eb..2206bc729b9a 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
87} 87}
88EXPORT_SYMBOL_GPL(kvmppc_find_table); 88EXPORT_SYMBOL_GPL(kvmppc_find_table);
89 89
90#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
90/* 91/*
91 * Validates TCE address. 92 * Validates TCE address.
92 * At the moment flags and page mask are validated. 93 * At the moment flags and page mask are validated.
@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table);
94 * to the table and user space is supposed to process them), we can skip 95 * to the table and user space is supposed to process them), we can skip
95 * checking other things (such as TCE is a guest RAM address or the page 96 * checking other things (such as TCE is a guest RAM address or the page
96 * was actually allocated). 97 * was actually allocated).
97 *
98 * WARNING: This will be called in real-mode on HV KVM and virtual
99 * mode on PR KVM
100 */ 98 */
101long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) 99static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
100 unsigned long tce)
102{ 101{
103 unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE); 102 unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
104 enum dma_data_direction dir = iommu_tce_direction(tce); 103 enum dma_data_direction dir = iommu_tce_direction(tce);
104 struct kvmppc_spapr_tce_iommu_table *stit;
105 unsigned long ua = 0;
105 106
106 /* Allow userspace to poison TCE table */ 107 /* Allow userspace to poison TCE table */
107 if (dir == DMA_NONE) 108 if (dir == DMA_NONE)
@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
110 if (iommu_tce_check_gpa(stt->page_shift, gpa)) 111 if (iommu_tce_check_gpa(stt->page_shift, gpa))
111 return H_PARAMETER; 112 return H_PARAMETER;
112 113
114 if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
115 return H_TOO_HARD;
116
117 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
118 unsigned long hpa = 0;
119 struct mm_iommu_table_group_mem_t *mem;
120 long shift = stit->tbl->it_page_shift;
121
122 mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
123 if (!mem)
124 return H_TOO_HARD;
125
126 if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
127 return H_TOO_HARD;
128 }
129
113 return H_SUCCESS; 130 return H_SUCCESS;
114} 131}
115EXPORT_SYMBOL_GPL(kvmppc_tce_validate); 132#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
116 133
117/* Note on the use of page_address() in real mode, 134/* Note on the use of page_address() in real mode,
118 * 135 *
@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
164} 181}
165EXPORT_SYMBOL_GPL(kvmppc_tce_put); 182EXPORT_SYMBOL_GPL(kvmppc_tce_put);
166 183
167long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, 184long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
168 unsigned long *ua, unsigned long **prmap) 185 unsigned long *ua, unsigned long **prmap)
169{ 186{
170 unsigned long gfn = gpa >> PAGE_SHIFT; 187 unsigned long gfn = tce >> PAGE_SHIFT;
171 struct kvm_memory_slot *memslot; 188 struct kvm_memory_slot *memslot;
172 189
173 memslot = search_memslots(kvm_memslots(kvm), gfn); 190 memslot = search_memslots(kvm_memslots(kvm), gfn);
@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
175 return -EINVAL; 192 return -EINVAL;
176 193
177 *ua = __gfn_to_hva_memslot(memslot, gfn) | 194 *ua = __gfn_to_hva_memslot(memslot, gfn) |
178 (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); 195 (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
179 196
180#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 197#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
181 if (prmap) 198 if (prmap)
@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
184 201
185 return 0; 202 return 0;
186} 203}
187EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); 204EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
188 205
189#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 206#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
190static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, 207static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
@@ -197,7 +214,7 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
197 214
198 if (!ret && ((*direction == DMA_FROM_DEVICE) || 215 if (!ret && ((*direction == DMA_FROM_DEVICE) ||
199 (*direction == DMA_BIDIRECTIONAL))) { 216 (*direction == DMA_BIDIRECTIONAL))) {
200 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); 217 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
201 /* 218 /*
202 * kvmppc_rm_tce_iommu_do_map() updates the UA cache after 219 * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
203 * calling this so we still get here a valid UA. 220 * calling this so we still get here a valid UA.
@@ -223,7 +240,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
223{ 240{
224 struct mm_iommu_table_group_mem_t *mem = NULL; 241 struct mm_iommu_table_group_mem_t *mem = NULL;
225 const unsigned long pgsize = 1ULL << tbl->it_page_shift; 242 const unsigned long pgsize = 1ULL << tbl->it_page_shift;
226 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); 243 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
227 244
228 if (!pua) 245 if (!pua)
229 /* it_userspace allocation might be delayed */ 246 /* it_userspace allocation might be delayed */
@@ -287,7 +304,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
287{ 304{
288 long ret; 305 long ret;
289 unsigned long hpa = 0; 306 unsigned long hpa = 0;
290 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); 307 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
291 struct mm_iommu_table_group_mem_t *mem; 308 struct mm_iommu_table_group_mem_t *mem;
292 309
293 if (!pua) 310 if (!pua)
@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
300 317
301 if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, 318 if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
302 &hpa))) 319 &hpa)))
303 return H_HARDWARE; 320 return H_TOO_HARD;
304 321
305 if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) 322 if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
306 return H_CLOSED; 323 return H_TOO_HARD;
307 324
308 ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); 325 ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
309 if (ret) { 326 if (ret) {
@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
368 if (ret != H_SUCCESS) 385 if (ret != H_SUCCESS)
369 return ret; 386 return ret;
370 387
371 ret = kvmppc_tce_validate(stt, tce); 388 ret = kvmppc_rm_tce_validate(stt, tce);
372 if (ret != H_SUCCESS) 389 if (ret != H_SUCCESS)
373 return ret; 390 return ret;
374 391
375 dir = iommu_tce_direction(tce); 392 dir = iommu_tce_direction(tce);
376 if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, 393 if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
377 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
378 return H_PARAMETER; 394 return H_PARAMETER;
379 395
380 entry = ioba >> stt->page_shift; 396 entry = ioba >> stt->page_shift;
@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
387 ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, 403 ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
388 stit->tbl, entry, ua, dir); 404 stit->tbl, entry, ua, dir);
389 405
390 if (ret == H_SUCCESS) 406 if (ret != H_SUCCESS) {
391 continue; 407 kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
392
393 if (ret == H_TOO_HARD)
394 return ret; 408 return ret;
395 409 }
396 WARN_ON_ONCE_RM(1);
397 kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
398 } 410 }
399 411
400 kvmppc_tce_put(stt, entry, tce); 412 kvmppc_tce_put(stt, entry, tce);
@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
480 */ 492 */
481 struct mm_iommu_table_group_mem_t *mem; 493 struct mm_iommu_table_group_mem_t *mem;
482 494
483 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) 495 if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
484 return H_TOO_HARD; 496 return H_TOO_HARD;
485 497
486 mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); 498 mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
496 * We do not require memory to be preregistered in this case 508 * We do not require memory to be preregistered in this case
497 * so lock rmap and do __find_linux_pte_or_hugepte(). 509 * so lock rmap and do __find_linux_pte_or_hugepte().
498 */ 510 */
499 if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) 511 if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
500 return H_TOO_HARD; 512 return H_TOO_HARD;
501 513
502 rmap = (void *) vmalloc_to_phys(rmap); 514 rmap = (void *) vmalloc_to_phys(rmap);
503 if (WARN_ON_ONCE_RM(!rmap)) 515 if (WARN_ON_ONCE_RM(!rmap))
504 return H_HARDWARE; 516 return H_TOO_HARD;
505 517
506 /* 518 /*
507 * Synchronize with the MMU notifier callbacks in 519 * Synchronize with the MMU notifier callbacks in
@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
521 for (i = 0; i < npages; ++i) { 533 for (i = 0; i < npages; ++i) {
522 unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); 534 unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
523 535
524 ret = kvmppc_tce_validate(stt, tce); 536 ret = kvmppc_rm_tce_validate(stt, tce);
525 if (ret != H_SUCCESS) 537 if (ret != H_SUCCESS)
526 goto unlock_exit; 538 goto unlock_exit;
539 }
540
541 for (i = 0; i < npages; ++i) {
542 unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
527 543
528 ua = 0; 544 ua = 0;
529 if (kvmppc_gpa_to_ua(vcpu->kvm, 545 if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
530 tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
531 &ua, NULL))
532 return H_PARAMETER; 546 return H_PARAMETER;
533 547
534 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { 548 list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
536 stit->tbl, entry + i, ua, 550 stit->tbl, entry + i, ua,
537 iommu_tce_direction(tce)); 551 iommu_tce_direction(tce));
538 552
539 if (ret == H_SUCCESS) 553 if (ret != H_SUCCESS) {
540 continue; 554 kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
541 555 entry);
542 if (ret == H_TOO_HARD)
543 goto unlock_exit; 556 goto unlock_exit;
544 557 }
545 WARN_ON_ONCE_RM(1);
546 kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
547 } 558 }
548 559
549 kvmppc_tce_put(stt, entry + i, tce); 560 kvmppc_tce_put(stt, entry + i, tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 36b11c5a0dbb..8c7e933e942e 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -36,7 +36,6 @@
36#define OP_31_XOP_MTSR 210 36#define OP_31_XOP_MTSR 210
37#define OP_31_XOP_MTSRIN 242 37#define OP_31_XOP_MTSRIN 242
38#define OP_31_XOP_TLBIEL 274 38#define OP_31_XOP_TLBIEL 274
39#define OP_31_XOP_TLBIE 306
40/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */ 39/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
41#define OP_31_XOP_FAKE_SC1 308 40#define OP_31_XOP_FAKE_SC1 308
42#define OP_31_XOP_SLBMTE 402 41#define OP_31_XOP_SLBMTE 402
@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
110 vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; 109 vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
111 vcpu->arch.tar_tm = vcpu->arch.tar; 110 vcpu->arch.tar_tm = vcpu->arch.tar;
112 vcpu->arch.lr_tm = vcpu->arch.regs.link; 111 vcpu->arch.lr_tm = vcpu->arch.regs.link;
113 vcpu->arch.cr_tm = vcpu->arch.cr; 112 vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
114 vcpu->arch.xer_tm = vcpu->arch.regs.xer; 113 vcpu->arch.xer_tm = vcpu->arch.regs.xer;
115 vcpu->arch.vrsave_tm = vcpu->arch.vrsave; 114 vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
116} 115}
@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
129 vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; 128 vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
130 vcpu->arch.tar = vcpu->arch.tar_tm; 129 vcpu->arch.tar = vcpu->arch.tar_tm;
131 vcpu->arch.regs.link = vcpu->arch.lr_tm; 130 vcpu->arch.regs.link = vcpu->arch.lr_tm;
132 vcpu->arch.cr = vcpu->arch.cr_tm; 131 vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
133 vcpu->arch.regs.xer = vcpu->arch.xer_tm; 132 vcpu->arch.regs.xer = vcpu->arch.xer_tm;
134 vcpu->arch.vrsave = vcpu->arch.vrsave_tm; 133 vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
135} 134}
@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
141 uint64_t texasr; 140 uint64_t texasr;
142 141
143 /* CR0 = 0 | MSR[TS] | 0 */ 142 /* CR0 = 0 | MSR[TS] | 0 */
144 vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | 143 vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
145 (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) 144 (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
146 << CR0_SHIFT); 145 << CR0_SHIFT);
147 146
@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
220 tm_abort(ra_val); 219 tm_abort(ra_val);
221 220
222 /* CR0 = 0 | MSR[TS] | 0 */ 221 /* CR0 = 0 | MSR[TS] | 0 */
223 vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | 222 vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
224 (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) 223 (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
225 << CR0_SHIFT); 224 << CR0_SHIFT);
226 225
@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
494 493
495 if (!(kvmppc_get_msr(vcpu) & MSR_PR)) { 494 if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
496 preempt_disable(); 495 preempt_disable();
497 vcpu->arch.cr = (CR0_TBEGIN_FAILURE | 496 vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
498 (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT))); 497 (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
499 498
500 vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT | 499 vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
501 (((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) 500 (((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3e3a71594e63..bf8def2159c3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -50,6 +50,7 @@
50#include <asm/reg.h> 50#include <asm/reg.h>
51#include <asm/ppc-opcode.h> 51#include <asm/ppc-opcode.h>
52#include <asm/asm-prototypes.h> 52#include <asm/asm-prototypes.h>
53#include <asm/archrandom.h>
53#include <asm/debug.h> 54#include <asm/debug.h>
54#include <asm/disassemble.h> 55#include <asm/disassemble.h>
55#include <asm/cputable.h> 56#include <asm/cputable.h>
@@ -104,6 +105,10 @@ static bool indep_threads_mode = true;
104module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR); 105module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
105MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)"); 106MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
106 107
108static bool one_vm_per_core;
109module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
110MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
111
107#ifdef CONFIG_KVM_XICS 112#ifdef CONFIG_KVM_XICS
108static struct kernel_param_ops module_param_ops = { 113static struct kernel_param_ops module_param_ops = {
109 .set = param_set_int, 114 .set = param_set_int,
@@ -117,6 +122,16 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
117MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); 122MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
118#endif 123#endif
119 124
125/* If set, guests are allowed to create and control nested guests */
126static bool nested = true;
127module_param(nested, bool, S_IRUGO | S_IWUSR);
128MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
129
130static inline bool nesting_enabled(struct kvm *kvm)
131{
132 return kvm->arch.nested_enable && kvm_is_radix(kvm);
133}
134
120/* If set, the threads on each CPU core have to be in the same MMU mode */ 135/* If set, the threads on each CPU core have to be in the same MMU mode */
121static bool no_mixing_hpt_and_radix; 136static bool no_mixing_hpt_and_radix;
122 137
@@ -173,6 +188,10 @@ static bool kvmppc_ipi_thread(int cpu)
173{ 188{
174 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); 189 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
175 190
191 /* If we're a nested hypervisor, fall back to ordinary IPIs for now */
192 if (kvmhv_on_pseries())
193 return false;
194
176 /* On POWER9 we can use msgsnd to IPI any cpu */ 195 /* On POWER9 we can use msgsnd to IPI any cpu */
177 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 196 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
178 msg |= get_hard_smp_processor_id(cpu); 197 msg |= get_hard_smp_processor_id(cpu);
@@ -410,8 +429,8 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
410 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1); 429 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
411 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n", 430 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
412 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3); 431 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
413 pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n", 432 pr_err("cr = %.8lx xer = %.16lx dsisr = %.8x\n",
414 vcpu->arch.cr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr); 433 vcpu->arch.regs.ccr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
415 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar); 434 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
416 pr_err("fault dar = %.16lx dsisr = %.8x\n", 435 pr_err("fault dar = %.16lx dsisr = %.8x\n",
417 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); 436 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
@@ -730,8 +749,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
730 /* 749 /*
731 * Ensure that the read of vcore->dpdes comes after the read 750 * Ensure that the read of vcore->dpdes comes after the read
732 * of vcpu->doorbell_request. This barrier matches the 751 * of vcpu->doorbell_request. This barrier matches the
733 * lwsync in book3s_hv_rmhandlers.S just before the 752 * smb_wmb() in kvmppc_guest_entry_inject().
734 * fast_guest_return label.
735 */ 753 */
736 smp_rmb(); 754 smp_rmb();
737 vc = vcpu->arch.vcore; 755 vc = vcpu->arch.vcore;
@@ -912,6 +930,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
912 break; 930 break;
913 } 931 }
914 return RESUME_HOST; 932 return RESUME_HOST;
933 case H_SET_DABR:
934 ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4));
935 break;
936 case H_SET_XDABR:
937 ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
938 kvmppc_get_gpr(vcpu, 5));
939 break;
940 case H_GET_TCE:
941 ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
942 kvmppc_get_gpr(vcpu, 5));
943 if (ret == H_TOO_HARD)
944 return RESUME_HOST;
945 break;
915 case H_PUT_TCE: 946 case H_PUT_TCE:
916 ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4), 947 ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
917 kvmppc_get_gpr(vcpu, 5), 948 kvmppc_get_gpr(vcpu, 5),
@@ -935,6 +966,32 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
935 if (ret == H_TOO_HARD) 966 if (ret == H_TOO_HARD)
936 return RESUME_HOST; 967 return RESUME_HOST;
937 break; 968 break;
969 case H_RANDOM:
970 if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
971 ret = H_HARDWARE;
972 break;
973
974 case H_SET_PARTITION_TABLE:
975 ret = H_FUNCTION;
976 if (nesting_enabled(vcpu->kvm))
977 ret = kvmhv_set_partition_table(vcpu);
978 break;
979 case H_ENTER_NESTED:
980 ret = H_FUNCTION;
981 if (!nesting_enabled(vcpu->kvm))
982 break;
983 ret = kvmhv_enter_nested_guest(vcpu);
984 if (ret == H_INTERRUPT) {
985 kvmppc_set_gpr(vcpu, 3, 0);
986 return -EINTR;
987 }
988 break;
989 case H_TLB_INVALIDATE:
990 ret = H_FUNCTION;
991 if (nesting_enabled(vcpu->kvm))
992 ret = kvmhv_do_nested_tlbie(vcpu);
993 break;
994
938 default: 995 default:
939 return RESUME_HOST; 996 return RESUME_HOST;
940 } 997 }
@@ -943,6 +1000,24 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
943 return RESUME_GUEST; 1000 return RESUME_GUEST;
944} 1001}
945 1002
1003/*
1004 * Handle H_CEDE in the nested virtualization case where we haven't
1005 * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
1006 * This has to be done early, not in kvmppc_pseries_do_hcall(), so
1007 * that the cede logic in kvmppc_run_single_vcpu() works properly.
1008 */
1009static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
1010{
1011 vcpu->arch.shregs.msr |= MSR_EE;
1012 vcpu->arch.ceded = 1;
1013 smp_mb();
1014 if (vcpu->arch.prodded) {
1015 vcpu->arch.prodded = 0;
1016 smp_mb();
1017 vcpu->arch.ceded = 0;
1018 }
1019}
1020
946static int kvmppc_hcall_impl_hv(unsigned long cmd) 1021static int kvmppc_hcall_impl_hv(unsigned long cmd)
947{ 1022{
948 switch (cmd) { 1023 switch (cmd) {
@@ -1085,7 +1160,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
1085 return RESUME_GUEST; 1160 return RESUME_GUEST;
1086} 1161}
1087 1162
1088/* Called with vcpu->arch.vcore->lock held */
1089static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, 1163static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
1090 struct task_struct *tsk) 1164 struct task_struct *tsk)
1091{ 1165{
@@ -1190,7 +1264,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
1190 break; 1264 break;
1191 case BOOK3S_INTERRUPT_H_INST_STORAGE: 1265 case BOOK3S_INTERRUPT_H_INST_STORAGE:
1192 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu); 1266 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
1193 vcpu->arch.fault_dsisr = 0; 1267 vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
1268 DSISR_SRR1_MATCH_64S;
1269 if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
1270 vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
1194 r = RESUME_PAGE_FAULT; 1271 r = RESUME_PAGE_FAULT;
1195 break; 1272 break;
1196 /* 1273 /*
@@ -1206,10 +1283,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
1206 swab32(vcpu->arch.emul_inst) : 1283 swab32(vcpu->arch.emul_inst) :
1207 vcpu->arch.emul_inst; 1284 vcpu->arch.emul_inst;
1208 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { 1285 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
1209 /* Need vcore unlocked to call kvmppc_get_last_inst */
1210 spin_unlock(&vcpu->arch.vcore->lock);
1211 r = kvmppc_emulate_debug_inst(run, vcpu); 1286 r = kvmppc_emulate_debug_inst(run, vcpu);
1212 spin_lock(&vcpu->arch.vcore->lock);
1213 } else { 1287 } else {
1214 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 1288 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1215 r = RESUME_GUEST; 1289 r = RESUME_GUEST;
@@ -1225,12 +1299,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
1225 case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: 1299 case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
1226 r = EMULATE_FAIL; 1300 r = EMULATE_FAIL;
1227 if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) && 1301 if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
1228 cpu_has_feature(CPU_FTR_ARCH_300)) { 1302 cpu_has_feature(CPU_FTR_ARCH_300))
1229 /* Need vcore unlocked to call kvmppc_get_last_inst */
1230 spin_unlock(&vcpu->arch.vcore->lock);
1231 r = kvmppc_emulate_doorbell_instr(vcpu); 1303 r = kvmppc_emulate_doorbell_instr(vcpu);
1232 spin_lock(&vcpu->arch.vcore->lock);
1233 }
1234 if (r == EMULATE_FAIL) { 1304 if (r == EMULATE_FAIL) {
1235 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 1305 kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
1236 r = RESUME_GUEST; 1306 r = RESUME_GUEST;
@@ -1265,6 +1335,104 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
1265 return r; 1335 return r;
1266} 1336}
1267 1337
1338static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
1339{
1340 int r;
1341 int srcu_idx;
1342
1343 vcpu->stat.sum_exits++;
1344
1345 /*
1346 * This can happen if an interrupt occurs in the last stages
1347 * of guest entry or the first stages of guest exit (i.e. after
1348 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
1349 * and before setting it to KVM_GUEST_MODE_HOST_HV).
1350 * That can happen due to a bug, or due to a machine check
1351 * occurring at just the wrong time.
1352 */
1353 if (vcpu->arch.shregs.msr & MSR_HV) {
1354 pr_emerg("KVM trap in HV mode while nested!\n");
1355 pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
1356 vcpu->arch.trap, kvmppc_get_pc(vcpu),
1357 vcpu->arch.shregs.msr);
1358 kvmppc_dump_regs(vcpu);
1359 return RESUME_HOST;
1360 }
1361 switch (vcpu->arch.trap) {
1362 /* We're good on these - the host merely wanted to get our attention */
1363 case BOOK3S_INTERRUPT_HV_DECREMENTER:
1364 vcpu->stat.dec_exits++;
1365 r = RESUME_GUEST;
1366 break;
1367 case BOOK3S_INTERRUPT_EXTERNAL:
1368 vcpu->stat.ext_intr_exits++;
1369 r = RESUME_HOST;
1370 break;
1371 case BOOK3S_INTERRUPT_H_DOORBELL:
1372 case BOOK3S_INTERRUPT_H_VIRT:
1373 vcpu->stat.ext_intr_exits++;
1374 r = RESUME_GUEST;
1375 break;
 1376 /* SR/HMI/PMI are HV interrupts that the host has handled. Resume guest. */
1377 case BOOK3S_INTERRUPT_HMI:
1378 case BOOK3S_INTERRUPT_PERFMON:
1379 case BOOK3S_INTERRUPT_SYSTEM_RESET:
1380 r = RESUME_GUEST;
1381 break;
1382 case BOOK3S_INTERRUPT_MACHINE_CHECK:
1383 /* Pass the machine check to the L1 guest */
1384 r = RESUME_HOST;
1385 /* Print the MCE event to host console. */
1386 machine_check_print_event_info(&vcpu->arch.mce_evt, false);
1387 break;
1388 /*
1389 * We get these next two if the guest accesses a page which it thinks
1390 * it has mapped but which is not actually present, either because
 1391 * it is for an emulated I/O device or because the corresponding
1392 * host page has been paged out.
1393 */
1394 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
1395 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1396 r = kvmhv_nested_page_fault(vcpu);
1397 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1398 break;
1399 case BOOK3S_INTERRUPT_H_INST_STORAGE:
1400 vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
1401 vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
1402 DSISR_SRR1_MATCH_64S;
1403 if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
1404 vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
1405 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1406 r = kvmhv_nested_page_fault(vcpu);
1407 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1408 break;
1409
1410#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1411 case BOOK3S_INTERRUPT_HV_SOFTPATCH:
1412 /*
1413 * This occurs for various TM-related instructions that
1414 * we need to emulate on POWER9 DD2.2. We have already
1415 * handled the cases where the guest was in real-suspend
1416 * mode and was transitioning to transactional state.
1417 */
1418 r = kvmhv_p9_tm_emulation(vcpu);
1419 break;
1420#endif
1421
1422 case BOOK3S_INTERRUPT_HV_RM_HARD:
1423 vcpu->arch.trap = 0;
1424 r = RESUME_GUEST;
1425 if (!xive_enabled())
1426 kvmppc_xics_rm_complete(vcpu, 0);
1427 break;
1428 default:
1429 r = RESUME_HOST;
1430 break;
1431 }
1432
1433 return r;
1434}
1435
1268static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu, 1436static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
1269 struct kvm_sregs *sregs) 1437 struct kvm_sregs *sregs)
1270{ 1438{
@@ -1555,6 +1723,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1555 case KVM_REG_PPC_ONLINE: 1723 case KVM_REG_PPC_ONLINE:
1556 *val = get_reg_val(id, vcpu->arch.online); 1724 *val = get_reg_val(id, vcpu->arch.online);
1557 break; 1725 break;
1726 case KVM_REG_PPC_PTCR:
1727 *val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
1728 break;
1558 default: 1729 default:
1559 r = -EINVAL; 1730 r = -EINVAL;
1560 break; 1731 break;
@@ -1786,6 +1957,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1786 atomic_dec(&vcpu->arch.vcore->online_count); 1957 atomic_dec(&vcpu->arch.vcore->online_count);
1787 vcpu->arch.online = i; 1958 vcpu->arch.online = i;
1788 break; 1959 break;
1960 case KVM_REG_PPC_PTCR:
1961 vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
1962 break;
1789 default: 1963 default:
1790 r = -EINVAL; 1964 r = -EINVAL;
1791 break; 1965 break;
@@ -2019,15 +2193,18 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
2019 * Set the default HFSCR for the guest from the host value. 2193 * Set the default HFSCR for the guest from the host value.
2020 * This value is only used on POWER9. 2194 * This value is only used on POWER9.
2021 * On POWER9, we want to virtualize the doorbell facility, so we 2195 * On POWER9, we want to virtualize the doorbell facility, so we
2022 * turn off the HFSCR bit, which causes those instructions to trap. 2196 * don't set the HFSCR_MSGP bit, and that causes those instructions
2197 * to trap and then we emulate them.
2023 */ 2198 */
2024 vcpu->arch.hfscr = mfspr(SPRN_HFSCR); 2199 vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
2025 if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) 2200 HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
2201 if (cpu_has_feature(CPU_FTR_HVMODE)) {
2202 vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
2203 if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
2204 vcpu->arch.hfscr |= HFSCR_TM;
2205 }
2206 if (cpu_has_feature(CPU_FTR_TM_COMP))
2026 vcpu->arch.hfscr |= HFSCR_TM; 2207 vcpu->arch.hfscr |= HFSCR_TM;
2027 else if (!cpu_has_feature(CPU_FTR_TM_COMP))
2028 vcpu->arch.hfscr &= ~HFSCR_TM;
2029 if (cpu_has_feature(CPU_FTR_ARCH_300))
2030 vcpu->arch.hfscr &= ~HFSCR_MSGP;
2031 2208
2032 kvmppc_mmu_book3s_hv_init(vcpu); 2209 kvmppc_mmu_book3s_hv_init(vcpu);
2033 2210
@@ -2242,10 +2419,18 @@ static void kvmppc_release_hwthread(int cpu)
2242 2419
2243static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) 2420static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
2244{ 2421{
2422 struct kvm_nested_guest *nested = vcpu->arch.nested;
2423 cpumask_t *cpu_in_guest;
2245 int i; 2424 int i;
2246 2425
2247 cpu = cpu_first_thread_sibling(cpu); 2426 cpu = cpu_first_thread_sibling(cpu);
2248 cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); 2427 if (nested) {
2428 cpumask_set_cpu(cpu, &nested->need_tlb_flush);
2429 cpu_in_guest = &nested->cpu_in_guest;
2430 } else {
2431 cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
2432 cpu_in_guest = &kvm->arch.cpu_in_guest;
2433 }
2249 /* 2434 /*
2250 * Make sure setting of bit in need_tlb_flush precedes 2435 * Make sure setting of bit in need_tlb_flush precedes
2251 * testing of cpu_in_guest bits. The matching barrier on 2436 * testing of cpu_in_guest bits. The matching barrier on
@@ -2253,13 +2438,23 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
2253 */ 2438 */
2254 smp_mb(); 2439 smp_mb();
2255 for (i = 0; i < threads_per_core; ++i) 2440 for (i = 0; i < threads_per_core; ++i)
2256 if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest)) 2441 if (cpumask_test_cpu(cpu + i, cpu_in_guest))
2257 smp_call_function_single(cpu + i, do_nothing, NULL, 1); 2442 smp_call_function_single(cpu + i, do_nothing, NULL, 1);
2258} 2443}
2259 2444
2260static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) 2445static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
2261{ 2446{
2447 struct kvm_nested_guest *nested = vcpu->arch.nested;
2262 struct kvm *kvm = vcpu->kvm; 2448 struct kvm *kvm = vcpu->kvm;
2449 int prev_cpu;
2450
2451 if (!cpu_has_feature(CPU_FTR_HVMODE))
2452 return;
2453
2454 if (nested)
2455 prev_cpu = nested->prev_cpu[vcpu->arch.nested_vcpu_id];
2456 else
2457 prev_cpu = vcpu->arch.prev_cpu;
2263 2458
2264 /* 2459 /*
2265 * With radix, the guest can do TLB invalidations itself, 2460 * With radix, the guest can do TLB invalidations itself,
@@ -2273,12 +2468,46 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
2273 * ran to flush the TLB. The TLB is shared between threads, 2468 * ran to flush the TLB. The TLB is shared between threads,
2274 * so we use a single bit in .need_tlb_flush for all 4 threads. 2469 * so we use a single bit in .need_tlb_flush for all 4 threads.
2275 */ 2470 */
2276 if (vcpu->arch.prev_cpu != pcpu) { 2471 if (prev_cpu != pcpu) {
2277 if (vcpu->arch.prev_cpu >= 0 && 2472 if (prev_cpu >= 0 &&
2278 cpu_first_thread_sibling(vcpu->arch.prev_cpu) != 2473 cpu_first_thread_sibling(prev_cpu) !=
2279 cpu_first_thread_sibling(pcpu)) 2474 cpu_first_thread_sibling(pcpu))
2280 radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); 2475 radix_flush_cpu(kvm, prev_cpu, vcpu);
2281 vcpu->arch.prev_cpu = pcpu; 2476 if (nested)
2477 nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
2478 else
2479 vcpu->arch.prev_cpu = pcpu;
2480 }
2481}
2482
2483static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
2484 struct kvm_nested_guest *nested)
2485{
2486 cpumask_t *need_tlb_flush;
2487 int lpid;
2488
2489 if (!cpu_has_feature(CPU_FTR_HVMODE))
2490 return;
2491
2492 if (cpu_has_feature(CPU_FTR_ARCH_300))
2493 pcpu &= ~0x3UL;
2494
2495 if (nested) {
2496 lpid = nested->shadow_lpid;
2497 need_tlb_flush = &nested->need_tlb_flush;
2498 } else {
2499 lpid = kvm->arch.lpid;
2500 need_tlb_flush = &kvm->arch.need_tlb_flush;
2501 }
2502
2503 mtspr(SPRN_LPID, lpid);
2504 isync();
2505 smp_mb();
2506
2507 if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
2508 radix__local_flush_tlb_lpid_guest(lpid);
2509 /* Clear the bit after the TLB flush */
2510 cpumask_clear_cpu(pcpu, need_tlb_flush);
2282 } 2511 }
2283} 2512}
2284 2513
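The need_tlb_flush handshake above depends on a pair of full barriers: the flushing side sets the bit and then tests cpu_in_guest, while the entering side sets cpu_in_guest (in kvmppc_start_thread()) and then tests need_tlb_flush in kvmppc_radix_check_need_tlb_flush(). A minimal sketch of the two halves, with the hypothetical names flush_side()/entry_side() standing in for the real call paths:

	/* Sketch only; flush_side()/entry_side() are illustrative names. */
	static void flush_side(struct kvm *kvm, int core_cpu)
	{
		cpumask_set_cpu(core_cpu, &kvm->arch.need_tlb_flush);
		smp_mb();	/* order the set against the cpu_in_guest test */
		if (cpumask_test_cpu(core_cpu, &kvm->arch.cpu_in_guest))
			smp_call_function_single(core_cpu, do_nothing, NULL, 1);
	}

	static void entry_side(struct kvm *kvm, int pcpu)
	{
		cpumask_set_cpu(pcpu, &kvm->arch.cpu_in_guest);
		smp_mb();	/* pairs with the barrier in flush_side() */
		if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) {
			radix__local_flush_tlb_lpid_guest(kvm->arch.lpid);
			cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush);
		}
	}
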
@@ -2493,6 +2722,10 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2493 if (!cpu_has_feature(CPU_FTR_ARCH_207S)) 2722 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2494 return false; 2723 return false;
2495 2724
2725 /* In one_vm_per_core mode, require all vcores to be from the same vm */
2726 if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
2727 return false;
2728
2496 /* Some POWER9 chips require all threads to be in the same MMU mode */ 2729 /* Some POWER9 chips require all threads to be in the same MMU mode */
2497 if (no_mixing_hpt_and_radix && 2730 if (no_mixing_hpt_and_radix &&
2498 kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm)) 2731 kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
@@ -2600,6 +2833,14 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2600 spin_lock(&vc->lock); 2833 spin_lock(&vc->lock);
2601 now = get_tb(); 2834 now = get_tb();
2602 for_each_runnable_thread(i, vcpu, vc) { 2835 for_each_runnable_thread(i, vcpu, vc) {
2836 /*
2837 * It's safe to unlock the vcore in the loop here, because
2838 * for_each_runnable_thread() is safe against removal of
2839 * the vcpu, and the vcore state is VCORE_EXITING here,
2840 * so any vcpus becoming runnable will have their arch.trap
2841 * set to zero and can't actually run in the guest.
2842 */
2843 spin_unlock(&vc->lock);
2603 /* cancel pending dec exception if dec is positive */ 2844 /* cancel pending dec exception if dec is positive */
2604 if (now < vcpu->arch.dec_expires && 2845 if (now < vcpu->arch.dec_expires &&
2605 kvmppc_core_pending_dec(vcpu)) 2846 kvmppc_core_pending_dec(vcpu))
@@ -2615,6 +2856,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2615 vcpu->arch.ret = ret; 2856 vcpu->arch.ret = ret;
2616 vcpu->arch.trap = 0; 2857 vcpu->arch.trap = 0;
2617 2858
2859 spin_lock(&vc->lock);
2618 if (is_kvmppc_resume_guest(vcpu->arch.ret)) { 2860 if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
2619 if (vcpu->arch.pending_exceptions) 2861 if (vcpu->arch.pending_exceptions)
2620 kvmppc_core_prepare_to_enter(vcpu); 2862 kvmppc_core_prepare_to_enter(vcpu);
@@ -2963,8 +3205,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2963 spin_unlock(&core_info.vc[sub]->lock); 3205 spin_unlock(&core_info.vc[sub]->lock);
2964 3206
2965 if (kvm_is_radix(vc->kvm)) { 3207 if (kvm_is_radix(vc->kvm)) {
2966 int tmp = pcpu;
2967
2968 /* 3208 /*
2969 * Do we need to flush the process scoped TLB for the LPAR? 3209 * Do we need to flush the process scoped TLB for the LPAR?
2970 * 3210 *
@@ -2975,17 +3215,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2975 * 3215 *
2976 * Hash must be flushed in realmode in order to use tlbiel. 3216 * Hash must be flushed in realmode in order to use tlbiel.
2977 */ 3217 */
2978 mtspr(SPRN_LPID, vc->kvm->arch.lpid); 3218 kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL);
2979 isync();
2980
2981 if (cpu_has_feature(CPU_FTR_ARCH_300))
2982 tmp &= ~0x3UL;
2983
2984 if (cpumask_test_cpu(tmp, &vc->kvm->arch.need_tlb_flush)) {
2985 radix__local_flush_tlb_lpid_guest(vc->kvm->arch.lpid);
2986 /* Clear the bit after the TLB flush */
2987 cpumask_clear_cpu(tmp, &vc->kvm->arch.need_tlb_flush);
2988 }
2989 } 3219 }
2990 3220
2991 /* 3221 /*
@@ -3080,6 +3310,300 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
3080} 3310}
3081 3311
3082/* 3312/*
3313 * Load up hypervisor-mode registers on P9.
3314 */
3315static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
3316 unsigned long lpcr)
3317{
3318 struct kvmppc_vcore *vc = vcpu->arch.vcore;
3319 s64 hdec;
3320 u64 tb, purr, spurr;
3321 int trap;
3322 unsigned long host_hfscr = mfspr(SPRN_HFSCR);
3323 unsigned long host_ciabr = mfspr(SPRN_CIABR);
3324 unsigned long host_dawr = mfspr(SPRN_DAWR);
3325 unsigned long host_dawrx = mfspr(SPRN_DAWRX);
3326 unsigned long host_psscr = mfspr(SPRN_PSSCR);
3327 unsigned long host_pidr = mfspr(SPRN_PID);
3328
3329 hdec = time_limit - mftb();
3330 if (hdec < 0)
3331 return BOOK3S_INTERRUPT_HV_DECREMENTER;
3332 mtspr(SPRN_HDEC, hdec);
3333
3334 if (vc->tb_offset) {
3335 u64 new_tb = mftb() + vc->tb_offset;
3336 mtspr(SPRN_TBU40, new_tb);
3337 tb = mftb();
3338 if ((tb & 0xffffff) < (new_tb & 0xffffff))
3339 mtspr(SPRN_TBU40, new_tb + 0x1000000);
3340 vc->tb_offset_applied = vc->tb_offset;
3341 }
3342
3343 if (vc->pcr)
3344 mtspr(SPRN_PCR, vc->pcr);
3345 mtspr(SPRN_DPDES, vc->dpdes);
3346 mtspr(SPRN_VTB, vc->vtb);
3347
3348 local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
3349 local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
3350 mtspr(SPRN_PURR, vcpu->arch.purr);
3351 mtspr(SPRN_SPURR, vcpu->arch.spurr);
3352
3353 if (cpu_has_feature(CPU_FTR_DAWR)) {
3354 mtspr(SPRN_DAWR, vcpu->arch.dawr);
3355 mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
3356 }
3357 mtspr(SPRN_CIABR, vcpu->arch.ciabr);
3358 mtspr(SPRN_IC, vcpu->arch.ic);
3359 mtspr(SPRN_PID, vcpu->arch.pid);
3360
3361 mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
3362 (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
3363
3364 mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
3365
3366 mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
3367 mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
3368 mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
3369 mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
3370
3371 mtspr(SPRN_AMOR, ~0UL);
3372
3373 mtspr(SPRN_LPCR, lpcr);
3374 isync();
3375
3376 kvmppc_xive_push_vcpu(vcpu);
3377
3378 mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
3379 mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
3380
3381 trap = __kvmhv_vcpu_entry_p9(vcpu);
3382
3383 /* Advance host PURR/SPURR by the amount used by guest */
3384 purr = mfspr(SPRN_PURR);
3385 spurr = mfspr(SPRN_SPURR);
3386 mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
3387 purr - vcpu->arch.purr);
3388 mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
3389 spurr - vcpu->arch.spurr);
3390 vcpu->arch.purr = purr;
3391 vcpu->arch.spurr = spurr;
3392
3393 vcpu->arch.ic = mfspr(SPRN_IC);
3394 vcpu->arch.pid = mfspr(SPRN_PID);
3395 vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
3396
3397 vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
3398 vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
3399 vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
3400 vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
3401
3402 mtspr(SPRN_PSSCR, host_psscr);
3403 mtspr(SPRN_HFSCR, host_hfscr);
3404 mtspr(SPRN_CIABR, host_ciabr);
3405 mtspr(SPRN_DAWR, host_dawr);
3406 mtspr(SPRN_DAWRX, host_dawrx);
3407 mtspr(SPRN_PID, host_pidr);
3408
3409 /*
 3410 * Since this is radix, do an eieio; tlbsync; ptesync sequence in
3411 * case we interrupted the guest between a tlbie and a ptesync.
3412 */
3413 asm volatile("eieio; tlbsync; ptesync");
3414
3415 mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
3416 isync();
3417
3418 vc->dpdes = mfspr(SPRN_DPDES);
3419 vc->vtb = mfspr(SPRN_VTB);
3420 mtspr(SPRN_DPDES, 0);
3421 if (vc->pcr)
3422 mtspr(SPRN_PCR, 0);
3423
3424 if (vc->tb_offset_applied) {
3425 u64 new_tb = mftb() - vc->tb_offset_applied;
3426 mtspr(SPRN_TBU40, new_tb);
3427 tb = mftb();
3428 if ((tb & 0xffffff) < (new_tb & 0xffffff))
3429 mtspr(SPRN_TBU40, new_tb + 0x1000000);
3430 vc->tb_offset_applied = 0;
3431 }
3432
3433 mtspr(SPRN_HDEC, 0x7fffffff);
3434 mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
3435
3436 return trap;
3437}
3438
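The TBU40 sequence used for tb_offset in kvmhv_load_hv_regs_and_go() is subtle: mtspr(SPRN_TBU40) loads only the upper 40 bits of the timebase, so a carry out of the low 24 bits between computing new_tb and the write would be lost. Re-reading the timebase and comparing the low 24 bits detects that case, and the second write bumps the upper bits by one unit (0x1000000). A hedged sketch of the same logic as a helper (write_tb_upper40() is an illustrative name, not an upstream function):

	/* Apply a timebase value via TBU40, compensating for a lost low-bit carry. */
	static void write_tb_upper40(u64 new_tb)
	{
		u64 tb;

		mtspr(SPRN_TBU40, new_tb);	/* sets only the top 40 bits */
		tb = mftb();
		if ((tb & 0xffffff) < (new_tb & 0xffffff))
			mtspr(SPRN_TBU40, new_tb + 0x1000000);	/* bump by 1 << 24 */
	}
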
3439/*
3440 * Virtual-mode guest entry for POWER9 and later when the host and
3441 * guest are both using the radix MMU. The LPIDR has already been set.
3442 */
3443int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
3444 unsigned long lpcr)
3445{
3446 struct kvmppc_vcore *vc = vcpu->arch.vcore;
3447 unsigned long host_dscr = mfspr(SPRN_DSCR);
3448 unsigned long host_tidr = mfspr(SPRN_TIDR);
3449 unsigned long host_iamr = mfspr(SPRN_IAMR);
3450 s64 dec;
3451 u64 tb;
3452 int trap, save_pmu;
3453
3454 dec = mfspr(SPRN_DEC);
3455 tb = mftb();
3456 if (dec < 512)
3457 return BOOK3S_INTERRUPT_HV_DECREMENTER;
3458 local_paca->kvm_hstate.dec_expires = dec + tb;
3459 if (local_paca->kvm_hstate.dec_expires < time_limit)
3460 time_limit = local_paca->kvm_hstate.dec_expires;
3461
3462 vcpu->arch.ceded = 0;
3463
3464 kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
3465
3466 kvmppc_subcore_enter_guest();
3467
3468 vc->entry_exit_map = 1;
3469 vc->in_guest = 1;
3470
3471 if (vcpu->arch.vpa.pinned_addr) {
3472 struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
3473 u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
3474 lp->yield_count = cpu_to_be32(yield_count);
3475 vcpu->arch.vpa.dirty = 1;
3476 }
3477
3478 if (cpu_has_feature(CPU_FTR_TM) ||
3479 cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
3480 kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
3481
3482 kvmhv_load_guest_pmu(vcpu);
3483
3484 msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
3485 load_fp_state(&vcpu->arch.fp);
3486#ifdef CONFIG_ALTIVEC
3487 load_vr_state(&vcpu->arch.vr);
3488#endif
3489
3490 mtspr(SPRN_DSCR, vcpu->arch.dscr);
3491 mtspr(SPRN_IAMR, vcpu->arch.iamr);
3492 mtspr(SPRN_PSPB, vcpu->arch.pspb);
3493 mtspr(SPRN_FSCR, vcpu->arch.fscr);
3494 mtspr(SPRN_TAR, vcpu->arch.tar);
3495 mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
3496 mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
3497 mtspr(SPRN_BESCR, vcpu->arch.bescr);
3498 mtspr(SPRN_WORT, vcpu->arch.wort);
3499 mtspr(SPRN_TIDR, vcpu->arch.tid);
3500 mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
3501 mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
3502 mtspr(SPRN_AMR, vcpu->arch.amr);
3503 mtspr(SPRN_UAMOR, vcpu->arch.uamor);
3504
3505 if (!(vcpu->arch.ctrl & 1))
3506 mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
3507
3508 mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
3509
3510 if (kvmhv_on_pseries()) {
3511 /* call our hypervisor to load up HV regs and go */
3512 struct hv_guest_state hvregs;
3513
3514 kvmhv_save_hv_regs(vcpu, &hvregs);
3515 hvregs.lpcr = lpcr;
3516 vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
3517 hvregs.version = HV_GUEST_STATE_VERSION;
3518 if (vcpu->arch.nested) {
3519 hvregs.lpid = vcpu->arch.nested->shadow_lpid;
3520 hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
3521 } else {
3522 hvregs.lpid = vcpu->kvm->arch.lpid;
3523 hvregs.vcpu_token = vcpu->vcpu_id;
3524 }
3525 hvregs.hdec_expiry = time_limit;
3526 trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
3527 __pa(&vcpu->arch.regs));
3528 kvmhv_restore_hv_return_state(vcpu, &hvregs);
3529 vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
3530 vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
3531 vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
3532
3533 /* H_CEDE has to be handled now, not later */
3534 if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
3535 kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
3536 kvmppc_nested_cede(vcpu);
3537 trap = 0;
3538 }
3539 } else {
3540 trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
3541 }
3542
3543 vcpu->arch.slb_max = 0;
3544 dec = mfspr(SPRN_DEC);
3545 tb = mftb();
3546 vcpu->arch.dec_expires = dec + tb;
3547 vcpu->cpu = -1;
3548 vcpu->arch.thread_cpu = -1;
3549 vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
3550
3551 vcpu->arch.iamr = mfspr(SPRN_IAMR);
3552 vcpu->arch.pspb = mfspr(SPRN_PSPB);
3553 vcpu->arch.fscr = mfspr(SPRN_FSCR);
3554 vcpu->arch.tar = mfspr(SPRN_TAR);
3555 vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
3556 vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
3557 vcpu->arch.bescr = mfspr(SPRN_BESCR);
3558 vcpu->arch.wort = mfspr(SPRN_WORT);
3559 vcpu->arch.tid = mfspr(SPRN_TIDR);
3560 vcpu->arch.amr = mfspr(SPRN_AMR);
3561 vcpu->arch.uamor = mfspr(SPRN_UAMOR);
3562 vcpu->arch.dscr = mfspr(SPRN_DSCR);
3563
3564 mtspr(SPRN_PSPB, 0);
3565 mtspr(SPRN_WORT, 0);
3566 mtspr(SPRN_AMR, 0);
3567 mtspr(SPRN_UAMOR, 0);
3568 mtspr(SPRN_DSCR, host_dscr);
3569 mtspr(SPRN_TIDR, host_tidr);
3570 mtspr(SPRN_IAMR, host_iamr);
3571 mtspr(SPRN_PSPB, 0);
3572
3573 msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
3574 store_fp_state(&vcpu->arch.fp);
3575#ifdef CONFIG_ALTIVEC
3576 store_vr_state(&vcpu->arch.vr);
3577#endif
3578
3579 if (cpu_has_feature(CPU_FTR_TM) ||
3580 cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
3581 kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
3582
3583 save_pmu = 1;
3584 if (vcpu->arch.vpa.pinned_addr) {
3585 struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
3586 u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
3587 lp->yield_count = cpu_to_be32(yield_count);
3588 vcpu->arch.vpa.dirty = 1;
3589 save_pmu = lp->pmcregs_in_use;
3590 }
3591
3592 kvmhv_save_guest_pmu(vcpu, save_pmu);
3593
3594 vc->entry_exit_map = 0x101;
3595 vc->in_guest = 0;
3596
3597 mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
3598
3599 kvmhv_load_host_pmu();
3600
3601 kvmppc_subcore_exit_guest();
3602
3603 return trap;
3604}
3605
3606/*
3083 * Wait for some other vcpu thread to execute us, and 3607 * Wait for some other vcpu thread to execute us, and
3084 * wake us up when we need to handle something in the host. 3608 * wake us up when we need to handle something in the host.
3085 */ 3609 */
@@ -3256,6 +3780,11 @@ out:
3256 trace_kvmppc_vcore_wakeup(do_sleep, block_ns); 3780 trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
3257} 3781}
3258 3782
3783/*
3784 * This never fails for a radix guest, as none of the operations it does
3785 * for a radix guest can fail or have a way to report failure.
3786 * kvmhv_run_single_vcpu() relies on this fact.
3787 */
3259static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) 3788static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
3260{ 3789{
3261 int r = 0; 3790 int r = 0;
@@ -3405,6 +3934,171 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3405 return vcpu->arch.ret; 3934 return vcpu->arch.ret;
3406} 3935}
3407 3936
3937int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
3938 struct kvm_vcpu *vcpu, u64 time_limit,
3939 unsigned long lpcr)
3940{
3941 int trap, r, pcpu;
3942 int srcu_idx;
3943 struct kvmppc_vcore *vc;
3944 struct kvm *kvm = vcpu->kvm;
3945 struct kvm_nested_guest *nested = vcpu->arch.nested;
3946
3947 trace_kvmppc_run_vcpu_enter(vcpu);
3948
3949 kvm_run->exit_reason = 0;
3950 vcpu->arch.ret = RESUME_GUEST;
3951 vcpu->arch.trap = 0;
3952
3953 vc = vcpu->arch.vcore;
3954 vcpu->arch.ceded = 0;
3955 vcpu->arch.run_task = current;
3956 vcpu->arch.kvm_run = kvm_run;
3957 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
3958 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
3959 vcpu->arch.busy_preempt = TB_NIL;
3960 vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
3961 vc->runnable_threads[0] = vcpu;
3962 vc->n_runnable = 1;
3963 vc->runner = vcpu;
3964
3965 /* See if the MMU is ready to go */
3966 if (!kvm->arch.mmu_ready)
3967 kvmhv_setup_mmu(vcpu);
3968
3969 if (need_resched())
3970 cond_resched();
3971
3972 kvmppc_update_vpas(vcpu);
3973
3974 init_vcore_to_run(vc);
3975 vc->preempt_tb = TB_NIL;
3976
3977 preempt_disable();
3978 pcpu = smp_processor_id();
3979 vc->pcpu = pcpu;
3980 kvmppc_prepare_radix_vcpu(vcpu, pcpu);
3981
3982 local_irq_disable();
3983 hard_irq_disable();
3984 if (signal_pending(current))
3985 goto sigpend;
3986 if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
3987 goto out;
3988
3989 if (!nested) {
3990 kvmppc_core_prepare_to_enter(vcpu);
3991 if (vcpu->arch.doorbell_request) {
3992 vc->dpdes = 1;
3993 smp_wmb();
3994 vcpu->arch.doorbell_request = 0;
3995 }
3996 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
3997 &vcpu->arch.pending_exceptions))
3998 lpcr |= LPCR_MER;
3999 } else if (vcpu->arch.pending_exceptions ||
4000 vcpu->arch.doorbell_request ||
4001 xive_interrupt_pending(vcpu)) {
4002 vcpu->arch.ret = RESUME_HOST;
4003 goto out;
4004 }
4005
4006 kvmppc_clear_host_core(pcpu);
4007
4008 local_paca->kvm_hstate.tid = 0;
4009 local_paca->kvm_hstate.napping = 0;
4010 local_paca->kvm_hstate.kvm_split_mode = NULL;
4011 kvmppc_start_thread(vcpu, vc);
4012 kvmppc_create_dtl_entry(vcpu, vc);
4013 trace_kvm_guest_enter(vcpu);
4014
4015 vc->vcore_state = VCORE_RUNNING;
4016 trace_kvmppc_run_core(vc, 0);
4017
4018 if (cpu_has_feature(CPU_FTR_HVMODE))
4019 kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested);
4020
4021 trace_hardirqs_on();
4022 guest_enter_irqoff();
4023
4024 srcu_idx = srcu_read_lock(&kvm->srcu);
4025
4026 this_cpu_disable_ftrace();
4027
4028 trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
4029 vcpu->arch.trap = trap;
4030
4031 this_cpu_enable_ftrace();
4032
4033 srcu_read_unlock(&kvm->srcu, srcu_idx);
4034
4035 if (cpu_has_feature(CPU_FTR_HVMODE)) {
4036 mtspr(SPRN_LPID, kvm->arch.host_lpid);
4037 isync();
4038 }
4039
4040 trace_hardirqs_off();
4041 set_irq_happened(trap);
4042
4043 kvmppc_set_host_core(pcpu);
4044
4045 local_irq_enable();
4046 guest_exit();
4047
4048 cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
4049
4050 preempt_enable();
4051
4052 /* cancel pending decrementer exception if DEC is now positive */
4053 if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
4054 kvmppc_core_dequeue_dec(vcpu);
4055
4056 trace_kvm_guest_exit(vcpu);
4057 r = RESUME_GUEST;
4058 if (trap) {
4059 if (!nested)
4060 r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
4061 else
4062 r = kvmppc_handle_nested_exit(vcpu);
4063 }
4064 vcpu->arch.ret = r;
4065
4066 if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
4067 !kvmppc_vcpu_woken(vcpu)) {
4068 kvmppc_set_timer(vcpu);
4069 while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
4070 if (signal_pending(current)) {
4071 vcpu->stat.signal_exits++;
4072 kvm_run->exit_reason = KVM_EXIT_INTR;
4073 vcpu->arch.ret = -EINTR;
4074 break;
4075 }
4076 spin_lock(&vc->lock);
4077 kvmppc_vcore_blocked(vc);
4078 spin_unlock(&vc->lock);
4079 }
4080 }
4081 vcpu->arch.ceded = 0;
4082
4083 vc->vcore_state = VCORE_INACTIVE;
4084 trace_kvmppc_run_core(vc, 1);
4085
4086 done:
4087 kvmppc_remove_runnable(vc, vcpu);
4088 trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
4089
4090 return vcpu->arch.ret;
4091
4092 sigpend:
4093 vcpu->stat.signal_exits++;
4094 kvm_run->exit_reason = KVM_EXIT_INTR;
4095 vcpu->arch.ret = -EINTR;
4096 out:
4097 local_irq_enable();
4098 preempt_enable();
4099 goto done;
4100}
4101
3408static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) 4102static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
3409{ 4103{
3410 int r; 4104 int r;
@@ -3480,7 +4174,20 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
3480 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 4174 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
3481 4175
3482 do { 4176 do {
3483 r = kvmppc_run_vcpu(run, vcpu); 4177 /*
4178 * The early POWER9 chips that can't mix radix and HPT threads
4179 * on the same core also need the workaround for the problem
4180 * where the TLB would prefetch entries in the guest exit path
4181 * for radix guests using the guest PIDR value and LPID 0.
4182 * The workaround is in the old path (kvmppc_run_vcpu())
4183 * but not the new path (kvmhv_run_single_vcpu()).
4184 */
4185 if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
4186 !no_mixing_hpt_and_radix)
4187 r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0,
4188 vcpu->arch.vcore->lpcr);
4189 else
4190 r = kvmppc_run_vcpu(run, vcpu);
3484 4191
3485 if (run->exit_reason == KVM_EXIT_PAPR_HCALL && 4192 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
3486 !(vcpu->arch.shregs.msr & MSR_PR)) { 4193 !(vcpu->arch.shregs.msr & MSR_PR)) {
@@ -3559,6 +4266,10 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
3559 kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01); 4266 kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
3560 kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L); 4267 kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
3561 4268
4269 /* If running as a nested hypervisor, we don't support HPT guests */
4270 if (kvmhv_on_pseries())
4271 info->flags |= KVM_PPC_NO_HASH;
4272
3562 return 0; 4273 return 0;
3563} 4274}
3564 4275
@@ -3723,8 +4434,7 @@ void kvmppc_setup_partition_table(struct kvm *kvm)
3723 __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE; 4434 __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
3724 dw1 = PATB_GR | kvm->arch.process_table; 4435 dw1 = PATB_GR | kvm->arch.process_table;
3725 } 4436 }
3726 4437 kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
3727 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
3728} 4438}
3729 4439
3730/* 4440/*
@@ -3820,6 +4530,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
3820/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ 4530/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
3821int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) 4531int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
3822{ 4532{
4533 if (nesting_enabled(kvm))
4534 kvmhv_release_all_nested(kvm);
3823 kvmppc_free_radix(kvm); 4535 kvmppc_free_radix(kvm);
3824 kvmppc_update_lpcr(kvm, LPCR_VPM1, 4536 kvmppc_update_lpcr(kvm, LPCR_VPM1,
3825 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); 4537 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
@@ -3841,6 +4553,7 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
3841 kvmppc_free_hpt(&kvm->arch.hpt); 4553 kvmppc_free_hpt(&kvm->arch.hpt);
3842 kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, 4554 kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
3843 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); 4555 LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
4556 kvmppc_rmap_reset(kvm);
3844 kvm->arch.radix = 1; 4557 kvm->arch.radix = 1;
3845 return 0; 4558 return 0;
3846} 4559}
@@ -3940,6 +4653,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3940 4653
3941 kvmppc_alloc_host_rm_ops(); 4654 kvmppc_alloc_host_rm_ops();
3942 4655
4656 kvmhv_vm_nested_init(kvm);
4657
3943 /* 4658 /*
3944 * Since we don't flush the TLB when tearing down a VM, 4659 * Since we don't flush the TLB when tearing down a VM,
3945 * and this lpid might have previously been used, 4660 * and this lpid might have previously been used,
@@ -3958,9 +4673,13 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3958 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); 4673 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
3959 4674
3960 /* Init LPCR for virtual RMA mode */ 4675 /* Init LPCR for virtual RMA mode */
3961 kvm->arch.host_lpid = mfspr(SPRN_LPID); 4676 if (cpu_has_feature(CPU_FTR_HVMODE)) {
3962 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); 4677 kvm->arch.host_lpid = mfspr(SPRN_LPID);
3963 lpcr &= LPCR_PECE | LPCR_LPES; 4678 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
4679 lpcr &= LPCR_PECE | LPCR_LPES;
4680 } else {
4681 lpcr = 0;
4682 }
3964 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | 4683 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
3965 LPCR_VPM0 | LPCR_VPM1; 4684 LPCR_VPM0 | LPCR_VPM1;
3966 kvm->arch.vrma_slb_v = SLB_VSID_B_1T | 4685 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
@@ -4027,8 +4746,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
4027 * On POWER9, we only need to do this if the "indep_threads_mode" 4746 * On POWER9, we only need to do this if the "indep_threads_mode"
4028 * module parameter has been set to N. 4747 * module parameter has been set to N.
4029 */ 4748 */
4030 if (cpu_has_feature(CPU_FTR_ARCH_300)) 4749 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4031 kvm->arch.threads_indep = indep_threads_mode; 4750 if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
4751 pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
4752 kvm->arch.threads_indep = true;
4753 } else {
4754 kvm->arch.threads_indep = indep_threads_mode;
4755 }
4756 }
4032 if (!kvm->arch.threads_indep) 4757 if (!kvm->arch.threads_indep)
4033 kvm_hv_vm_activated(); 4758 kvm_hv_vm_activated();
4034 4759
@@ -4051,6 +4776,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
4051 snprintf(buf, sizeof(buf), "vm%d", current->pid); 4776 snprintf(buf, sizeof(buf), "vm%d", current->pid);
4052 kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir); 4777 kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
4053 kvmppc_mmu_debugfs_init(kvm); 4778 kvmppc_mmu_debugfs_init(kvm);
4779 if (radix_enabled())
4780 kvmhv_radix_debugfs_init(kvm);
4054 4781
4055 return 0; 4782 return 0;
4056} 4783}
@@ -4073,13 +4800,21 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
4073 4800
4074 kvmppc_free_vcores(kvm); 4801 kvmppc_free_vcores(kvm);
4075 4802
4076 kvmppc_free_lpid(kvm->arch.lpid);
4077 4803
4078 if (kvm_is_radix(kvm)) 4804 if (kvm_is_radix(kvm))
4079 kvmppc_free_radix(kvm); 4805 kvmppc_free_radix(kvm);
4080 else 4806 else
4081 kvmppc_free_hpt(&kvm->arch.hpt); 4807 kvmppc_free_hpt(&kvm->arch.hpt);
4082 4808
4809 /* Perform global invalidation and return lpid to the pool */
4810 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
4811 if (nesting_enabled(kvm))
4812 kvmhv_release_all_nested(kvm);
4813 kvm->arch.process_table = 0;
4814 kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
4815 }
4816 kvmppc_free_lpid(kvm->arch.lpid);
4817
4083 kvmppc_free_pimap(kvm); 4818 kvmppc_free_pimap(kvm);
4084} 4819}
4085 4820
@@ -4104,11 +4839,15 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
4104 4839
4105static int kvmppc_core_check_processor_compat_hv(void) 4840static int kvmppc_core_check_processor_compat_hv(void)
4106{ 4841{
4107 if (!cpu_has_feature(CPU_FTR_HVMODE) || 4842 if (cpu_has_feature(CPU_FTR_HVMODE) &&
4108 !cpu_has_feature(CPU_FTR_ARCH_206)) 4843 cpu_has_feature(CPU_FTR_ARCH_206))
4109 return -EIO; 4844 return 0;
4110 4845
4111 return 0; 4846 /* POWER9 in radix mode is capable of being a nested hypervisor. */
4847 if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
4848 return 0;
4849
4850 return -EIO;
4112} 4851}
4113 4852
4114#ifdef CONFIG_KVM_XICS 4853#ifdef CONFIG_KVM_XICS
@@ -4426,6 +5165,10 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
4426 if (radix && !radix_enabled()) 5165 if (radix && !radix_enabled())
4427 return -EINVAL; 5166 return -EINVAL;
4428 5167
5168 /* If we're a nested hypervisor, we currently only support radix */
5169 if (kvmhv_on_pseries() && !radix)
5170 return -EINVAL;
5171
4429 mutex_lock(&kvm->lock); 5172 mutex_lock(&kvm->lock);
4430 if (radix != kvm_is_radix(kvm)) { 5173 if (radix != kvm_is_radix(kvm)) {
4431 if (kvm->arch.mmu_ready) { 5174 if (kvm->arch.mmu_ready) {
@@ -4458,6 +5201,19 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
4458 return err; 5201 return err;
4459} 5202}
4460 5203
5204static int kvmhv_enable_nested(struct kvm *kvm)
5205{
5206 if (!nested)
5207 return -EPERM;
5208 if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
5209 return -ENODEV;
5210
5211 /* kvm == NULL means the caller is testing if the capability exists */
5212 if (kvm)
5213 kvm->arch.nested_enable = true;
5214 return 0;
5215}
5216
4461static struct kvmppc_ops kvm_ops_hv = { 5217static struct kvmppc_ops kvm_ops_hv = {
4462 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, 5218 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
4463 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, 5219 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -4497,6 +5253,7 @@ static struct kvmppc_ops kvm_ops_hv = {
4497 .configure_mmu = kvmhv_configure_mmu, 5253 .configure_mmu = kvmhv_configure_mmu,
4498 .get_rmmu_info = kvmhv_get_rmmu_info, 5254 .get_rmmu_info = kvmhv_get_rmmu_info,
4499 .set_smt_mode = kvmhv_set_smt_mode, 5255 .set_smt_mode = kvmhv_set_smt_mode,
5256 .enable_nested = kvmhv_enable_nested,
4500}; 5257};
4501 5258
4502static int kvm_init_subcore_bitmap(void) 5259static int kvm_init_subcore_bitmap(void)
@@ -4547,6 +5304,10 @@ static int kvmppc_book3s_init_hv(void)
4547 if (r < 0) 5304 if (r < 0)
4548 return -ENODEV; 5305 return -ENODEV;
4549 5306
5307 r = kvmhv_nested_init();
5308 if (r)
5309 return r;
5310
4550 r = kvm_init_subcore_bitmap(); 5311 r = kvm_init_subcore_bitmap();
4551 if (r) 5312 if (r)
4552 return r; 5313 return r;
@@ -4557,7 +5318,8 @@ static int kvmppc_book3s_init_hv(void)
4557 * indirectly, via OPAL. 5318 * indirectly, via OPAL.
4558 */ 5319 */
4559#ifdef CONFIG_SMP 5320#ifdef CONFIG_SMP
4560 if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) { 5321 if (!xive_enabled() && !kvmhv_on_pseries() &&
5322 !local_paca->kvm_hstate.xics_phys) {
4561 struct device_node *np; 5323 struct device_node *np;
4562 5324
4563 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); 5325 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
@@ -4605,6 +5367,7 @@ static void kvmppc_book3s_exit_hv(void)
4605 if (kvmppc_radix_possible()) 5367 if (kvmppc_radix_possible())
4606 kvmppc_radix_exit(); 5368 kvmppc_radix_exit();
4607 kvmppc_hv_ops = NULL; 5369 kvmppc_hv_ops = NULL;
5370 kvmhv_nested_exit();
4608} 5371}
4609 5372
4610module_init(kvmppc_book3s_init_hv); 5373module_init(kvmppc_book3s_init_hv);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fc6bb9630a9c..a71e2fc00a4e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu)
231 void __iomem *xics_phys; 231 void __iomem *xics_phys;
232 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); 232 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
233 233
234 /* For a nested hypervisor, use the XICS via hcall */
235 if (kvmhv_on_pseries()) {
236 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
237
238 plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
239 IPI_PRIORITY);
240 return;
241 }
242
234 /* On POWER9 we can use msgsnd for any destination cpu. */ 243 /* On POWER9 we can use msgsnd for any destination cpu. */
235 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 244 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
236 msg |= get_hard_smp_processor_id(cpu); 245 msg |= get_hard_smp_processor_id(cpu);
@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again)
460 return 1; 469 return 1;
461 470
462 /* Now read the interrupt from the ICP */ 471 /* Now read the interrupt from the ICP */
463 xics_phys = local_paca->kvm_hstate.xics_phys; 472 if (kvmhv_on_pseries()) {
464 rc = 0; 473 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
465 if (!xics_phys) 474
466 rc = opal_int_get_xirr(&xirr, false); 475 rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
467 else 476 xirr = cpu_to_be32(retbuf[0]);
468 xirr = __raw_rm_readl(xics_phys + XICS_XIRR); 477 } else {
478 xics_phys = local_paca->kvm_hstate.xics_phys;
479 rc = 0;
480 if (!xics_phys)
481 rc = opal_int_get_xirr(&xirr, false);
482 else
483 xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
484 }
469 if (rc < 0) 485 if (rc < 0)
470 return 1; 486 return 1;
471 487
@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again)
494 */ 510 */
495 if (xisr == XICS_IPI) { 511 if (xisr == XICS_IPI) {
496 rc = 0; 512 rc = 0;
497 if (xics_phys) { 513 if (kvmhv_on_pseries()) {
514 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
515
516 plpar_hcall_raw(H_IPI, retbuf,
517 hard_smp_processor_id(), 0xff);
518 plpar_hcall_raw(H_EOI, retbuf, h_xirr);
519 } else if (xics_phys) {
498 __raw_rm_writeb(0xff, xics_phys + XICS_MFRR); 520 __raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
499 __raw_rm_writel(xirr, xics_phys + XICS_XIRR); 521 __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
500 } else { 522 } else {
@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again)
520 /* We raced with the host, 542 /* We raced with the host,
521 * we need to resend that IPI, bummer 543 * we need to resend that IPI, bummer
522 */ 544 */
523 if (xics_phys) 545 if (kvmhv_on_pseries()) {
546 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
547
548 plpar_hcall_raw(H_IPI, retbuf,
549 hard_smp_processor_id(),
550 IPI_PRIORITY);
551 } else if (xics_phys)
524 __raw_rm_writeb(IPI_PRIORITY, 552 __raw_rm_writeb(IPI_PRIORITY,
525 xics_phys + XICS_MFRR); 553 xics_phys + XICS_MFRR);
526 else 554 else
@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
729 smp_mb(); 757 smp_mb();
730 local_paca->kvm_hstate.kvm_split_mode = NULL; 758 local_paca->kvm_hstate.kvm_split_mode = NULL;
731} 759}
760
761/*
762 * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
 763 * Can we inject a Decrementer or an External interrupt?
764 */
765void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
766{
767 int ext;
768 unsigned long vec = 0;
769 unsigned long lpcr;
770
771 /* Insert EXTERNAL bit into LPCR at the MER bit position */
772 ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
773 lpcr = mfspr(SPRN_LPCR);
774 lpcr |= ext << LPCR_MER_SH;
775 mtspr(SPRN_LPCR, lpcr);
776 isync();
777
778 if (vcpu->arch.shregs.msr & MSR_EE) {
779 if (ext) {
780 vec = BOOK3S_INTERRUPT_EXTERNAL;
781 } else {
782 long int dec = mfspr(SPRN_DEC);
783 if (!(lpcr & LPCR_LD))
784 dec = (int) dec;
785 if (dec < 0)
786 vec = BOOK3S_INTERRUPT_DECREMENTER;
787 }
788 }
789 if (vec) {
790 unsigned long msr, old_msr = vcpu->arch.shregs.msr;
791
792 kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
793 kvmppc_set_srr1(vcpu, old_msr);
794 kvmppc_set_pc(vcpu, vec);
795 msr = vcpu->arch.intr_msr;
796 if (MSR_TM_ACTIVE(old_msr))
797 msr |= MSR_TS_S;
798 vcpu->arch.shregs.msr = msr;
799 }
800
801 if (vcpu->arch.doorbell_request) {
802 mtspr(SPRN_DPDES, 1);
803 vcpu->arch.vcore->dpdes = 1;
804 smp_wmb();
805 vcpu->arch.doorbell_request = 0;
806 }
807}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 666b91c79eb4..a6d10010d9e8 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION
64END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 64END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
65 65
66 /* Save host PMU registers */ 66 /* Save host PMU registers */
67BEGIN_FTR_SECTION 67 bl kvmhv_save_host_pmu
68 /* Work around P8 PMAE bug */
69 li r3, -1
70 clrrdi r3, r3, 10
71 mfspr r8, SPRN_MMCR2
72 mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
73 isync
74END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
75 li r3, 1
76 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
77 mfspr r7, SPRN_MMCR0 /* save MMCR0 */
78 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
79 mfspr r6, SPRN_MMCRA
80 /* Clear MMCRA in order to disable SDAR updates */
81 li r5, 0
82 mtspr SPRN_MMCRA, r5
83 isync
84 lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
85 cmpwi r5, 0
86 beq 31f /* skip if not */
87 mfspr r5, SPRN_MMCR1
88 mfspr r9, SPRN_SIAR
89 mfspr r10, SPRN_SDAR
90 std r7, HSTATE_MMCR0(r13)
91 std r5, HSTATE_MMCR1(r13)
92 std r6, HSTATE_MMCRA(r13)
93 std r9, HSTATE_SIAR(r13)
94 std r10, HSTATE_SDAR(r13)
95BEGIN_FTR_SECTION
96 mfspr r9, SPRN_SIER
97 std r8, HSTATE_MMCR2(r13)
98 std r9, HSTATE_SIER(r13)
99END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
100 mfspr r3, SPRN_PMC1
101 mfspr r5, SPRN_PMC2
102 mfspr r6, SPRN_PMC3
103 mfspr r7, SPRN_PMC4
104 mfspr r8, SPRN_PMC5
105 mfspr r9, SPRN_PMC6
106 stw r3, HSTATE_PMC1(r13)
107 stw r5, HSTATE_PMC2(r13)
108 stw r6, HSTATE_PMC3(r13)
109 stw r7, HSTATE_PMC4(r13)
110 stw r8, HSTATE_PMC5(r13)
111 stw r9, HSTATE_PMC6(r13)
11231:
113 68
114 /* 69 /*
115 * Put whatever is in the decrementer into the 70 * Put whatever is in the decrementer into the
@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
161 ld r0, PPC_LR_STKOFF(r1) 116 ld r0, PPC_LR_STKOFF(r1)
162 mtlr r0 117 mtlr r0
163 blr 118 blr
119
120_GLOBAL(kvmhv_save_host_pmu)
121BEGIN_FTR_SECTION
122 /* Work around P8 PMAE bug */
123 li r3, -1
124 clrrdi r3, r3, 10
125 mfspr r8, SPRN_MMCR2
126 mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
127 isync
128END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
129 li r3, 1
130 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
131 mfspr r7, SPRN_MMCR0 /* save MMCR0 */
132 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
133 mfspr r6, SPRN_MMCRA
134 /* Clear MMCRA in order to disable SDAR updates */
135 li r5, 0
136 mtspr SPRN_MMCRA, r5
137 isync
138 lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
139 cmpwi r5, 0
140 beq 31f /* skip if not */
141 mfspr r5, SPRN_MMCR1
142 mfspr r9, SPRN_SIAR
143 mfspr r10, SPRN_SDAR
144 std r7, HSTATE_MMCR0(r13)
145 std r5, HSTATE_MMCR1(r13)
146 std r6, HSTATE_MMCRA(r13)
147 std r9, HSTATE_SIAR(r13)
148 std r10, HSTATE_SDAR(r13)
149BEGIN_FTR_SECTION
150 mfspr r9, SPRN_SIER
151 std r8, HSTATE_MMCR2(r13)
152 std r9, HSTATE_SIER(r13)
153END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
154 mfspr r3, SPRN_PMC1
155 mfspr r5, SPRN_PMC2
156 mfspr r6, SPRN_PMC3
157 mfspr r7, SPRN_PMC4
158 mfspr r8, SPRN_PMC5
159 mfspr r9, SPRN_PMC6
160 stw r3, HSTATE_PMC1(r13)
161 stw r5, HSTATE_PMC2(r13)
162 stw r6, HSTATE_PMC3(r13)
163 stw r7, HSTATE_PMC4(r13)
164 stw r8, HSTATE_PMC5(r13)
165 stw r9, HSTATE_PMC6(r13)
16631: blr
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
new file mode 100644
index 000000000000..401d2ecbebc5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -0,0 +1,1291 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright IBM Corporation, 2018
4 * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
5 * Paul Mackerras <paulus@ozlabs.org>
6 *
7 * Description: KVM functions specific to running nested KVM-HV guests
8 * on Book3S processors (specifically POWER9 and later).
9 */
10
11#include <linux/kernel.h>
12#include <linux/kvm_host.h>
13#include <linux/llist.h>
14
15#include <asm/kvm_ppc.h>
16#include <asm/kvm_book3s.h>
17#include <asm/mmu.h>
18#include <asm/pgtable.h>
19#include <asm/pgalloc.h>
20#include <asm/pte-walk.h>
21#include <asm/reg.h>
22
23static struct patb_entry *pseries_partition_tb;
24
25static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
26static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
27
28void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
29{
30 struct kvmppc_vcore *vc = vcpu->arch.vcore;
31
32 hr->pcr = vc->pcr;
33 hr->dpdes = vc->dpdes;
34 hr->hfscr = vcpu->arch.hfscr;
35 hr->tb_offset = vc->tb_offset;
36 hr->dawr0 = vcpu->arch.dawr;
37 hr->dawrx0 = vcpu->arch.dawrx;
38 hr->ciabr = vcpu->arch.ciabr;
39 hr->purr = vcpu->arch.purr;
40 hr->spurr = vcpu->arch.spurr;
41 hr->ic = vcpu->arch.ic;
42 hr->vtb = vc->vtb;
43 hr->srr0 = vcpu->arch.shregs.srr0;
44 hr->srr1 = vcpu->arch.shregs.srr1;
45 hr->sprg[0] = vcpu->arch.shregs.sprg0;
46 hr->sprg[1] = vcpu->arch.shregs.sprg1;
47 hr->sprg[2] = vcpu->arch.shregs.sprg2;
48 hr->sprg[3] = vcpu->arch.shregs.sprg3;
49 hr->pidr = vcpu->arch.pid;
50 hr->cfar = vcpu->arch.cfar;
51 hr->ppr = vcpu->arch.ppr;
52}
53
54static void byteswap_pt_regs(struct pt_regs *regs)
55{
56 unsigned long *addr = (unsigned long *) regs;
57
58 for (; addr < ((unsigned long *) (regs + 1)); addr++)
59 *addr = swab64(*addr);
60}
61
62static void byteswap_hv_regs(struct hv_guest_state *hr)
63{
64 hr->version = swab64(hr->version);
65 hr->lpid = swab32(hr->lpid);
66 hr->vcpu_token = swab32(hr->vcpu_token);
67 hr->lpcr = swab64(hr->lpcr);
68 hr->pcr = swab64(hr->pcr);
69 hr->amor = swab64(hr->amor);
70 hr->dpdes = swab64(hr->dpdes);
71 hr->hfscr = swab64(hr->hfscr);
72 hr->tb_offset = swab64(hr->tb_offset);
73 hr->dawr0 = swab64(hr->dawr0);
74 hr->dawrx0 = swab64(hr->dawrx0);
75 hr->ciabr = swab64(hr->ciabr);
76 hr->hdec_expiry = swab64(hr->hdec_expiry);
77 hr->purr = swab64(hr->purr);
78 hr->spurr = swab64(hr->spurr);
79 hr->ic = swab64(hr->ic);
80 hr->vtb = swab64(hr->vtb);
81 hr->hdar = swab64(hr->hdar);
82 hr->hdsisr = swab64(hr->hdsisr);
83 hr->heir = swab64(hr->heir);
84 hr->asdr = swab64(hr->asdr);
85 hr->srr0 = swab64(hr->srr0);
86 hr->srr1 = swab64(hr->srr1);
87 hr->sprg[0] = swab64(hr->sprg[0]);
88 hr->sprg[1] = swab64(hr->sprg[1]);
89 hr->sprg[2] = swab64(hr->sprg[2]);
90 hr->sprg[3] = swab64(hr->sprg[3]);
91 hr->pidr = swab64(hr->pidr);
92 hr->cfar = swab64(hr->cfar);
93 hr->ppr = swab64(hr->ppr);
94}
95
96static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
97 struct hv_guest_state *hr)
98{
99 struct kvmppc_vcore *vc = vcpu->arch.vcore;
100
101 hr->dpdes = vc->dpdes;
102 hr->hfscr = vcpu->arch.hfscr;
103 hr->purr = vcpu->arch.purr;
104 hr->spurr = vcpu->arch.spurr;
105 hr->ic = vcpu->arch.ic;
106 hr->vtb = vc->vtb;
107 hr->srr0 = vcpu->arch.shregs.srr0;
108 hr->srr1 = vcpu->arch.shregs.srr1;
109 hr->sprg[0] = vcpu->arch.shregs.sprg0;
110 hr->sprg[1] = vcpu->arch.shregs.sprg1;
111 hr->sprg[2] = vcpu->arch.shregs.sprg2;
112 hr->sprg[3] = vcpu->arch.shregs.sprg3;
113 hr->pidr = vcpu->arch.pid;
114 hr->cfar = vcpu->arch.cfar;
115 hr->ppr = vcpu->arch.ppr;
116 switch (trap) {
117 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
118 hr->hdar = vcpu->arch.fault_dar;
119 hr->hdsisr = vcpu->arch.fault_dsisr;
120 hr->asdr = vcpu->arch.fault_gpa;
121 break;
122 case BOOK3S_INTERRUPT_H_INST_STORAGE:
123 hr->asdr = vcpu->arch.fault_gpa;
124 break;
125 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
126 hr->heir = vcpu->arch.emul_inst;
127 break;
128 }
129}
130
131static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
132{
133 /*
134 * Don't let L1 enable features for L2 which we've disabled for L1,
135 * but preserve the interrupt cause field.
136 */
137 hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
138
139 /* Don't let data address watchpoint match in hypervisor state */
140 hr->dawrx0 &= ~DAWRX_HYP;
141
142 /* Don't let completed instruction address breakpt match in HV state */
143 if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
144 hr->ciabr &= ~CIABR_PRIV;
145}
146
147static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
148{
149 struct kvmppc_vcore *vc = vcpu->arch.vcore;
150
151 vc->pcr = hr->pcr;
152 vc->dpdes = hr->dpdes;
153 vcpu->arch.hfscr = hr->hfscr;
154 vcpu->arch.dawr = hr->dawr0;
155 vcpu->arch.dawrx = hr->dawrx0;
156 vcpu->arch.ciabr = hr->ciabr;
157 vcpu->arch.purr = hr->purr;
158 vcpu->arch.spurr = hr->spurr;
159 vcpu->arch.ic = hr->ic;
160 vc->vtb = hr->vtb;
161 vcpu->arch.shregs.srr0 = hr->srr0;
162 vcpu->arch.shregs.srr1 = hr->srr1;
163 vcpu->arch.shregs.sprg0 = hr->sprg[0];
164 vcpu->arch.shregs.sprg1 = hr->sprg[1];
165 vcpu->arch.shregs.sprg2 = hr->sprg[2];
166 vcpu->arch.shregs.sprg3 = hr->sprg[3];
167 vcpu->arch.pid = hr->pidr;
168 vcpu->arch.cfar = hr->cfar;
169 vcpu->arch.ppr = hr->ppr;
170}
171
172void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
173 struct hv_guest_state *hr)
174{
175 struct kvmppc_vcore *vc = vcpu->arch.vcore;
176
177 vc->dpdes = hr->dpdes;
178 vcpu->arch.hfscr = hr->hfscr;
179 vcpu->arch.purr = hr->purr;
180 vcpu->arch.spurr = hr->spurr;
181 vcpu->arch.ic = hr->ic;
182 vc->vtb = hr->vtb;
183 vcpu->arch.fault_dar = hr->hdar;
184 vcpu->arch.fault_dsisr = hr->hdsisr;
185 vcpu->arch.fault_gpa = hr->asdr;
186 vcpu->arch.emul_inst = hr->heir;
187 vcpu->arch.shregs.srr0 = hr->srr0;
188 vcpu->arch.shregs.srr1 = hr->srr1;
189 vcpu->arch.shregs.sprg0 = hr->sprg[0];
190 vcpu->arch.shregs.sprg1 = hr->sprg[1];
191 vcpu->arch.shregs.sprg2 = hr->sprg[2];
192 vcpu->arch.shregs.sprg3 = hr->sprg[3];
193 vcpu->arch.pid = hr->pidr;
194 vcpu->arch.cfar = hr->cfar;
195 vcpu->arch.ppr = hr->ppr;
196}
197
198long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
199{
200 long int err, r;
201 struct kvm_nested_guest *l2;
202 struct pt_regs l2_regs, saved_l1_regs;
203 struct hv_guest_state l2_hv, saved_l1_hv;
204 struct kvmppc_vcore *vc = vcpu->arch.vcore;
205 u64 hv_ptr, regs_ptr;
206 u64 hdec_exp;
207 s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
208 u64 mask;
209 unsigned long lpcr;
210
211 if (vcpu->kvm->arch.l1_ptcr == 0)
212 return H_NOT_AVAILABLE;
213
214 /* copy parameters in */
215 hv_ptr = kvmppc_get_gpr(vcpu, 4);
216 err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
217 sizeof(struct hv_guest_state));
218 if (err)
219 return H_PARAMETER;
220 if (kvmppc_need_byteswap(vcpu))
221 byteswap_hv_regs(&l2_hv);
222 if (l2_hv.version != HV_GUEST_STATE_VERSION)
223 return H_P2;
224
225 regs_ptr = kvmppc_get_gpr(vcpu, 5);
226 err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
227 sizeof(struct pt_regs));
228 if (err)
229 return H_PARAMETER;
230 if (kvmppc_need_byteswap(vcpu))
231 byteswap_pt_regs(&l2_regs);
232 if (l2_hv.vcpu_token >= NR_CPUS)
233 return H_PARAMETER;
234
235 /* translate lpid */
236 l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
237 if (!l2)
238 return H_PARAMETER;
239 if (!l2->l1_gr_to_hr) {
240 mutex_lock(&l2->tlb_lock);
241 kvmhv_update_ptbl_cache(l2);
242 mutex_unlock(&l2->tlb_lock);
243 }
244
245 /* save l1 values of things */
246 vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
247 saved_l1_regs = vcpu->arch.regs;
248 kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
249
250 /* convert TB values/offsets to host (L0) values */
251 hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
252 vc->tb_offset += l2_hv.tb_offset;
253
254 /* set L1 state to L2 state */
255 vcpu->arch.nested = l2;
256 vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
257 vcpu->arch.regs = l2_regs;
258 vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
259 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
260 LPCR_LPES | LPCR_MER;
261 lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
262 sanitise_hv_regs(vcpu, &l2_hv);
263 restore_hv_regs(vcpu, &l2_hv);
264
265 vcpu->arch.ret = RESUME_GUEST;
266 vcpu->arch.trap = 0;
267 do {
268 if (mftb() >= hdec_exp) {
269 vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
270 r = RESUME_HOST;
271 break;
272 }
273 r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
274 lpcr);
275 } while (is_kvmppc_resume_guest(r));
276
277 /* save L2 state for return */
278 l2_regs = vcpu->arch.regs;
279 l2_regs.msr = vcpu->arch.shregs.msr;
280 delta_purr = vcpu->arch.purr - l2_hv.purr;
281 delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
282 delta_ic = vcpu->arch.ic - l2_hv.ic;
283 delta_vtb = vc->vtb - l2_hv.vtb;
284 save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
285
286 /* restore L1 state */
287 vcpu->arch.nested = NULL;
288 vcpu->arch.regs = saved_l1_regs;
289 vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
290 /* set L1 MSR TS field according to L2 transaction state */
291 if (l2_regs.msr & MSR_TS_MASK)
292 vcpu->arch.shregs.msr |= MSR_TS_S;
293 vc->tb_offset = saved_l1_hv.tb_offset;
294 restore_hv_regs(vcpu, &saved_l1_hv);
295 vcpu->arch.purr += delta_purr;
296 vcpu->arch.spurr += delta_spurr;
297 vcpu->arch.ic += delta_ic;
298 vc->vtb += delta_vtb;
299
300 kvmhv_put_nested(l2);
301
302 /* copy l2_hv_state and regs back to guest */
303 if (kvmppc_need_byteswap(vcpu)) {
304 byteswap_hv_regs(&l2_hv);
305 byteswap_pt_regs(&l2_regs);
306 }
307 err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
308 sizeof(struct hv_guest_state));
309 if (err)
310 return H_AUTHORITY;
311 err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
312 sizeof(struct pt_regs));
313 if (err)
314 return H_AUTHORITY;
315
316 if (r == -EINTR)
317 return H_INTERRUPT;
318
319 return vcpu->arch.trap;
320}
321
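For reference, the L1 side of this hcall is the kvmhv_on_pseries() branch of kvmhv_p9_guest_entry() shown earlier in this diff. Schematically (a hedged sketch, not the exact upstream code; example_enter_nested() is an illustrative name):

	/* How an L1 hypervisor asks L0 to run one of its L2 vcpus. */
	static int example_enter_nested(struct kvm_vcpu *vcpu, u64 time_limit,
					unsigned long lpcr)
	{
		struct hv_guest_state hvregs;

		kvmhv_save_hv_regs(vcpu, &hvregs);	/* L2's HV-privileged state */
		hvregs.version = HV_GUEST_STATE_VERSION;
		hvregs.lpcr = lpcr;
		hvregs.lpid = vcpu->kvm->arch.lpid;	/* L1's lpid for this L2 guest */
		hvregs.vcpu_token = vcpu->vcpu_id;
		hvregs.hdec_expiry = time_limit;

		/* r4 = real address of hv_guest_state, r5 = real address of pt_regs */
		return plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
					  __pa(&vcpu->arch.regs));
	}
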
322long kvmhv_nested_init(void)
323{
324 long int ptb_order;
325 unsigned long ptcr;
326 long rc;
327
328 if (!kvmhv_on_pseries())
329 return 0;
330 if (!radix_enabled())
331 return -ENODEV;
332
333 /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
334 ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
335 if (ptb_order < 8)
336 ptb_order = 8;
337 pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
338 GFP_KERNEL);
339 if (!pseries_partition_tb) {
 340 pr_err("kvm-hv: failed to allocate nested partition table\n");
341 return -ENOMEM;
342 }
343
344 ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
345 rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
346 if (rc != H_SUCCESS) {
347 pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
348 rc);
349 kfree(pseries_partition_tb);
350 pseries_partition_tb = NULL;
351 return -ENODEV;
352 }
353
354 return 0;
355}
356
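One way to read the PTCR computed above: each patb_entry is two doublewords (16 bytes), so a table of 2^ptb_order entries occupies 2^(ptb_order + 4) bytes, and the PRTS field encodes log2(table size in bytes) - 12, which is exactly ptb_order - 8. A small sketch of that arithmetic (example_nested_ptcr() is an illustrative name; the constants come from the code above):

	/* Recompute the PTCR value built in kvmhv_nested_init(), step by step. */
	static unsigned long example_nested_ptcr(struct patb_entry *tb, long ptb_order)
	{
		unsigned long table_bytes = sizeof(struct patb_entry) << ptb_order;

		BUILD_BUG_ON(sizeof(struct patb_entry) != 16);

		/* PRTS = log2(table size) - 12; ptb_order >= 8 keeps this >= 0 */
		return __pa(tb) | (ilog2(table_bytes) - 12);	/* == ptb_order - 8 */
	}
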
357void kvmhv_nested_exit(void)
358{
359 /*
360 * N.B. the kvmhv_on_pseries() test is there because it enables
361 * the compiler to remove the call to plpar_hcall_norets()
362 * when CONFIG_PPC_PSERIES=n.
363 */
364 if (kvmhv_on_pseries() && pseries_partition_tb) {
365 plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
366 kfree(pseries_partition_tb);
367 pseries_partition_tb = NULL;
368 }
369}
370
371static void kvmhv_flush_lpid(unsigned int lpid)
372{
373 long rc;
374
375 if (!kvmhv_on_pseries()) {
376 radix__flush_tlb_lpid(lpid);
377 return;
378 }
379
380 rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
381 lpid, TLBIEL_INVAL_SET_LPID);
382 if (rc)
383 pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
384}
385
386void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
387{
388 if (!kvmhv_on_pseries()) {
389 mmu_partition_table_set_entry(lpid, dw0, dw1);
390 return;
391 }
392
393 pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
394 pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
395 /* L0 will do the necessary barriers */
396 kvmhv_flush_lpid(lpid);
397}
398
399static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
400{
401 unsigned long dw0;
402
403 dw0 = PATB_HR | radix__get_tree_size() |
404 __pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
405 kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
406}
407
408void kvmhv_vm_nested_init(struct kvm *kvm)
409{
410 kvm->arch.max_nested_lpid = -1;
411}
412
413/*
414 * Handle the H_SET_PARTITION_TABLE hcall.
415 * r4 = guest real address of partition table + log_2(size) - 12
416 * (formatted as for the PTCR).
417 */
418long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
419{
420 struct kvm *kvm = vcpu->kvm;
421 unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
422 int srcu_idx;
423 long ret = H_SUCCESS;
424
425 srcu_idx = srcu_read_lock(&kvm->srcu);
426 /*
427 * Limit the partition table to 4096 entries (because that's what
428 * hardware supports), and check the base address.
429 */
430 if ((ptcr & PRTS_MASK) > 12 - 8 ||
431 !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
432 ret = H_PARAMETER;
433 srcu_read_unlock(&kvm->srcu, srcu_idx);
434 if (ret == H_SUCCESS)
435 kvm->arch.l1_ptcr = ptcr;
436 return ret;
437}
438
439/*
440 * Reload the partition table entry for a guest.
441 * Caller must hold gp->tlb_lock.
442 */
443static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
444{
445 int ret;
446 struct patb_entry ptbl_entry;
447 unsigned long ptbl_addr;
448 struct kvm *kvm = gp->l1_host;
449
450 ret = -EFAULT;
451 ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
452 if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
453 ret = kvm_read_guest(kvm, ptbl_addr,
454 &ptbl_entry, sizeof(ptbl_entry));
455 if (ret) {
456 gp->l1_gr_to_hr = 0;
457 gp->process_table = 0;
458 } else {
459 gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
460 gp->process_table = be64_to_cpu(ptbl_entry.patb1);
461 }
462 kvmhv_set_nested_ptbl(gp);
463}
464
465struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
466{
467 struct kvm_nested_guest *gp;
468 long shadow_lpid;
469
470 gp = kzalloc(sizeof(*gp), GFP_KERNEL);
471 if (!gp)
472 return NULL;
473 gp->l1_host = kvm;
474 gp->l1_lpid = lpid;
475 mutex_init(&gp->tlb_lock);
476 gp->shadow_pgtable = pgd_alloc(kvm->mm);
477 if (!gp->shadow_pgtable)
478 goto out_free;
479 shadow_lpid = kvmppc_alloc_lpid();
480 if (shadow_lpid < 0)
481 goto out_free2;
482 gp->shadow_lpid = shadow_lpid;
483
484 memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
485
486 return gp;
487
488 out_free2:
489 pgd_free(kvm->mm, gp->shadow_pgtable);
490 out_free:
491 kfree(gp);
492 return NULL;
493}
494
495/*
496 * Free up any resources allocated for a nested guest.
497 */
498static void kvmhv_release_nested(struct kvm_nested_guest *gp)
499{
500 struct kvm *kvm = gp->l1_host;
501
502 if (gp->shadow_pgtable) {
503 /*
504 * No vcpu is using this struct and no call to
505 * kvmhv_get_nested can find this struct,
506 * so we don't need to hold kvm->mmu_lock.
507 */
508 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
509 gp->shadow_lpid);
510 pgd_free(kvm->mm, gp->shadow_pgtable);
511 }
512 kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
513 kvmppc_free_lpid(gp->shadow_lpid);
514 kfree(gp);
515}
516
517static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
518{
519 struct kvm *kvm = gp->l1_host;
520 int lpid = gp->l1_lpid;
521 long ref;
522
523 spin_lock(&kvm->mmu_lock);
524 if (gp == kvm->arch.nested_guests[lpid]) {
525 kvm->arch.nested_guests[lpid] = NULL;
526 if (lpid == kvm->arch.max_nested_lpid) {
527 while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
528 ;
529 kvm->arch.max_nested_lpid = lpid;
530 }
531 --gp->refcnt;
532 }
533 ref = gp->refcnt;
534 spin_unlock(&kvm->mmu_lock);
535 if (ref == 0)
536 kvmhv_release_nested(gp);
537}
538
539/*
540 * Free up all nested resources allocated for this guest.
541 * This is called with no vcpus of the guest running, when
542 * switching the guest to HPT mode or when destroying the
543 * guest.
544 */
545void kvmhv_release_all_nested(struct kvm *kvm)
546{
547 int i;
548 struct kvm_nested_guest *gp;
549 struct kvm_nested_guest *freelist = NULL;
550 struct kvm_memory_slot *memslot;
551 int srcu_idx;
552
553 spin_lock(&kvm->mmu_lock);
554 for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
555 gp = kvm->arch.nested_guests[i];
556 if (!gp)
557 continue;
558 kvm->arch.nested_guests[i] = NULL;
559 if (--gp->refcnt == 0) {
560 gp->next = freelist;
561 freelist = gp;
562 }
563 }
564 kvm->arch.max_nested_lpid = -1;
565 spin_unlock(&kvm->mmu_lock);
566 while ((gp = freelist) != NULL) {
567 freelist = gp->next;
568 kvmhv_release_nested(gp);
569 }
570
571 srcu_idx = srcu_read_lock(&kvm->srcu);
572 kvm_for_each_memslot(memslot, kvm_memslots(kvm))
573 kvmhv_free_memslot_nest_rmap(memslot);
574 srcu_read_unlock(&kvm->srcu, srcu_idx);
575}
576
577/* caller must hold gp->tlb_lock */
578static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
579{
580 struct kvm *kvm = gp->l1_host;
581
582 spin_lock(&kvm->mmu_lock);
583 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
584 spin_unlock(&kvm->mmu_lock);
585 kvmhv_flush_lpid(gp->shadow_lpid);
586 kvmhv_update_ptbl_cache(gp);
587 if (gp->l1_gr_to_hr == 0)
588 kvmhv_remove_nested(gp);
589}
590
591struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
592 bool create)
593{
594 struct kvm_nested_guest *gp, *newgp;
595
596 if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
597 l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
598 return NULL;
599
600 spin_lock(&kvm->mmu_lock);
601 gp = kvm->arch.nested_guests[l1_lpid];
602 if (gp)
603 ++gp->refcnt;
604 spin_unlock(&kvm->mmu_lock);
605
606 if (gp || !create)
607 return gp;
608
609 newgp = kvmhv_alloc_nested(kvm, l1_lpid);
610 if (!newgp)
611 return NULL;
612 spin_lock(&kvm->mmu_lock);
613 if (kvm->arch.nested_guests[l1_lpid]) {
614 /* someone else beat us to it */
615 gp = kvm->arch.nested_guests[l1_lpid];
616 } else {
617 kvm->arch.nested_guests[l1_lpid] = newgp;
618 ++newgp->refcnt;
619 gp = newgp;
620 newgp = NULL;
621 if (l1_lpid > kvm->arch.max_nested_lpid)
622 kvm->arch.max_nested_lpid = l1_lpid;
623 }
624 ++gp->refcnt;
625 spin_unlock(&kvm->mmu_lock);
626
627 if (newgp)
628 kvmhv_release_nested(newgp);
629
630 return gp;
631}
632
633void kvmhv_put_nested(struct kvm_nested_guest *gp)
634{
635 struct kvm *kvm = gp->l1_host;
636 long ref;
637
638 spin_lock(&kvm->mmu_lock);
639 ref = --gp->refcnt;
640 spin_unlock(&kvm->mmu_lock);
641 if (ref == 0)
642 kvmhv_release_nested(gp);
643}
644
645static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
646{
647 if (lpid > kvm->arch.max_nested_lpid)
648 return NULL;
649 return kvm->arch.nested_guests[lpid];
650}
651
652static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
653{
654 return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
655 RMAP_NESTED_GPA_MASK));
656}
657
658void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
659 struct rmap_nested **n_rmap)
660{
661 struct llist_node *entry = ((struct llist_head *) rmapp)->first;
662 struct rmap_nested *cursor;
663 u64 rmap, new_rmap = (*n_rmap)->rmap;
664
665 /* Are there any existing entries? */
666 if (!(*rmapp)) {
667 /* No -> use the rmap as a single entry */
668 *rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
669 return;
670 }
671
672 /* Do any entries match what we're trying to insert? */
673 for_each_nest_rmap_safe(cursor, entry, &rmap) {
674 if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
675 return;
676 }
677
678 /* Do we need to create a list or just add the new entry? */
679 rmap = *rmapp;
680 if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
681 *rmapp = 0UL;
682 llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
683 if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
684 (*n_rmap)->list.next = (struct llist_node *) rmap;
685
 686	/* Set to NULL so the caller does not free it */
687 *n_rmap = NULL;
688}
689
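kvmhv_insert_nest_rmap() keeps the common one-mapping case allocation-free: an empty rmap slot stores a single encoded entry inline with RMAP_NESTED_IS_SINGLE_ENTRY set, and only a second, different entry promotes the slot to an llist of rmap_nested structures. The following is a self-contained userspace toy of the same tagging idea; the names, tag bit and plain malloc'd nodes are illustrative stand-ins for the kernel's llist machinery, and error handling is omitted for brevity:

/* rmap_demo.c -- toy illustration, not kernel code.  Build: cc -o rmap_demo rmap_demo.c */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define IS_SINGLE_ENTRY	(1ULL << 63)

struct node {
	uint64_t val;
	struct node *next;
};

static void slot_insert(uint64_t *slot, uint64_t val)
{
	struct node *n, *old;

	if (!*slot) {
		/* empty slot: store the value inline, no allocation */
		*slot = val | IS_SINGLE_ENTRY;
		return;
	}
	n = malloc(sizeof(*n));
	n->val = val;
	if (*slot & IS_SINGLE_ENTRY) {
		/* one inline entry: promote the slot to a two-node list */
		old = malloc(sizeof(*old));
		old->val = *slot & ~IS_SINGLE_ENTRY;
		old->next = NULL;
		n->next = old;
	} else {
		/* already a list: push the new node on the front */
		n->next = (struct node *)(uintptr_t)*slot;
	}
	*slot = (uint64_t)(uintptr_t)n;
}

int main(void)
{
	uint64_t slot = 0;
	struct node *p;

	slot_insert(&slot, 0x1000);	/* stays inline */
	slot_insert(&slot, 0x2000);	/* promotes to a list */
	for (p = (struct node *)(uintptr_t)slot; p; p = p->next)
		printf("entry 0x%llx\n", (unsigned long long)p->val);
	return 0;
}
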
690static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
691 unsigned long hpa, unsigned long mask)
692{
693 struct kvm_nested_guest *gp;
694 unsigned long gpa;
695 unsigned int shift, lpid;
696 pte_t *ptep;
697
698 gpa = n_rmap & RMAP_NESTED_GPA_MASK;
699 lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
700 gp = kvmhv_find_nested(kvm, lpid);
701 if (!gp)
702 return;
703
704 /* Find and invalidate the pte */
705 ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
706 /* Don't spuriously invalidate ptes if the pfn has changed */
707 if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
708 kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
709}
710
711static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
712 unsigned long hpa, unsigned long mask)
713{
714 struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
715 struct rmap_nested *cursor;
716 unsigned long rmap;
717
718 for_each_nest_rmap_safe(cursor, entry, &rmap) {
719 kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
720 kfree(cursor);
721 }
722}
723
724/* called with kvm->mmu_lock held */
725void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
726 struct kvm_memory_slot *memslot,
727 unsigned long gpa, unsigned long hpa,
728 unsigned long nbytes)
729{
730 unsigned long gfn, end_gfn;
731 unsigned long addr_mask;
732
733 if (!memslot)
734 return;
735 gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
736 end_gfn = gfn + (nbytes >> PAGE_SHIFT);
737
738 addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
739 hpa &= addr_mask;
740
741 for (; gfn < end_gfn; gfn++) {
742 unsigned long *rmap = &memslot->arch.rmap[gfn];
743 kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
744 }
745}
746
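Note how the addr_mask built in kvmhv_remove_nest_rmap_range() clears both the non-address pte bits and the offset bits inside the nbytes-sized backing mapping, so the comparison in kvmhv_remove_nest_rmap() above matches any shadow pte pointing into the same host range. A quick standalone illustration with made-up numbers (the rpn_mask value is illustrative only, not the real PTE_RPN_MASK):

/* mask_demo.c -- toy numbers only.  Build: cc -o mask_demo mask_demo.c */
#include <stdio.h>

int main(void)
{
	unsigned long nbytes = 0x10000;				/* a 64K backing mapping */
	unsigned long rpn_mask = 0x01fffffffffff000UL;		/* illustrative address mask */
	unsigned long addr_mask = rpn_mask & ~(nbytes - 1);
	unsigned long hpa = 0x200012345UL;

	/* both 0x200012345 and 0x20001ffff reduce to 0x200010000 */
	printf("hpa & addr_mask = 0x%lx\n", hpa & addr_mask);
	return 0;
}
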
747static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
748{
749 unsigned long page;
750
751 for (page = 0; page < free->npages; page++) {
752 unsigned long rmap, *rmapp = &free->arch.rmap[page];
753 struct rmap_nested *cursor;
754 struct llist_node *entry;
755
756 entry = llist_del_all((struct llist_head *) rmapp);
757 for_each_nest_rmap_safe(cursor, entry, &rmap)
758 kfree(cursor);
759 }
760}
761
762static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
763 struct kvm_nested_guest *gp,
764 long gpa, int *shift_ret)
765{
766 struct kvm *kvm = vcpu->kvm;
767 bool ret = false;
768 pte_t *ptep;
769 int shift;
770
771 spin_lock(&kvm->mmu_lock);
772 ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
773 if (!shift)
774 shift = PAGE_SHIFT;
775 if (ptep && pte_present(*ptep)) {
776 kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
777 ret = true;
778 }
779 spin_unlock(&kvm->mmu_lock);
780
781 if (shift_ret)
782 *shift_ret = shift;
783 return ret;
784}
785
786static inline int get_ric(unsigned int instr)
787{
788 return (instr >> 18) & 0x3;
789}
790
791static inline int get_prs(unsigned int instr)
792{
793 return (instr >> 17) & 0x1;
794}
795
796static inline int get_r(unsigned int instr)
797{
798 return (instr >> 16) & 0x1;
799}
800
801static inline int get_lpid(unsigned long r_val)
802{
803 return r_val & 0xffffffff;
804}
805
806static inline int get_is(unsigned long r_val)
807{
808 return (r_val >> 10) & 0x3;
809}
810
811static inline int get_ap(unsigned long r_val)
812{
813 return (r_val >> 5) & 0x7;
814}
815
816static inline long get_epn(unsigned long r_val)
817{
818 return r_val >> 12;
819}
820
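The helpers above extract the RIC, PRS and R fields from the tlbie instruction image and the IS, AP, EPN and LPID fields from the rB/rS operand values. A self-contained sketch that decodes a made-up instruction and operand with the same shifts and masks:

/* tlbie_fields.c -- toy decoder using the same bit positions as the helpers above. */
#include <stdio.h>

int main(void)
{
	unsigned int instr = (2u << 18) | (0u << 17) | (1u << 16);	/* RIC=2, PRS=0, R=1 */
	unsigned long rb = (0x12345UL << 12) | (3UL << 10) | (5UL << 5);
	unsigned long rs = 42;						/* LPID in the low 32 bits */

	printf("ric=%u prs=%u r=%u lpid=%lu\n",
	       (instr >> 18) & 0x3, (instr >> 17) & 0x1, (instr >> 16) & 0x1,
	       rs & 0xffffffff);
	printf("is=%lu ap=%lu epn=0x%lx\n",
	       (rb >> 10) & 0x3, (rb >> 5) & 0x7, rb >> 12);
	return 0;
}
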
821static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
822 int ap, long epn)
823{
824 struct kvm *kvm = vcpu->kvm;
825 struct kvm_nested_guest *gp;
826 long npages;
827 int shift, shadow_shift;
828 unsigned long addr;
829
830 shift = ap_to_shift(ap);
831 addr = epn << 12;
832 if (shift < 0)
833 /* Invalid ap encoding */
834 return -EINVAL;
835
836 addr &= ~((1UL << shift) - 1);
837 npages = 1UL << (shift - PAGE_SHIFT);
838
839 gp = kvmhv_get_nested(kvm, lpid, false);
840 if (!gp) /* No such guest -> nothing to do */
841 return 0;
842 mutex_lock(&gp->tlb_lock);
843
844 /* There may be more than one host page backing this single guest pte */
845 do {
846 kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
847
848 npages -= 1UL << (shadow_shift - PAGE_SHIFT);
849 addr += 1UL << shadow_shift;
850 } while (npages > 0);
851
852 mutex_unlock(&gp->tlb_lock);
853 kvmhv_put_nested(gp);
854 return 0;
855}
856
857static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
858 struct kvm_nested_guest *gp, int ric)
859{
860 struct kvm *kvm = vcpu->kvm;
861
862 mutex_lock(&gp->tlb_lock);
863 switch (ric) {
864 case 0:
865 /* Invalidate TLB */
866 spin_lock(&kvm->mmu_lock);
867 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
868 gp->shadow_lpid);
869 kvmhv_flush_lpid(gp->shadow_lpid);
870 spin_unlock(&kvm->mmu_lock);
871 break;
872 case 1:
873 /*
874 * Invalidate PWC
875 * We don't cache this -> nothing to do
876 */
877 break;
878 case 2:
879 /* Invalidate TLB, PWC and caching of partition table entries */
880 kvmhv_flush_nested(gp);
881 break;
882 default:
883 break;
884 }
885 mutex_unlock(&gp->tlb_lock);
886}
887
888static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
889{
890 struct kvm *kvm = vcpu->kvm;
891 struct kvm_nested_guest *gp;
892 int i;
893
894 spin_lock(&kvm->mmu_lock);
895 for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
896 gp = kvm->arch.nested_guests[i];
897 if (gp) {
898 spin_unlock(&kvm->mmu_lock);
899 kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
900 spin_lock(&kvm->mmu_lock);
901 }
902 }
903 spin_unlock(&kvm->mmu_lock);
904}
905
906static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
907 unsigned long rsval, unsigned long rbval)
908{
909 struct kvm *kvm = vcpu->kvm;
910 struct kvm_nested_guest *gp;
911 int r, ric, prs, is, ap;
912 int lpid;
913 long epn;
914 int ret = 0;
915
916 ric = get_ric(instr);
917 prs = get_prs(instr);
918 r = get_r(instr);
919 lpid = get_lpid(rsval);
920 is = get_is(rbval);
921
922 /*
923 * These cases are invalid and are not handled:
924 * r != 1 -> Only radix supported
925 * prs == 1 -> Not HV privileged
926 * ric == 3 -> No cluster bombs for radix
927 * is == 1 -> Partition scoped translations not associated with pid
928 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
929 */
930 if ((!r) || (prs) || (ric == 3) || (is == 1) ||
931 ((!is) && (ric == 1 || ric == 2)))
932 return -EINVAL;
933
934 switch (is) {
935 case 0:
936 /*
937 * We know ric == 0
938 * Invalidate TLB for a given target address
939 */
940 epn = get_epn(rbval);
941 ap = get_ap(rbval);
942 ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
943 break;
944 case 2:
945 /* Invalidate matching LPID */
946 gp = kvmhv_get_nested(kvm, lpid, false);
947 if (gp) {
948 kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
949 kvmhv_put_nested(gp);
950 }
951 break;
952 case 3:
953 /* Invalidate ALL LPIDs */
954 kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
955 break;
956 default:
957 ret = -EINVAL;
958 break;
959 }
960
961 return ret;
962}
963
964/*
965 * This handles the H_TLB_INVALIDATE hcall.
966 * Parameters are (r4) tlbie instruction code, (r5) rS contents,
967 * (r6) rB contents.
968 */
969long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
970{
971 int ret;
972
973 ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
974 kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
975 if (ret)
976 return H_PARAMETER;
977 return H_SUCCESS;
978}
979
980/* Used to convert a nested guest real address to an L1 guest real address */
981static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
982 struct kvm_nested_guest *gp,
983 unsigned long n_gpa, unsigned long dsisr,
984 struct kvmppc_pte *gpte_p)
985{
986 u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
987 int ret;
988
989 ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
990 &fault_addr);
991
992 if (ret) {
993 /* We didn't find a pte */
994 if (ret == -EINVAL) {
995 /* Unsupported mmu config */
996 flags |= DSISR_UNSUPP_MMU;
997 } else if (ret == -ENOENT) {
998 /* No translation found */
999 flags |= DSISR_NOHPTE;
1000 } else if (ret == -EFAULT) {
1001 /* Couldn't access L1 real address */
1002 flags |= DSISR_PRTABLE_FAULT;
1003 vcpu->arch.fault_gpa = fault_addr;
1004 } else {
1005 /* Unknown error */
1006 return ret;
1007 }
1008 goto forward_to_l1;
1009 } else {
1010 /* We found a pte -> check permissions */
1011 if (dsisr & DSISR_ISSTORE) {
1012 /* Can we write? */
1013 if (!gpte_p->may_write) {
1014 flags |= DSISR_PROTFAULT;
1015 goto forward_to_l1;
1016 }
1017 } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
1018 /* Can we execute? */
1019 if (!gpte_p->may_execute) {
1020 flags |= SRR1_ISI_N_OR_G;
1021 goto forward_to_l1;
1022 }
1023 } else {
1024 /* Can we read? */
1025 if (!gpte_p->may_read && !gpte_p->may_write) {
1026 flags |= DSISR_PROTFAULT;
1027 goto forward_to_l1;
1028 }
1029 }
1030 }
1031
1032 return 0;
1033
1034forward_to_l1:
1035 vcpu->arch.fault_dsisr = flags;
1036 if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
1037 vcpu->arch.shregs.msr &= ~0x783f0000ul;
1038 vcpu->arch.shregs.msr |= flags;
1039 }
1040 return RESUME_HOST;
1041}
1042
1043static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
1044 struct kvm_nested_guest *gp,
1045 unsigned long n_gpa,
1046 struct kvmppc_pte gpte,
1047 unsigned long dsisr)
1048{
1049 struct kvm *kvm = vcpu->kvm;
1050 bool writing = !!(dsisr & DSISR_ISSTORE);
1051 u64 pgflags;
1052 bool ret;
1053
1054 /* Are the rc bits set in the L1 partition scoped pte? */
1055 pgflags = _PAGE_ACCESSED;
1056 if (writing)
1057 pgflags |= _PAGE_DIRTY;
1058 if (pgflags & ~gpte.rc)
1059 return RESUME_HOST;
1060
1061 spin_lock(&kvm->mmu_lock);
1062 /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
1063 ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
1064 gpte.raddr, kvm->arch.lpid);
1065 spin_unlock(&kvm->mmu_lock);
1066 if (!ret)
1067 return -EINVAL;
1068
 1069	/* Set the rc bit in the pte of the shadow_pgtable for the nested guest */
1070 ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
1071 gp->shadow_lpid);
1072 if (!ret)
1073 return -EINVAL;
1074 return 0;
1075}
1076
1077static inline int kvmppc_radix_level_to_shift(int level)
1078{
1079 switch (level) {
1080 case 2:
1081 return PUD_SHIFT;
1082 case 1:
1083 return PMD_SHIFT;
1084 default:
1085 return PAGE_SHIFT;
1086 }
1087}
1088
1089static inline int kvmppc_radix_shift_to_level(int shift)
1090{
1091 if (shift == PUD_SHIFT)
1092 return 2;
1093 if (shift == PMD_SHIFT)
1094 return 1;
1095 if (shift == PAGE_SHIFT)
1096 return 0;
1097 WARN_ON_ONCE(1);
1098 return 0;
1099}
1100
1101/* called with gp->tlb_lock held */
1102static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
1103 struct kvm_nested_guest *gp)
1104{
1105 struct kvm *kvm = vcpu->kvm;
1106 struct kvm_memory_slot *memslot;
1107 struct rmap_nested *n_rmap;
1108 struct kvmppc_pte gpte;
1109 pte_t pte, *pte_p;
1110 unsigned long mmu_seq;
1111 unsigned long dsisr = vcpu->arch.fault_dsisr;
1112 unsigned long ea = vcpu->arch.fault_dar;
1113 unsigned long *rmapp;
1114 unsigned long n_gpa, gpa, gfn, perm = 0UL;
1115 unsigned int shift, l1_shift, level;
1116 bool writing = !!(dsisr & DSISR_ISSTORE);
1117 bool kvm_ro = false;
1118 long int ret;
1119
1120 if (!gp->l1_gr_to_hr) {
1121 kvmhv_update_ptbl_cache(gp);
1122 if (!gp->l1_gr_to_hr)
1123 return RESUME_HOST;
1124 }
1125
 1126	/* Convert the nested guest real address into an L1 guest real address */
1127
1128 n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
1129 if (!(dsisr & DSISR_PRTABLE_FAULT))
1130 n_gpa |= ea & 0xFFF;
1131 ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
1132
1133 /*
1134 * If the hardware found a translation but we don't now have a usable
1135 * translation in the l1 partition-scoped tree, remove the shadow pte
1136 * and let the guest retry.
1137 */
1138 if (ret == RESUME_HOST &&
1139 (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
1140 DSISR_BAD_COPYPASTE)))
1141 goto inval;
1142 if (ret)
1143 return ret;
1144
1145 /* Failed to set the reference/change bits */
1146 if (dsisr & DSISR_SET_RC) {
1147 ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
1148 if (ret == RESUME_HOST)
1149 return ret;
1150 if (ret)
1151 goto inval;
1152 dsisr &= ~DSISR_SET_RC;
1153 if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
1154 DSISR_PROTFAULT)))
1155 return RESUME_GUEST;
1156 }
1157
1158 /*
1159 * We took an HISI or HDSI while we were running a nested guest which
1160 * means we have no partition scoped translation for that. This means
1161 * we need to insert a pte for the mapping into our shadow_pgtable.
1162 */
1163
1164 l1_shift = gpte.page_shift;
1165 if (l1_shift < PAGE_SHIFT) {
1166 /* We don't support l1 using a page size smaller than our own */
1167 pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
1168 l1_shift, PAGE_SHIFT);
1169 return -EINVAL;
1170 }
1171 gpa = gpte.raddr;
1172 gfn = gpa >> PAGE_SHIFT;
1173
1174 /* 1. Get the corresponding host memslot */
1175
1176 memslot = gfn_to_memslot(kvm, gfn);
1177 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
1178 if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
1179 /* unusual error -> reflect to the guest as a DSI */
1180 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
1181 return RESUME_GUEST;
1182 }
1183 /* passthrough of emulated MMIO case... */
1184 pr_err("emulated MMIO passthrough?\n");
1185 return -EINVAL;
1186 }
1187 if (memslot->flags & KVM_MEM_READONLY) {
1188 if (writing) {
1189 /* Give the guest a DSI */
1190 kvmppc_core_queue_data_storage(vcpu, ea,
1191 DSISR_ISSTORE | DSISR_PROTFAULT);
1192 return RESUME_GUEST;
1193 }
1194 kvm_ro = true;
1195 }
1196
1197 /* 2. Find the host pte for this L1 guest real address */
1198
1199 /* Used to check for invalidations in progress */
1200 mmu_seq = kvm->mmu_notifier_seq;
1201 smp_rmb();
1202
 1203	/* See if we can find a translation in our partition scoped tables for L1 */
1204 pte = __pte(0);
1205 spin_lock(&kvm->mmu_lock);
1206 pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
1207 if (!shift)
1208 shift = PAGE_SHIFT;
1209 if (pte_p)
1210 pte = *pte_p;
1211 spin_unlock(&kvm->mmu_lock);
1212
1213 if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
1214 /* No suitable pte found -> try to insert a mapping */
1215 ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
1216 writing, kvm_ro, &pte, &level);
1217 if (ret == -EAGAIN)
1218 return RESUME_GUEST;
1219 else if (ret)
1220 return ret;
1221 shift = kvmppc_radix_level_to_shift(level);
1222 }
1223
1224 /* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
1225
 1226	/* The permissions are the combination of the host and l1 guest ptes */
1227 perm |= gpte.may_read ? 0UL : _PAGE_READ;
1228 perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
1229 perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
1230 pte = __pte(pte_val(pte) & ~perm);
1231
1232 /* What size pte can we insert? */
1233 if (shift > l1_shift) {
1234 u64 mask;
1235 unsigned int actual_shift = PAGE_SHIFT;
1236 if (PMD_SHIFT < l1_shift)
1237 actual_shift = PMD_SHIFT;
1238 mask = (1UL << shift) - (1UL << actual_shift);
1239 pte = __pte(pte_val(pte) | (gpa & mask));
1240 shift = actual_shift;
1241 }
1242 level = kvmppc_radix_shift_to_level(shift);
1243 n_gpa &= ~((1UL << shift) - 1);
1244
1245 /* 4. Insert the pte into our shadow_pgtable */
1246
1247 n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
1248 if (!n_rmap)
1249 return RESUME_GUEST; /* Let the guest try again */
1250 n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
1251 (((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
1252 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
1253 ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
1254 mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
1255 if (n_rmap)
1256 kfree(n_rmap);
1257 if (ret == -EAGAIN)
1258 ret = RESUME_GUEST; /* Let the guest try again */
1259
1260 return ret;
1261
1262 inval:
1263 kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
1264 return RESUME_GUEST;
1265}
1266
1267long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
1268{
1269 struct kvm_nested_guest *gp = vcpu->arch.nested;
1270 long int ret;
1271
1272 mutex_lock(&gp->tlb_lock);
1273 ret = __kvmhv_nested_page_fault(vcpu, gp);
1274 mutex_unlock(&gp->tlb_lock);
1275 return ret;
1276}
1277
1278int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
1279{
1280 int ret = -1;
1281
1282 spin_lock(&kvm->mmu_lock);
1283 while (++lpid <= kvm->arch.max_nested_lpid) {
1284 if (kvm->arch.nested_guests[lpid]) {
1285 ret = lpid;
1286 break;
1287 }
1288 }
1289 spin_unlock(&kvm->mmu_lock);
1290 return ret;
1291}
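kvmhv_nested_next_lpid() returns the next L1 LPID above the one passed in that currently has a nested guest, or -1 once there are no more, so all active nested guests of a VM can be walked as in this hedged sketch (the calling context is hypothetical):

	/* illustrative only -- walk every active nested LPID of a VM */
	int lpid = -1;

	while ((lpid = kvmhv_nested_next_lpid(kvm, lpid)) >= 0)
		pr_info("L1 lpid %d has an active nested guest\n", lpid);
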
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index b11043b23c18..0787f12c1a1b 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
177 177
178 local_paca->sibling_subcore_state->in_guest[subcore_id] = 1; 178 local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
179} 179}
180EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
180 181
181void kvmppc_subcore_exit_guest(void) 182void kvmppc_subcore_exit_guest(void)
182{ 183{
@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
187 188
188 local_paca->sibling_subcore_state->in_guest[subcore_id] = 0; 189 local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
189} 190}
191EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
190 192
191static bool kvmppc_tb_resync_required(void) 193static bool kvmppc_tb_resync_required(void)
192{ 194{
@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void)
331 } else { 333 } else {
332 wait_for_tb_resync(); 334 wait_for_tb_resync();
333 } 335 }
336
337 /*
338 * Reset tb_offset_applied so the guest exit code won't try
339 * to subtract the previous timebase offset from the timebase.
340 */
341 if (local_paca->kvm_hstate.kvm_vcore)
342 local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
343
334 return 0; 344 return 0;
335} 345}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 758d1d23215e..b3f5786b20dc 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
136 136
137 /* Mark the target VCPU as having an interrupt pending */ 137 /* Mark the target VCPU as having an interrupt pending */
138 vcpu->stat.queue_intr++; 138 vcpu->stat.queue_intr++;
139 set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); 139 set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
140 140
141 /* Kick self ? Just set MER and return */ 141 /* Kick self ? Just set MER and return */
142 if (vcpu == this_vcpu) { 142 if (vcpu == this_vcpu) {
@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
170static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) 170static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
171{ 171{
172 /* Note: Only called on self ! */ 172 /* Note: Only called on self ! */
173 clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 173 clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
174 &vcpu->arch.pending_exceptions);
175 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); 174 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
176} 175}
177 176
@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
768 void __iomem *xics_phys; 767 void __iomem *xics_phys;
769 int64_t rc; 768 int64_t rc;
770 769
770 if (kvmhv_on_pseries()) {
771 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
772
773 iosync();
774 plpar_hcall_raw(H_EOI, retbuf, hwirq);
775 return;
776 }
777
771 rc = pnv_opal_pci_msi_eoi(c, hwirq); 778 rc = pnv_opal_pci_msi_eoi(c, hwirq);
772 779
773 if (rc) 780 if (rc)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 1d14046124a0..9b8d50a7cbaf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -28,6 +28,7 @@
28#include <asm/exception-64s.h> 28#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h> 29#include <asm/kvm_book3s_asm.h>
30#include <asm/book3s/64/mmu-hash.h> 30#include <asm/book3s/64/mmu-hash.h>
31#include <asm/export.h>
31#include <asm/tm.h> 32#include <asm/tm.h>
32#include <asm/opal.h> 33#include <asm/opal.h>
33#include <asm/xive-regs.h> 34#include <asm/xive-regs.h>
@@ -46,8 +47,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
46#define NAPPING_NOVCPU 2 47#define NAPPING_NOVCPU 2
47 48
48/* Stack frame offsets for kvmppc_hv_entry */ 49/* Stack frame offsets for kvmppc_hv_entry */
49#define SFS 160 50#define SFS 208
50#define STACK_SLOT_TRAP (SFS-4) 51#define STACK_SLOT_TRAP (SFS-4)
52#define STACK_SLOT_SHORT_PATH (SFS-8)
51#define STACK_SLOT_TID (SFS-16) 53#define STACK_SLOT_TID (SFS-16)
52#define STACK_SLOT_PSSCR (SFS-24) 54#define STACK_SLOT_PSSCR (SFS-24)
53#define STACK_SLOT_PID (SFS-32) 55#define STACK_SLOT_PID (SFS-32)
@@ -56,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
56#define STACK_SLOT_DAWR (SFS-56) 58#define STACK_SLOT_DAWR (SFS-56)
57#define STACK_SLOT_DAWRX (SFS-64) 59#define STACK_SLOT_DAWRX (SFS-64)
58#define STACK_SLOT_HFSCR (SFS-72) 60#define STACK_SLOT_HFSCR (SFS-72)
61/* the following is used by the P9 short path */
62#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
59 63
60/* 64/*
61 * Call kvmppc_hv_entry in real mode. 65 * Call kvmppc_hv_entry in real mode.
@@ -113,45 +117,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
113 mtspr SPRN_SPRG_VDSO_WRITE,r3 117 mtspr SPRN_SPRG_VDSO_WRITE,r3
114 118
115 /* Reload the host's PMU registers */ 119 /* Reload the host's PMU registers */
116 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ 120 bl kvmhv_load_host_pmu
117 cmpwi r4, 0
118 beq 23f /* skip if not */
119BEGIN_FTR_SECTION
120 ld r3, HSTATE_MMCR0(r13)
121 andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
122 cmpwi r4, MMCR0_PMAO
123 beql kvmppc_fix_pmao
124END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
125 lwz r3, HSTATE_PMC1(r13)
126 lwz r4, HSTATE_PMC2(r13)
127 lwz r5, HSTATE_PMC3(r13)
128 lwz r6, HSTATE_PMC4(r13)
129 lwz r8, HSTATE_PMC5(r13)
130 lwz r9, HSTATE_PMC6(r13)
131 mtspr SPRN_PMC1, r3
132 mtspr SPRN_PMC2, r4
133 mtspr SPRN_PMC3, r5
134 mtspr SPRN_PMC4, r6
135 mtspr SPRN_PMC5, r8
136 mtspr SPRN_PMC6, r9
137 ld r3, HSTATE_MMCR0(r13)
138 ld r4, HSTATE_MMCR1(r13)
139 ld r5, HSTATE_MMCRA(r13)
140 ld r6, HSTATE_SIAR(r13)
141 ld r7, HSTATE_SDAR(r13)
142 mtspr SPRN_MMCR1, r4
143 mtspr SPRN_MMCRA, r5
144 mtspr SPRN_SIAR, r6
145 mtspr SPRN_SDAR, r7
146BEGIN_FTR_SECTION
147 ld r8, HSTATE_MMCR2(r13)
148 ld r9, HSTATE_SIER(r13)
149 mtspr SPRN_MMCR2, r8
150 mtspr SPRN_SIER, r9
151END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
152 mtspr SPRN_MMCR0, r3
153 isync
15423:
155 121
156 /* 122 /*
157 * Reload DEC. HDEC interrupts were disabled when 123 * Reload DEC. HDEC interrupts were disabled when
@@ -796,66 +762,23 @@ BEGIN_FTR_SECTION
796 b 91f 762 b 91f
797END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 763END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
798 /* 764 /*
799 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 765 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
800 */ 766 */
801 mr r3, r4 767 mr r3, r4
802 ld r4, VCPU_MSR(r3) 768 ld r4, VCPU_MSR(r3)
769 li r5, 0 /* don't preserve non-vol regs */
803 bl kvmppc_restore_tm_hv 770 bl kvmppc_restore_tm_hv
771 nop
804 ld r4, HSTATE_KVM_VCPU(r13) 772 ld r4, HSTATE_KVM_VCPU(r13)
80591: 77391:
806#endif 774#endif
807 775
808 /* Load guest PMU registers */ 776 /* Load guest PMU registers; r4 = vcpu pointer here */
809 /* R4 is live here (vcpu pointer) */ 777 mr r3, r4
810 li r3, 1 778 bl kvmhv_load_guest_pmu
811 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
812 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
813 isync
814BEGIN_FTR_SECTION
815 ld r3, VCPU_MMCR(r4)
816 andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
817 cmpwi r5, MMCR0_PMAO
818 beql kvmppc_fix_pmao
819END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
820 lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
821 lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
822 lwz r6, VCPU_PMC + 8(r4)
823 lwz r7, VCPU_PMC + 12(r4)
824 lwz r8, VCPU_PMC + 16(r4)
825 lwz r9, VCPU_PMC + 20(r4)
826 mtspr SPRN_PMC1, r3
827 mtspr SPRN_PMC2, r5
828 mtspr SPRN_PMC3, r6
829 mtspr SPRN_PMC4, r7
830 mtspr SPRN_PMC5, r8
831 mtspr SPRN_PMC6, r9
832 ld r3, VCPU_MMCR(r4)
833 ld r5, VCPU_MMCR + 8(r4)
834 ld r6, VCPU_MMCR + 16(r4)
835 ld r7, VCPU_SIAR(r4)
836 ld r8, VCPU_SDAR(r4)
837 mtspr SPRN_MMCR1, r5
838 mtspr SPRN_MMCRA, r6
839 mtspr SPRN_SIAR, r7
840 mtspr SPRN_SDAR, r8
841BEGIN_FTR_SECTION
842 ld r5, VCPU_MMCR + 24(r4)
843 ld r6, VCPU_SIER(r4)
844 mtspr SPRN_MMCR2, r5
845 mtspr SPRN_SIER, r6
846BEGIN_FTR_SECTION_NESTED(96)
847 lwz r7, VCPU_PMC + 24(r4)
848 lwz r8, VCPU_PMC + 28(r4)
849 ld r9, VCPU_MMCR + 32(r4)
850 mtspr SPRN_SPMC1, r7
851 mtspr SPRN_SPMC2, r8
852 mtspr SPRN_MMCRS, r9
853END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
854END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
855 mtspr SPRN_MMCR0, r3
856 isync
857 779
858 /* Load up FP, VMX and VSX registers */ 780 /* Load up FP, VMX and VSX registers */
781 ld r4, HSTATE_KVM_VCPU(r13)
859 bl kvmppc_load_fp 782 bl kvmppc_load_fp
860 783
861 ld r14, VCPU_GPR(R14)(r4) 784 ld r14, VCPU_GPR(R14)(r4)
@@ -1100,73 +1023,40 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1100no_xive: 1023no_xive:
1101#endif /* CONFIG_KVM_XICS */ 1024#endif /* CONFIG_KVM_XICS */
1102 1025
1103deliver_guest_interrupt: 1026 li r0, 0
1104 ld r6, VCPU_CTR(r4) 1027 stw r0, STACK_SLOT_SHORT_PATH(r1)
1105 ld r7, VCPU_XER(r4)
1106
1107 mtctr r6
1108 mtxer r7
1109 1028
1110kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ 1029deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
1111 ld r10, VCPU_PC(r4) 1030 /* Check if we can deliver an external or decrementer interrupt now */
1112 ld r11, VCPU_MSR(r4) 1031 ld r0, VCPU_PENDING_EXC(r4)
1032BEGIN_FTR_SECTION
1033 /* On POWER9, also check for emulated doorbell interrupt */
1034 lbz r3, VCPU_DBELL_REQ(r4)
1035 or r0, r0, r3
1036END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1037 cmpdi r0, 0
1038 beq 71f
1039 mr r3, r4
1040 bl kvmppc_guest_entry_inject_int
1041 ld r4, HSTATE_KVM_VCPU(r13)
104271:
1113 ld r6, VCPU_SRR0(r4) 1043 ld r6, VCPU_SRR0(r4)
1114 ld r7, VCPU_SRR1(r4) 1044 ld r7, VCPU_SRR1(r4)
1115 mtspr SPRN_SRR0, r6 1045 mtspr SPRN_SRR0, r6
1116 mtspr SPRN_SRR1, r7 1046 mtspr SPRN_SRR1, r7
1117 1047
1048fast_guest_entry_c:
1049 ld r10, VCPU_PC(r4)
1050 ld r11, VCPU_MSR(r4)
1118 /* r11 = vcpu->arch.msr & ~MSR_HV */ 1051 /* r11 = vcpu->arch.msr & ~MSR_HV */
1119 rldicl r11, r11, 63 - MSR_HV_LG, 1 1052 rldicl r11, r11, 63 - MSR_HV_LG, 1
1120 rotldi r11, r11, 1 + MSR_HV_LG 1053 rotldi r11, r11, 1 + MSR_HV_LG
1121 ori r11, r11, MSR_ME 1054 ori r11, r11, MSR_ME
1122 1055
1123 /* Check if we can deliver an external or decrementer interrupt now */ 1056 ld r6, VCPU_CTR(r4)
1124 ld r0, VCPU_PENDING_EXC(r4) 1057 ld r7, VCPU_XER(r4)
1125 rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 1058 mtctr r6
1126 cmpdi cr1, r0, 0 1059 mtxer r7
1127 andi. r8, r11, MSR_EE
1128 mfspr r8, SPRN_LPCR
1129 /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
1130 rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
1131 mtspr SPRN_LPCR, r8
1132 isync
1133 beq 5f
1134 li r0, BOOK3S_INTERRUPT_EXTERNAL
1135 bne cr1, 12f
1136 mfspr r0, SPRN_DEC
1137BEGIN_FTR_SECTION
1138 /* On POWER9 check whether the guest has large decrementer enabled */
1139 andis. r8, r8, LPCR_LD@h
1140 bne 15f
1141END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1142 extsw r0, r0
114315: cmpdi r0, 0
1144 li r0, BOOK3S_INTERRUPT_DECREMENTER
1145 bge 5f
1146
114712: mtspr SPRN_SRR0, r10
1148 mr r10,r0
1149 mtspr SPRN_SRR1, r11
1150 mr r9, r4
1151 bl kvmppc_msr_interrupt
11525:
1153BEGIN_FTR_SECTION
1154 b fast_guest_return
1155END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1156 /* On POWER9, check for pending doorbell requests */
1157 lbz r0, VCPU_DBELL_REQ(r4)
1158 cmpwi r0, 0
1159 beq fast_guest_return
1160 ld r5, HSTATE_KVM_VCORE(r13)
1161 /* Set DPDES register so the CPU will take a doorbell interrupt */
1162 li r0, 1
1163 mtspr SPRN_DPDES, r0
1164 std r0, VCORE_DPDES(r5)
1165 /* Make sure other cpus see vcore->dpdes set before dbell req clear */
1166 lwsync
1167 /* Clear the pending doorbell request */
1168 li r0, 0
1169 stb r0, VCPU_DBELL_REQ(r4)
1170 1060
1171/* 1061/*
1172 * Required state: 1062 * Required state:
@@ -1202,7 +1092,7 @@ BEGIN_FTR_SECTION
1202END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1092END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1203 1093
1204 ld r5, VCPU_LR(r4) 1094 ld r5, VCPU_LR(r4)
1205 lwz r6, VCPU_CR(r4) 1095 ld r6, VCPU_CR(r4)
1206 mtlr r5 1096 mtlr r5
1207 mtcr r6 1097 mtcr r6
1208 1098
@@ -1234,6 +1124,83 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1234 HRFI_TO_GUEST 1124 HRFI_TO_GUEST
1235 b . 1125 b .
1236 1126
1127/*
1128 * Enter the guest on a P9 or later system where we have exactly
1129 * one vcpu per vcore and we don't need to go to real mode
1130 * (which implies that host and guest are both using radix MMU mode).
1131 * r3 = vcpu pointer
1132 * Most SPRs and all the VSRs have been loaded already.
1133 */
1134_GLOBAL(__kvmhv_vcpu_entry_p9)
1135EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
1136 mflr r0
1137 std r0, PPC_LR_STKOFF(r1)
1138 stdu r1, -SFS(r1)
1139
1140 li r0, 1
1141 stw r0, STACK_SLOT_SHORT_PATH(r1)
1142
1143 std r3, HSTATE_KVM_VCPU(r13)
1144 mfcr r4
1145 stw r4, SFS+8(r1)
1146
1147 std r1, HSTATE_HOST_R1(r13)
1148
1149 reg = 14
1150 .rept 18
1151 std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1152 reg = reg + 1
1153 .endr
1154
1155 reg = 14
1156 .rept 18
1157 ld reg, __VCPU_GPR(reg)(r3)
1158 reg = reg + 1
1159 .endr
1160
1161 mfmsr r10
1162 std r10, HSTATE_HOST_MSR(r13)
1163
1164 mr r4, r3
1165 b fast_guest_entry_c
1166guest_exit_short_path:
1167
1168 li r0, KVM_GUEST_MODE_NONE
1169 stb r0, HSTATE_IN_GUEST(r13)
1170
1171 reg = 14
1172 .rept 18
1173 std reg, __VCPU_GPR(reg)(r9)
1174 reg = reg + 1
1175 .endr
1176
1177 reg = 14
1178 .rept 18
1179 ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1180 reg = reg + 1
1181 .endr
1182
1183 lwz r4, SFS+8(r1)
1184 mtcr r4
1185
1186 mr r3, r12 /* trap number */
1187
1188 addi r1, r1, SFS
1189 ld r0, PPC_LR_STKOFF(r1)
1190 mtlr r0
1191
1192 /* If we are in real mode, do a rfid to get back to the caller */
1193 mfmsr r4
1194 andi. r5, r4, MSR_IR
1195 bnelr
1196 rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
1197 mtspr SPRN_SRR0, r0
1198 ld r10, HSTATE_HOST_MSR(r13)
1199 rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
1200 mtspr SPRN_SRR1, r10
1201 RFI_TO_KERNEL
1202 b .
1203
1237secondary_too_late: 1204secondary_too_late:
1238 li r12, 0 1205 li r12, 0
1239 stw r12, STACK_SLOT_TRAP(r1) 1206 stw r12, STACK_SLOT_TRAP(r1)
@@ -1313,7 +1280,7 @@ kvmppc_interrupt_hv:
1313 std r3, VCPU_GPR(R12)(r9) 1280 std r3, VCPU_GPR(R12)(r9)
1314 /* CR is in the high half of r12 */ 1281 /* CR is in the high half of r12 */
1315 srdi r4, r12, 32 1282 srdi r4, r12, 32
1316 stw r4, VCPU_CR(r9) 1283 std r4, VCPU_CR(r9)
1317BEGIN_FTR_SECTION 1284BEGIN_FTR_SECTION
1318 ld r3, HSTATE_CFAR(r13) 1285 ld r3, HSTATE_CFAR(r13)
1319 std r3, VCPU_CFAR(r9) 1286 std r3, VCPU_CFAR(r9)
@@ -1387,18 +1354,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1387 std r3, VCPU_CTR(r9) 1354 std r3, VCPU_CTR(r9)
1388 std r4, VCPU_XER(r9) 1355 std r4, VCPU_XER(r9)
1389 1356
1390#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1357 /* Save more register state */
1391 /* For softpatch interrupt, go off and do TM instruction emulation */ 1358 mfdar r3
1392 cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH 1359 mfdsisr r4
1393 beq kvmppc_tm_emul 1360 std r3, VCPU_DAR(r9)
1394#endif 1361 stw r4, VCPU_DSISR(r9)
1395 1362
1396 /* If this is a page table miss then see if it's theirs or ours */ 1363 /* If this is a page table miss then see if it's theirs or ours */
1397 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE 1364 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1398 beq kvmppc_hdsi 1365 beq kvmppc_hdsi
1366 std r3, VCPU_FAULT_DAR(r9)
1367 stw r4, VCPU_FAULT_DSISR(r9)
1399 cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE 1368 cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1400 beq kvmppc_hisi 1369 beq kvmppc_hisi
1401 1370
1371#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1372 /* For softpatch interrupt, go off and do TM instruction emulation */
1373 cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
1374 beq kvmppc_tm_emul
1375#endif
1376
1402 /* See if this is a leftover HDEC interrupt */ 1377 /* See if this is a leftover HDEC interrupt */
1403 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 1378 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
1404 bne 2f 1379 bne 2f
@@ -1418,10 +1393,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1418BEGIN_FTR_SECTION 1393BEGIN_FTR_SECTION
1419 PPC_MSGSYNC 1394 PPC_MSGSYNC
1420 lwsync 1395 lwsync
1396 /* always exit if we're running a nested guest */
1397 ld r0, VCPU_NESTED(r9)
1398 cmpdi r0, 0
1399 bne guest_exit_cont
1421END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1400END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1422 lbz r0, HSTATE_HOST_IPI(r13) 1401 lbz r0, HSTATE_HOST_IPI(r13)
1423 cmpwi r0, 0 1402 cmpwi r0, 0
1424 beq 4f 1403 beq maybe_reenter_guest
1425 b guest_exit_cont 1404 b guest_exit_cont
14263: 14053:
1427 /* If it's a hypervisor facility unavailable interrupt, save HFSCR */ 1406 /* If it's a hypervisor facility unavailable interrupt, save HFSCR */
@@ -1433,82 +1412,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
143314: 141214:
1434 /* External interrupt ? */ 1413 /* External interrupt ? */
1435 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1414 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
1436 bne+ guest_exit_cont 1415 beq kvmppc_guest_external
1437
1438 /* External interrupt, first check for host_ipi. If this is
1439 * set, we know the host wants us out so let's do it now
1440 */
1441 bl kvmppc_read_intr
1442
1443 /*
1444 * Restore the active volatile registers after returning from
1445 * a C function.
1446 */
1447 ld r9, HSTATE_KVM_VCPU(r13)
1448 li r12, BOOK3S_INTERRUPT_EXTERNAL
1449
1450 /*
1451 * kvmppc_read_intr return codes:
1452 *
1453 * Exit to host (r3 > 0)
1454 * 1 An interrupt is pending that needs to be handled by the host
1455 * Exit guest and return to host by branching to guest_exit_cont
1456 *
1457 * 2 Passthrough that needs completion in the host
1458 * Exit guest and return to host by branching to guest_exit_cont
1459 * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
1460 * to indicate to the host to complete handling the interrupt
1461 *
1462 * Before returning to guest, we check if any CPU is heading out
1463 * to the host and if so, we head out also. If no CPUs are heading
1464 * check return values <= 0.
1465 *
1466 * Return to guest (r3 <= 0)
1467 * 0 No external interrupt is pending
1468 * -1 A guest wakeup IPI (which has now been cleared)
1469 * In either case, we return to guest to deliver any pending
1470 * guest interrupts.
1471 *
1472 * -2 A PCI passthrough external interrupt was handled
1473 * (interrupt was delivered directly to guest)
1474 * Return to guest to deliver any pending guest interrupts.
1475 */
1476
1477 cmpdi r3, 1
1478 ble 1f
1479
1480 /* Return code = 2 */
1481 li r12, BOOK3S_INTERRUPT_HV_RM_HARD
1482 stw r12, VCPU_TRAP(r9)
1483 b guest_exit_cont
1484
14851: /* Return code <= 1 */
1486 cmpdi r3, 0
1487 bgt guest_exit_cont
1488
1489 /* Return code <= 0 */
14904: ld r5, HSTATE_KVM_VCORE(r13)
1491 lwz r0, VCORE_ENTRY_EXIT(r5)
1492 cmpwi r0, 0x100
1493 mr r4, r9
1494 blt deliver_guest_interrupt
1495
1496guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1497 /* Save more register state */
1498 mfdar r6
1499 mfdsisr r7
1500 std r6, VCPU_DAR(r9)
1501 stw r7, VCPU_DSISR(r9)
1502 /* don't overwrite fault_dar/fault_dsisr if HDSI */
1503 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
1504 beq mc_cont
1505 std r6, VCPU_FAULT_DAR(r9)
1506 stw r7, VCPU_FAULT_DSISR(r9)
1507
1508 /* See if it is a machine check */ 1416 /* See if it is a machine check */
1509 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1417 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1510 beq machine_check_realmode 1418 beq machine_check_realmode
1511mc_cont: 1419 /* Or a hypervisor maintenance interrupt */
1420 cmpwi r12, BOOK3S_INTERRUPT_HMI
1421 beq hmi_realmode
1422
1423guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1424
1512#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1425#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1513 addi r3, r9, VCPU_TB_RMEXIT 1426 addi r3, r9, VCPU_TB_RMEXIT
1514 mr r4, r9 1427 mr r4, r9
@@ -1552,6 +1465,11 @@ mc_cont:
15521: 14651:
1553#endif /* CONFIG_KVM_XICS */ 1466#endif /* CONFIG_KVM_XICS */
1554 1467
1468 /* If we came in through the P9 short path, go back out to C now */
1469 lwz r0, STACK_SLOT_SHORT_PATH(r1)
1470 cmpwi r0, 0
1471 bne guest_exit_short_path
1472
1555 /* For hash guest, read the guest SLB and save it away */ 1473 /* For hash guest, read the guest SLB and save it away */
1556 ld r5, VCPU_KVM(r9) 1474 ld r5, VCPU_KVM(r9)
1557 lbz r0, KVM_RADIX(r5) 1475 lbz r0, KVM_RADIX(r5)
@@ -1780,11 +1698,13 @@ BEGIN_FTR_SECTION
1780 b 91f 1698 b 91f
1781END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 1699END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
1782 /* 1700 /*
1783 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 1701 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
1784 */ 1702 */
1785 mr r3, r9 1703 mr r3, r9
1786 ld r4, VCPU_MSR(r3) 1704 ld r4, VCPU_MSR(r3)
1705 li r5, 0 /* don't preserve non-vol regs */
1787 bl kvmppc_save_tm_hv 1706 bl kvmppc_save_tm_hv
1707 nop
1788 ld r9, HSTATE_KVM_VCPU(r13) 1708 ld r9, HSTATE_KVM_VCPU(r13)
178991: 170991:
1790#endif 1710#endif
@@ -1802,83 +1722,12 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
180225: 172225:
1803 /* Save PMU registers if requested */ 1723 /* Save PMU registers if requested */
1804 /* r8 and cr0.eq are live here */ 1724 /* r8 and cr0.eq are live here */
1805BEGIN_FTR_SECTION 1725 mr r3, r9
1806 /* 1726 li r4, 1
1807 * POWER8 seems to have a hardware bug where setting
1808 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
1809 * when some counters are already negative doesn't seem
1810 * to cause a performance monitor alert (and hence interrupt).
1811 * The effect of this is that when saving the PMU state,
1812 * if there is no PMU alert pending when we read MMCR0
1813 * before freezing the counters, but one becomes pending
1814 * before we read the counters, we lose it.
1815 * To work around this, we need a way to freeze the counters
1816 * before reading MMCR0. Normally, freezing the counters
1817 * is done by writing MMCR0 (to set MMCR0[FC]) which
1818 * unavoidably writes MMCR0[PMA0] as well. On POWER8,
1819 * we can also freeze the counters using MMCR2, by writing
1820 * 1s to all the counter freeze condition bits (there are
1821 * 9 bits each for 6 counters).
1822 */
1823 li r3, -1 /* set all freeze bits */
1824 clrrdi r3, r3, 10
1825 mfspr r10, SPRN_MMCR2
1826 mtspr SPRN_MMCR2, r3
1827 isync
1828END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1829 li r3, 1
1830 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
1831 mfspr r4, SPRN_MMCR0 /* save MMCR0 */
1832 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
1833 mfspr r6, SPRN_MMCRA
1834 /* Clear MMCRA in order to disable SDAR updates */
1835 li r7, 0
1836 mtspr SPRN_MMCRA, r7
1837 isync
1838 beq 21f /* if no VPA, save PMU stuff anyway */ 1727 beq 21f /* if no VPA, save PMU stuff anyway */
1839 lbz r7, LPPACA_PMCINUSE(r8) 1728 lbz r4, LPPACA_PMCINUSE(r8)
1840 cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */ 172921: bl kvmhv_save_guest_pmu
1841 bne 21f 1730 ld r9, HSTATE_KVM_VCPU(r13)
1842 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
1843 b 22f
184421: mfspr r5, SPRN_MMCR1
1845 mfspr r7, SPRN_SIAR
1846 mfspr r8, SPRN_SDAR
1847 std r4, VCPU_MMCR(r9)
1848 std r5, VCPU_MMCR + 8(r9)
1849 std r6, VCPU_MMCR + 16(r9)
1850BEGIN_FTR_SECTION
1851 std r10, VCPU_MMCR + 24(r9)
1852END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1853 std r7, VCPU_SIAR(r9)
1854 std r8, VCPU_SDAR(r9)
1855 mfspr r3, SPRN_PMC1
1856 mfspr r4, SPRN_PMC2
1857 mfspr r5, SPRN_PMC3
1858 mfspr r6, SPRN_PMC4
1859 mfspr r7, SPRN_PMC5
1860 mfspr r8, SPRN_PMC6
1861 stw r3, VCPU_PMC(r9)
1862 stw r4, VCPU_PMC + 4(r9)
1863 stw r5, VCPU_PMC + 8(r9)
1864 stw r6, VCPU_PMC + 12(r9)
1865 stw r7, VCPU_PMC + 16(r9)
1866 stw r8, VCPU_PMC + 20(r9)
1867BEGIN_FTR_SECTION
1868 mfspr r5, SPRN_SIER
1869 std r5, VCPU_SIER(r9)
1870BEGIN_FTR_SECTION_NESTED(96)
1871 mfspr r6, SPRN_SPMC1
1872 mfspr r7, SPRN_SPMC2
1873 mfspr r8, SPRN_MMCRS
1874 stw r6, VCPU_PMC + 24(r9)
1875 stw r7, VCPU_PMC + 28(r9)
1876 std r8, VCPU_MMCR + 32(r9)
1877 lis r4, 0x8000
1878 mtspr SPRN_MMCRS, r4
1879END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
1880END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
188122:
1882 1731
1883 /* Restore host values of some registers */ 1732 /* Restore host values of some registers */
1884BEGIN_FTR_SECTION 1733BEGIN_FTR_SECTION
@@ -2010,24 +1859,6 @@ BEGIN_FTR_SECTION
2010 mtspr SPRN_DPDES, r8 1859 mtspr SPRN_DPDES, r8
2011END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1860END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2012 1861
2013 /* If HMI, call kvmppc_realmode_hmi_handler() */
2014 lwz r12, STACK_SLOT_TRAP(r1)
2015 cmpwi r12, BOOK3S_INTERRUPT_HMI
2016 bne 27f
2017 bl kvmppc_realmode_hmi_handler
2018 nop
2019 cmpdi r3, 0
2020 /*
2021 * At this point kvmppc_realmode_hmi_handler may have resync-ed
2022 * the TB, and if it has, we must not subtract the guest timebase
2023 * offset from the timebase. So, skip it.
2024 *
2025 * Also, do not call kvmppc_subcore_exit_guest() because it has
2026 * been invoked as part of kvmppc_realmode_hmi_handler().
2027 */
2028 beq 30f
2029
203027:
2031 /* Subtract timebase offset from timebase */ 1862 /* Subtract timebase offset from timebase */
2032 ld r8, VCORE_TB_OFFSET_APPL(r5) 1863 ld r8, VCORE_TB_OFFSET_APPL(r5)
2033 cmpdi r8,0 1864 cmpdi r8,0
@@ -2045,7 +1876,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2045 addis r8,r8,0x100 /* if so, increment upper 40 bits */ 1876 addis r8,r8,0x100 /* if so, increment upper 40 bits */
2046 mtspr SPRN_TBU40,r8 1877 mtspr SPRN_TBU40,r8
2047 1878
204817: bl kvmppc_subcore_exit_guest 187917:
1880 /*
1881 * If this is an HMI, we called kvmppc_realmode_hmi_handler
1882 * above, which may or may not have already called
1883 * kvmppc_subcore_exit_guest. Fortunately, all that
1884 * kvmppc_subcore_exit_guest does is clear a flag, so calling
1885 * it again here is benign even if kvmppc_realmode_hmi_handler
1886 * has already called it.
1887 */
1888 bl kvmppc_subcore_exit_guest
2049 nop 1889 nop
205030: ld r5,HSTATE_KVM_VCORE(r13) 189030: ld r5,HSTATE_KVM_VCORE(r13)
2051 ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ 1891 ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
@@ -2099,6 +1939,67 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2099 mtlr r0 1939 mtlr r0
2100 blr 1940 blr
2101 1941
1942kvmppc_guest_external:
1943 /* External interrupt, first check for host_ipi. If this is
1944 * set, we know the host wants us out so let's do it now
1945 */
1946 bl kvmppc_read_intr
1947
1948 /*
1949 * Restore the active volatile registers after returning from
1950 * a C function.
1951 */
1952 ld r9, HSTATE_KVM_VCPU(r13)
1953 li r12, BOOK3S_INTERRUPT_EXTERNAL
1954
1955 /*
1956 * kvmppc_read_intr return codes:
1957 *
1958 * Exit to host (r3 > 0)
1959 * 1 An interrupt is pending that needs to be handled by the host
1960 * Exit guest and return to host by branching to guest_exit_cont
1961 *
1962 * 2 Passthrough that needs completion in the host
1963 * Exit guest and return to host by branching to guest_exit_cont
1964 * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
1965 * to indicate to the host to complete handling the interrupt
1966 *
1967 * Before returning to guest, we check if any CPU is heading out
1968 * to the host and if so, we head out also. If no CPUs are heading
1969 * check return values <= 0.
1970 *
1971 * Return to guest (r3 <= 0)
1972 * 0 No external interrupt is pending
1973 * -1 A guest wakeup IPI (which has now been cleared)
1974 * In either case, we return to guest to deliver any pending
1975 * guest interrupts.
1976 *
1977 * -2 A PCI passthrough external interrupt was handled
1978 * (interrupt was delivered directly to guest)
1979 * Return to guest to deliver any pending guest interrupts.
1980 */
1981
1982 cmpdi r3, 1
1983 ble 1f
1984
1985 /* Return code = 2 */
1986 li r12, BOOK3S_INTERRUPT_HV_RM_HARD
1987 stw r12, VCPU_TRAP(r9)
1988 b guest_exit_cont
1989
19901: /* Return code <= 1 */
1991 cmpdi r3, 0
1992 bgt guest_exit_cont
1993
1994 /* Return code <= 0 */
1995maybe_reenter_guest:
1996 ld r5, HSTATE_KVM_VCORE(r13)
1997 lwz r0, VCORE_ENTRY_EXIT(r5)
1998 cmpwi r0, 0x100
1999 mr r4, r9
2000 blt deliver_guest_interrupt
2001 b guest_exit_cont
2002
2102#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2003#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2103/* 2004/*
2104 * Softpatch interrupt for transactional memory emulation cases 2005 * Softpatch interrupt for transactional memory emulation cases
@@ -2302,6 +2203,10 @@ hcall_try_real_mode:
2302 andi. r0,r11,MSR_PR 2203 andi. r0,r11,MSR_PR
2303 /* sc 1 from userspace - reflect to guest syscall */ 2204 /* sc 1 from userspace - reflect to guest syscall */
2304 bne sc_1_fast_return 2205 bne sc_1_fast_return
2206 /* sc 1 from nested guest - give it to L1 to handle */
2207 ld r0, VCPU_NESTED(r9)
2208 cmpdi r0, 0
2209 bne guest_exit_cont
2305 clrrdi r3,r3,2 2210 clrrdi r3,r3,2
2306 cmpldi r3,hcall_real_table_end - hcall_real_table 2211 cmpldi r3,hcall_real_table_end - hcall_real_table
2307 bge guest_exit_cont 2212 bge guest_exit_cont
@@ -2561,6 +2466,7 @@ hcall_real_table:
2561hcall_real_table_end: 2466hcall_real_table_end:
2562 2467
2563_GLOBAL(kvmppc_h_set_xdabr) 2468_GLOBAL(kvmppc_h_set_xdabr)
2469EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
2564 andi. r0, r5, DABRX_USER | DABRX_KERNEL 2470 andi. r0, r5, DABRX_USER | DABRX_KERNEL
2565 beq 6f 2471 beq 6f
2566 li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI 2472 li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
@@ -2570,6 +2476,7 @@ _GLOBAL(kvmppc_h_set_xdabr)
2570 blr 2476 blr
2571 2477
2572_GLOBAL(kvmppc_h_set_dabr) 2478_GLOBAL(kvmppc_h_set_dabr)
2479EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
2573 li r5, DABRX_USER | DABRX_KERNEL 2480 li r5, DABRX_USER | DABRX_KERNEL
25743: 24813:
2575BEGIN_FTR_SECTION 2482BEGIN_FTR_SECTION
@@ -2682,11 +2589,13 @@ BEGIN_FTR_SECTION
2682 b 91f 2589 b 91f
2683END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 2590END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2684 /* 2591 /*
2685 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 2592 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
2686 */ 2593 */
2687 ld r3, HSTATE_KVM_VCPU(r13) 2594 ld r3, HSTATE_KVM_VCPU(r13)
2688 ld r4, VCPU_MSR(r3) 2595 ld r4, VCPU_MSR(r3)
2596 li r5, 0 /* don't preserve non-vol regs */
2689 bl kvmppc_save_tm_hv 2597 bl kvmppc_save_tm_hv
2598 nop
269091: 259991:
2691#endif 2600#endif
2692 2601
@@ -2802,11 +2711,13 @@ BEGIN_FTR_SECTION
2802 b 91f 2711 b 91f
2803END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 2712END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2804 /* 2713 /*
2805 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 2714 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
2806 */ 2715 */
2807 mr r3, r4 2716 mr r3, r4
2808 ld r4, VCPU_MSR(r3) 2717 ld r4, VCPU_MSR(r3)
2718 li r5, 0 /* don't preserve non-vol regs */
2809 bl kvmppc_restore_tm_hv 2719 bl kvmppc_restore_tm_hv
2720 nop
2810 ld r4, HSTATE_KVM_VCPU(r13) 2721 ld r4, HSTATE_KVM_VCPU(r13)
281191: 272291:
2812#endif 2723#endif
@@ -2874,13 +2785,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2874 mr r9, r4 2785 mr r9, r4
2875 cmpdi r3, 0 2786 cmpdi r3, 0
2876 bgt guest_exit_cont 2787 bgt guest_exit_cont
2877 2788 b maybe_reenter_guest
2878 /* see if any other thread is already exiting */
2879 lwz r0,VCORE_ENTRY_EXIT(r5)
2880 cmpwi r0,0x100
2881 bge guest_exit_cont
2882
2883 b kvmppc_cede_reentry /* if not go back to guest */
2884 2789
2885 /* cede when already previously prodded case */ 2790 /* cede when already previously prodded case */
2886kvm_cede_prodded: 2791kvm_cede_prodded:
@@ -2947,12 +2852,12 @@ machine_check_realmode:
2947 */ 2852 */
2948 ld r11, VCPU_MSR(r9) 2853 ld r11, VCPU_MSR(r9)
2949 rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ 2854 rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
2950 bne mc_cont /* if so, exit to host */ 2855 bne guest_exit_cont /* if so, exit to host */
2951 /* Check if guest is capable of handling NMI exit */ 2856 /* Check if guest is capable of handling NMI exit */
2952 ld r10, VCPU_KVM(r9) 2857 ld r10, VCPU_KVM(r9)
2953 lbz r10, KVM_FWNMI(r10) 2858 lbz r10, KVM_FWNMI(r10)
2954 cmpdi r10, 1 /* FWNMI capable? */ 2859 cmpdi r10, 1 /* FWNMI capable? */
2955 beq mc_cont /* if so, exit with KVM_EXIT_NMI. */ 2860 beq guest_exit_cont /* if so, exit with KVM_EXIT_NMI. */
2956 2861
2957 /* if not, fall through for backward compatibility. */ 2862 /* if not, fall through for backward compatibility. */
2958 andi. r10, r11, MSR_RI /* check for unrecoverable exception */ 2863 andi. r10, r11, MSR_RI /* check for unrecoverable exception */
@@ -2966,6 +2871,21 @@ machine_check_realmode:
29662: b fast_interrupt_c_return 28712: b fast_interrupt_c_return
2967 2872
2968/* 2873/*
2874 * Call C code to handle a HMI in real mode.
2875 * Only the primary thread does the call, secondary threads are handled
2876 * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
2877 * r9 points to the vcpu on entry
2878 */
2879hmi_realmode:
2880 lbz r0, HSTATE_PTID(r13)
2881 cmpwi r0, 0
2882 bne guest_exit_cont
2883 bl kvmppc_realmode_hmi_handler
2884 ld r9, HSTATE_KVM_VCPU(r13)
2885 li r12, BOOK3S_INTERRUPT_HMI
2886 b guest_exit_cont
2887
2888/*
2969 * Check the reason we woke from nap, and take appropriate action. 2889 * Check the reason we woke from nap, and take appropriate action.
2970 * Returns (in r3): 2890 * Returns (in r3):
2971 * 0 if nothing needs to be done 2891 * 0 if nothing needs to be done
@@ -3130,10 +3050,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3130 * Save transactional state and TM-related registers. 3050 * Save transactional state and TM-related registers.
3131 * Called with r3 pointing to the vcpu struct and r4 containing 3051 * Called with r3 pointing to the vcpu struct and r4 containing
3132 * the guest MSR value. 3052 * the guest MSR value.
3133 * This can modify all checkpointed registers, but 3053 * r5 is non-zero iff non-volatile register state needs to be maintained.
3054 * If r5 == 0, this can modify all checkpointed registers, but
3134 * restores r1 and r2 before exit. 3055 * restores r1 and r2 before exit.
3135 */ 3056 */
3136kvmppc_save_tm_hv: 3057_GLOBAL_TOC(kvmppc_save_tm_hv)
3058EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
3137 /* See if we need to handle fake suspend mode */ 3059 /* See if we need to handle fake suspend mode */
3138BEGIN_FTR_SECTION 3060BEGIN_FTR_SECTION
3139 b __kvmppc_save_tm 3061 b __kvmppc_save_tm
@@ -3161,12 +3083,6 @@ BEGIN_FTR_SECTION
3161END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) 3083END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
3162 nop 3084 nop
3163 3085
3164 std r1, HSTATE_HOST_R1(r13)
3165
3166 /* Clear the MSR RI since r1, r13 may be foobar. */
3167 li r5, 0
3168 mtmsrd r5, 1
3169
3170 /* We have to treclaim here because that's the only way to do S->N */ 3086 /* We have to treclaim here because that's the only way to do S->N */
3171 li r3, TM_CAUSE_KVM_RESCHED 3087 li r3, TM_CAUSE_KVM_RESCHED
3172 TRECLAIM(R3) 3088 TRECLAIM(R3)
@@ -3175,22 +3091,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
3175 * We were in fake suspend, so we are not going to save the 3091 * We were in fake suspend, so we are not going to save the
3176 * register state as the guest checkpointed state (since 3092 * register state as the guest checkpointed state (since
3177 * we already have it), therefore we can now use any volatile GPR. 3093 * we already have it), therefore we can now use any volatile GPR.
3094 * In fact treclaim in fake suspend state doesn't modify
3095 * any registers.
3178 */ 3096 */
3179 /* Reload PACA pointer, stack pointer and TOC. */
3180 GET_PACA(r13)
3181 ld r1, HSTATE_HOST_R1(r13)
3182 ld r2, PACATOC(r13)
3183 3097
3184 /* Set MSR RI now we have r1 and r13 back. */ 3098BEGIN_FTR_SECTION
3185 li r5, MSR_RI
3186 mtmsrd r5, 1
3187
3188 HMT_MEDIUM
3189 ld r6, HSTATE_DSCR(r13)
3190 mtspr SPRN_DSCR, r6
3191BEGIN_FTR_SECTION_NESTED(96)
3192 bl pnv_power9_force_smt4_release 3099 bl pnv_power9_force_smt4_release
3193END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) 3100END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
3194 nop 3101 nop
3195 3102
31964: 31034:
@@ -3216,10 +3123,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
3216 * Restore transactional state and TM-related registers. 3123 * Restore transactional state and TM-related registers.
3217 * Called with r3 pointing to the vcpu struct 3124 * Called with r3 pointing to the vcpu struct
3218 * and r4 containing the guest MSR value. 3125 * and r4 containing the guest MSR value.
3126 * r5 is non-zero iff non-volatile register state needs to be maintained.
3219 * This potentially modifies all checkpointed registers. 3127 * This potentially modifies all checkpointed registers.
3220 * It restores r1 and r2 from the PACA. 3128 * It restores r1 and r2 from the PACA.
3221 */ 3129 */
3222kvmppc_restore_tm_hv: 3130_GLOBAL_TOC(kvmppc_restore_tm_hv)
3131EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
3223 /* 3132 /*
3224 * If we are doing TM emulation for the guest on a POWER9 DD2, 3133 * If we are doing TM emulation for the guest on a POWER9 DD2,
3225 * then we don't actually do a trechkpt -- we either set up 3134 * then we don't actually do a trechkpt -- we either set up
@@ -3424,6 +3333,194 @@ kvmppc_msr_interrupt:
3424 blr 3333 blr
3425 3334
3426/* 3335/*
3336 * Load up guest PMU state. R3 points to the vcpu struct.
3337 */
3338_GLOBAL(kvmhv_load_guest_pmu)
3339EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
3340 mr r4, r3
3341 mflr r0
3342 li r3, 1
3343 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
3344 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
3345 isync
3346BEGIN_FTR_SECTION
3347 ld r3, VCPU_MMCR(r4)
3348 andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
3349 cmpwi r5, MMCR0_PMAO
3350 beql kvmppc_fix_pmao
3351END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
3352 lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
3353 lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
3354 lwz r6, VCPU_PMC + 8(r4)
3355 lwz r7, VCPU_PMC + 12(r4)
3356 lwz r8, VCPU_PMC + 16(r4)
3357 lwz r9, VCPU_PMC + 20(r4)
3358 mtspr SPRN_PMC1, r3
3359 mtspr SPRN_PMC2, r5
3360 mtspr SPRN_PMC3, r6
3361 mtspr SPRN_PMC4, r7
3362 mtspr SPRN_PMC5, r8
3363 mtspr SPRN_PMC6, r9
3364 ld r3, VCPU_MMCR(r4)
3365 ld r5, VCPU_MMCR + 8(r4)
3366 ld r6, VCPU_MMCR + 16(r4)
3367 ld r7, VCPU_SIAR(r4)
3368 ld r8, VCPU_SDAR(r4)
3369 mtspr SPRN_MMCR1, r5
3370 mtspr SPRN_MMCRA, r6
3371 mtspr SPRN_SIAR, r7
3372 mtspr SPRN_SDAR, r8
3373BEGIN_FTR_SECTION
3374 ld r5, VCPU_MMCR + 24(r4)
3375 ld r6, VCPU_SIER(r4)
3376 mtspr SPRN_MMCR2, r5
3377 mtspr SPRN_SIER, r6
3378BEGIN_FTR_SECTION_NESTED(96)
3379 lwz r7, VCPU_PMC + 24(r4)
3380 lwz r8, VCPU_PMC + 28(r4)
3381 ld r9, VCPU_MMCR + 32(r4)
3382 mtspr SPRN_SPMC1, r7
3383 mtspr SPRN_SPMC2, r8
3384 mtspr SPRN_MMCRS, r9
3385END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
3386END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3387 mtspr SPRN_MMCR0, r3
3388 isync
3389 mtlr r0
3390 blr
3391
3392/*
3393 * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
3394 */
3395_GLOBAL(kvmhv_load_host_pmu)
3396EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
3397 mflr r0
3398 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
3399 cmpwi r4, 0
3400 beq 23f /* skip if not */
3401BEGIN_FTR_SECTION
3402 ld r3, HSTATE_MMCR0(r13)
3403 andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
3404 cmpwi r4, MMCR0_PMAO
3405 beql kvmppc_fix_pmao
3406END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
3407 lwz r3, HSTATE_PMC1(r13)
3408 lwz r4, HSTATE_PMC2(r13)
3409 lwz r5, HSTATE_PMC3(r13)
3410 lwz r6, HSTATE_PMC4(r13)
3411 lwz r8, HSTATE_PMC5(r13)
3412 lwz r9, HSTATE_PMC6(r13)
3413 mtspr SPRN_PMC1, r3
3414 mtspr SPRN_PMC2, r4
3415 mtspr SPRN_PMC3, r5
3416 mtspr SPRN_PMC4, r6
3417 mtspr SPRN_PMC5, r8
3418 mtspr SPRN_PMC6, r9
3419 ld r3, HSTATE_MMCR0(r13)
3420 ld r4, HSTATE_MMCR1(r13)
3421 ld r5, HSTATE_MMCRA(r13)
3422 ld r6, HSTATE_SIAR(r13)
3423 ld r7, HSTATE_SDAR(r13)
3424 mtspr SPRN_MMCR1, r4
3425 mtspr SPRN_MMCRA, r5
3426 mtspr SPRN_SIAR, r6
3427 mtspr SPRN_SDAR, r7
3428BEGIN_FTR_SECTION
3429 ld r8, HSTATE_MMCR2(r13)
3430 ld r9, HSTATE_SIER(r13)
3431 mtspr SPRN_MMCR2, r8
3432 mtspr SPRN_SIER, r9
3433END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3434 mtspr SPRN_MMCR0, r3
3435 isync
3436 mtlr r0
343723: blr
3438
3439/*
3440 * Save guest PMU state into the vcpu struct.
3441 * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
3442 */
3443_GLOBAL(kvmhv_save_guest_pmu)
3444EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
3445 mr r9, r3
3446 mr r8, r4
3447BEGIN_FTR_SECTION
3448 /*
3449 * POWER8 seems to have a hardware bug where setting
3450 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
3451 * when some counters are already negative doesn't seem
3452 * to cause a performance monitor alert (and hence interrupt).
3453 * The effect of this is that when saving the PMU state,
3454 * if there is no PMU alert pending when we read MMCR0
3455 * before freezing the counters, but one becomes pending
3456 * before we read the counters, we lose it.
3457 * To work around this, we need a way to freeze the counters
3458 * before reading MMCR0. Normally, freezing the counters
3459 * is done by writing MMCR0 (to set MMCR0[FC]) which
3460 * unavoidably writes MMCR0[PMAO] as well. On POWER8,
3461 * we can also freeze the counters using MMCR2, by writing
3462 * 1s to all the counter freeze condition bits (there are
3463 * 9 bits each for 6 counters).
3464 */
3465 li r3, -1 /* set all freeze bits */
3466 clrrdi r3, r3, 10
3467 mfspr r10, SPRN_MMCR2
3468 mtspr SPRN_MMCR2, r3
3469 isync
3470END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3471 li r3, 1
3472 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
3473 mfspr r4, SPRN_MMCR0 /* save MMCR0 */
3474 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
3475 mfspr r6, SPRN_MMCRA
3476 /* Clear MMCRA in order to disable SDAR updates */
3477 li r7, 0
3478 mtspr SPRN_MMCRA, r7
3479 isync
3480 cmpwi r8, 0 /* did they ask for PMU stuff to be saved? */
3481 bne 21f
3482 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
3483 b 22f
348421: mfspr r5, SPRN_MMCR1
3485 mfspr r7, SPRN_SIAR
3486 mfspr r8, SPRN_SDAR
3487 std r4, VCPU_MMCR(r9)
3488 std r5, VCPU_MMCR + 8(r9)
3489 std r6, VCPU_MMCR + 16(r9)
3490BEGIN_FTR_SECTION
3491 std r10, VCPU_MMCR + 24(r9)
3492END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3493 std r7, VCPU_SIAR(r9)
3494 std r8, VCPU_SDAR(r9)
3495 mfspr r3, SPRN_PMC1
3496 mfspr r4, SPRN_PMC2
3497 mfspr r5, SPRN_PMC3
3498 mfspr r6, SPRN_PMC4
3499 mfspr r7, SPRN_PMC5
3500 mfspr r8, SPRN_PMC6
3501 stw r3, VCPU_PMC(r9)
3502 stw r4, VCPU_PMC + 4(r9)
3503 stw r5, VCPU_PMC + 8(r9)
3504 stw r6, VCPU_PMC + 12(r9)
3505 stw r7, VCPU_PMC + 16(r9)
3506 stw r8, VCPU_PMC + 20(r9)
3507BEGIN_FTR_SECTION
3508 mfspr r5, SPRN_SIER
3509 std r5, VCPU_SIER(r9)
3510BEGIN_FTR_SECTION_NESTED(96)
3511 mfspr r6, SPRN_SPMC1
3512 mfspr r7, SPRN_SPMC2
3513 mfspr r8, SPRN_MMCRS
3514 stw r6, VCPU_PMC + 24(r9)
3515 stw r7, VCPU_PMC + 28(r9)
3516 std r8, VCPU_MMCR + 32(r9)
3517 lis r4, 0x8000
3518 mtspr SPRN_MMCRS, r4
3519END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
3520END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
352122: blr
3522
3523/*
3427 * This works around a hardware bug on POWER8E processors, where 3524 * This works around a hardware bug on POWER8E processors, where
3428 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a 3525 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
3429 * performance monitor interrupt. Instead, when we need to have 3526 * performance monitor interrupt. Instead, when we need to have
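
The MMCR2-before-MMCR0 ordering used by kvmhv_save_guest_pmu above is easier to follow in C. The sketch below is illustrative only: freeze_pmu_power8_sketch() is a made-up name, and it assumes the usual mfspr()/mtspr()/isync() helpers and the SPRN_*/MMCR0_FC constants from asm/reg.h and asm/synch.h; it is not the kernel's implementation.

#include <asm/reg.h>
#include <asm/synch.h>

/*
 * Illustration of the POWER8 workaround above: freezing through MMCR2
 * first means a performance monitor alert cannot be lost between reading
 * MMCR0 and reading the counters.
 */
static void freeze_pmu_power8_sketch(unsigned long *guest_mmcr0,
				     unsigned long *guest_mmcr2)
{
	*guest_mmcr2 = mfspr(SPRN_MMCR2);	/* value saved for the guest */
	mtspr(SPRN_MMCR2, ~0UL << 10);		/* 1s in all 9-bit freeze-condition fields */
	isync();
	*guest_mmcr0 = mfspr(SPRN_MMCR0);	/* alert state is stable from here on */
	mtspr(SPRN_MMCR0, MMCR0_FC);		/* then freeze via MMCR0 as usual */
	isync();
	/* PMC1-6, MMCR1, SIAR, SDAR etc. can now be read and saved safely */
}
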
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index 008285058f9b..888e2609e3f1 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
130 return RESUME_GUEST; 130 return RESUME_GUEST;
131 } 131 }
132 /* Set CR0 to indicate previous transactional state */ 132 /* Set CR0 to indicate previous transactional state */
133 vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 133 vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
134 (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); 134 (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
135 /* L=1 => tresume, L=0 => tsuspend */ 135 /* L=1 => tresume, L=0 => tsuspend */
136 if (instr & (1 << 21)) { 136 if (instr & (1 << 21)) {
@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
174 copy_from_checkpoint(vcpu); 174 copy_from_checkpoint(vcpu);
175 175
176 /* Set CR0 to indicate previous transactional state */ 176 /* Set CR0 to indicate previous transactional state */
177 vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 177 vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
178 (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); 178 (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
179 vcpu->arch.shregs.msr &= ~MSR_TS_MASK; 179 vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
180 return RESUME_GUEST; 180 return RESUME_GUEST;
@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
204 copy_to_checkpoint(vcpu); 204 copy_to_checkpoint(vcpu);
205 205
206 /* Set CR0 to indicate previous transactional state */ 206 /* Set CR0 to indicate previous transactional state */
207 vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 207 vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
208 (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); 208 (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
209 vcpu->arch.shregs.msr = msr | MSR_TS_S; 209 vcpu->arch.shregs.msr = msr | MSR_TS_S;
210 return RESUME_GUEST; 210 return RESUME_GUEST;
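
The repeated CR0 update in these hunks only changes where the value is stored (vcpu->arch.regs.ccr instead of the old vcpu->arch.cr). A hedged helper, purely for illustration (the function name is made up), makes the encoding explicit:

#include <linux/kvm_host.h>
#include <asm/reg.h>

/*
 * Illustration of the CR0 update used by the TM emulation above: the two
 * MSR[TS] bits of the previous transactional state are shifted into bits
 * 29:28 of the CCR, and the rest of the CR0 field is cleared.
 */
static void set_cr0_from_prev_ts(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
		(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
}
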
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
index b2c7c6fca4f9..3cf5863bc06e 100644
--- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
89 if (instr & (1 << 21)) 89 if (instr & (1 << 21))
90 vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; 90 vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
91 /* Set CR0 to 0b0010 */ 91 /* Set CR0 to 0b0010 */
92 vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000; 92 vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
93 0x20000000;
93 return 1; 94 return 1;
94 } 95 }
95 96
@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
105 vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ 106 vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
106 vcpu->arch.regs.nip = vcpu->arch.tfhar; 107 vcpu->arch.regs.nip = vcpu->arch.tfhar;
107 copy_from_checkpoint(vcpu); 108 copy_from_checkpoint(vcpu);
108 vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; 109 vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
109} 110}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 614ebb4261f7..4efd65d9e828 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
167 svcpu->gpr[11] = vcpu->arch.regs.gpr[11]; 167 svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
168 svcpu->gpr[12] = vcpu->arch.regs.gpr[12]; 168 svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
169 svcpu->gpr[13] = vcpu->arch.regs.gpr[13]; 169 svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
170 svcpu->cr = vcpu->arch.cr; 170 svcpu->cr = vcpu->arch.regs.ccr;
171 svcpu->xer = vcpu->arch.regs.xer; 171 svcpu->xer = vcpu->arch.regs.xer;
172 svcpu->ctr = vcpu->arch.regs.ctr; 172 svcpu->ctr = vcpu->arch.regs.ctr;
173 svcpu->lr = vcpu->arch.regs.link; 173 svcpu->lr = vcpu->arch.regs.link;
@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
249 vcpu->arch.regs.gpr[11] = svcpu->gpr[11]; 249 vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
250 vcpu->arch.regs.gpr[12] = svcpu->gpr[12]; 250 vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
251 vcpu->arch.regs.gpr[13] = svcpu->gpr[13]; 251 vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
252 vcpu->arch.cr = svcpu->cr; 252 vcpu->arch.regs.ccr = svcpu->cr;
253 vcpu->arch.regs.xer = svcpu->xer; 253 vcpu->arch.regs.xer = svcpu->xer;
254 vcpu->arch.regs.ctr = svcpu->ctr; 254 vcpu->arch.regs.ctr = svcpu->ctr;
255 vcpu->arch.regs.link = svcpu->lr; 255 vcpu->arch.regs.link = svcpu->lr;
@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
1246 r = RESUME_GUEST; 1246 r = RESUME_GUEST;
1247 break; 1247 break;
1248 case BOOK3S_INTERRUPT_EXTERNAL: 1248 case BOOK3S_INTERRUPT_EXTERNAL:
1249 case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
1250 case BOOK3S_INTERRUPT_EXTERNAL_HV: 1249 case BOOK3S_INTERRUPT_EXTERNAL_HV:
1251 case BOOK3S_INTERRUPT_H_VIRT: 1250 case BOOK3S_INTERRUPT_H_VIRT:
1252 vcpu->stat.ext_intr_exits++; 1251 vcpu->stat.ext_intr_exits++;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index b8356cdc0c04..b0b2bfc2ff51 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
310 */ 310 */
311 if (new.out_ee) { 311 if (new.out_ee) {
312 kvmppc_book3s_queue_irqprio(icp->vcpu, 312 kvmppc_book3s_queue_irqprio(icp->vcpu,
313 BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 313 BOOK3S_INTERRUPT_EXTERNAL);
314 if (!change_self) 314 if (!change_self)
315 kvmppc_fast_vcpu_kick(icp->vcpu); 315 kvmppc_fast_vcpu_kick(icp->vcpu);
316 } 316 }
@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
593 u32 xirr; 593 u32 xirr;
594 594
595 /* First, remove EE from the processor */ 595 /* First, remove EE from the processor */
596 kvmppc_book3s_dequeue_irqprio(icp->vcpu, 596 kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
597 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
598 597
599 /* 598 /*
600 * ICP State: Accept_Interrupt 599 * ICP State: Accept_Interrupt
@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
754 * We can remove EE from the current processor, the update 753 * We can remove EE from the current processor, the update
755 * transaction will set it again if needed 754 * transaction will set it again if needed
756 */ 755 */
757 kvmppc_book3s_dequeue_irqprio(icp->vcpu, 756 kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
758 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
759 757
760 do { 758 do {
761 old_state = new_state = READ_ONCE(icp->state); 759 old_state = new_state = READ_ONCE(icp->state);
@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
1167 * Deassert the CPU interrupt request. 1165 * Deassert the CPU interrupt request.
1168 * icp_try_update will reassert it if necessary. 1166 * icp_try_update will reassert it if necessary.
1169 */ 1167 */
1170 kvmppc_book3s_dequeue_irqprio(icp->vcpu, 1168 kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
1171 BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
1172 1169
1173 /* 1170 /*
1174 * Note that if we displace an interrupt from old_state.xisr, 1171 * Note that if we displace an interrupt from old_state.xisr,
@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
1393 } 1390 }
1394 1391
1395#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1392#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1396 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 1393 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
1394 cpu_has_feature(CPU_FTR_HVMODE)) {
1397 /* Enable real mode support */ 1395 /* Enable real mode support */
1398 xics->real_mode = ENABLE_REALMODE; 1396 xics->real_mode = ENABLE_REALMODE;
1399 xics->real_mode_dbg = DEBUG_REALMODE; 1397 xics->real_mode_dbg = DEBUG_REALMODE;
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 30c2eb766954..ad4a370703d3 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -62,6 +62,69 @@
62#define XIVE_Q_GAP 2 62#define XIVE_Q_GAP 2
63 63
64/* 64/*
65 * Push a vcpu's context to the XIVE on guest entry.
66 * This assumes we are in virtual mode (MMU on)
67 */
68void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
69{
70 void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
71 u64 pq;
72
73 if (!tima)
74 return;
75 eieio();
76 __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
77 __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
78 vcpu->arch.xive_pushed = 1;
79 eieio();
80
81 /*
82 * We clear the irq_pending flag. There is a small chance of a
83 * race vs. the escalation interrupt happening on another
84 * processor setting it again, but the only consequence is to
85 * cause a spurious wakeup on the next H_CEDE, which is not an
86 * issue.
87 */
88 vcpu->arch.irq_pending = 0;
89
90 /*
91 * In single escalation mode, if the escalation interrupt is
92 * on, we mask it.
93 */
94 if (vcpu->arch.xive_esc_on) {
95 pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
96 XIVE_ESB_SET_PQ_01));
97 mb();
98
99 /*
100 * We have a possible subtle race here: The escalation
101 * interrupt might have fired and be on its way to the
102 * host queue while we mask it, and if we unmask it
103 * early enough (re-cede right away), there is a
104 * theoretical possibility that it fires again, thus
105 * landing in the target queue more than once which is
106 * a big no-no.
107 *
108 * Fortunately, solving this is rather easy. If the
109 * above load setting PQ to 01 returns a previous
110 * value where P is set, then we know the escalation
111 * interrupt is somewhere on its way to the host. In
112 * that case we simply don't clear the xive_esc_on
113 * flag below. It will be eventually cleared by the
114 * handler for the escalation interrupt.
115 *
116 * Then, when doing a cede, we check that flag again
117 * before re-enabling the escalation interrupt, and if
118 * set, we abort the cede.
119 */
120 if (!(pq & XIVE_ESB_VAL_P))
121 /* Now P is 0, we can clear the flag */
122 vcpu->arch.xive_esc_on = 0;
123 }
124}
125EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
126
127/*
65 * This is a simple trigger for a generic XIVE IRQ. This must 128 * This is a simple trigger for a generic XIVE IRQ. This must
66 * only be called for interrupts that support a trigger page 129 * only be called for interrupts that support a trigger page
67 */ 130 */
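
The escalation-masking logic in the new kvmppc_xive_push_vcpu() hinges on what the ESB load returns. Below is a reduced, illustrative-only C restatement of that check; the field and constant names are taken from the hunk, but the helper itself is made up and assumes the XIVE_ESB_* definitions from asm/xive-regs.h.

#include <linux/kvm_host.h>
#include <asm/io.h>
#include <asm/xive-regs.h>

/*
 * Illustration only: the load that sets PQ to 01 returns the previous PQ
 * value.  If P was already set, the escalation interrupt is in flight to
 * the host queue, so xive_esc_on must stay set until its handler clears it.
 */
static void mask_escalation_sketch(struct kvm_vcpu *vcpu)
{
	u64 pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
					      XIVE_ESB_SET_PQ_01));
	mb();
	if (!(pq & XIVE_ESB_VAL_P))
		vcpu->arch.xive_esc_on = 0;
}
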
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 4171ede8722b..033363d6e764 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
280 /* First collect pending bits from HW */ 280 /* First collect pending bits from HW */
281 GLUE(X_PFX,ack_pending)(xc); 281 GLUE(X_PFX,ack_pending)(xc);
282 282
283 /*
284 * Cleanup the old-style bits if needed (they may have been
285 * set by pull or an escalation interrupts).
286 */
287 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
288 clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
289 &vcpu->arch.pending_exceptions);
290
291 pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n", 283 pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
292 xc->pending, xc->hw_cppr, xc->cppr); 284 xc->pending, xc->hw_cppr, xc->cppr);
293 285
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 81bd8a07aa51..051af7d97327 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -182,7 +182,7 @@
182 */ 182 */
183 PPC_LL r4, PACACURRENT(r13) 183 PPC_LL r4, PACACURRENT(r13)
184 PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4) 184 PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
185 stw r10, VCPU_CR(r4) 185 PPC_STL r10, VCPU_CR(r4)
186 PPC_STL r11, VCPU_GPR(R4)(r4) 186 PPC_STL r11, VCPU_GPR(R4)(r4)
187 PPC_STL r5, VCPU_GPR(R5)(r4) 187 PPC_STL r5, VCPU_GPR(R5)(r4)
188 PPC_STL r6, VCPU_GPR(R6)(r4) 188 PPC_STL r6, VCPU_GPR(R6)(r4)
@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
292 PPC_STL r4, VCPU_GPR(R4)(r11) 292 PPC_STL r4, VCPU_GPR(R4)(r11)
293 PPC_LL r4, THREAD_NORMSAVE(0)(r10) 293 PPC_LL r4, THREAD_NORMSAVE(0)(r10)
294 PPC_STL r5, VCPU_GPR(R5)(r11) 294 PPC_STL r5, VCPU_GPR(R5)(r11)
295 stw r13, VCPU_CR(r11) 295 PPC_STL r13, VCPU_CR(r11)
296 mfspr r5, \srr0 296 mfspr r5, \srr0
297 PPC_STL r3, VCPU_GPR(R10)(r11) 297 PPC_STL r3, VCPU_GPR(R10)(r11)
298 PPC_LL r3, THREAD_NORMSAVE(2)(r10) 298 PPC_LL r3, THREAD_NORMSAVE(2)(r10)
@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
319 PPC_STL r4, VCPU_GPR(R4)(r11) 319 PPC_STL r4, VCPU_GPR(R4)(r11)
320 PPC_LL r4, GPR9(r8) 320 PPC_LL r4, GPR9(r8)
321 PPC_STL r5, VCPU_GPR(R5)(r11) 321 PPC_STL r5, VCPU_GPR(R5)(r11)
322 stw r9, VCPU_CR(r11) 322 PPC_STL r9, VCPU_CR(r11)
323 mfspr r5, \srr0 323 mfspr r5, \srr0
324 PPC_STL r3, VCPU_GPR(R8)(r11) 324 PPC_STL r3, VCPU_GPR(R8)(r11)
325 PPC_LL r3, GPR10(r8) 325 PPC_LL r3, GPR10(r8)
@@ -643,7 +643,7 @@ lightweight_exit:
643 PPC_LL r3, VCPU_LR(r4) 643 PPC_LL r3, VCPU_LR(r4)
644 PPC_LL r5, VCPU_XER(r4) 644 PPC_LL r5, VCPU_XER(r4)
645 PPC_LL r6, VCPU_CTR(r4) 645 PPC_LL r6, VCPU_CTR(r4)
646 lwz r7, VCPU_CR(r4) 646 PPC_LL r7, VCPU_CR(r4)
647 PPC_LL r8, VCPU_PC(r4) 647 PPC_LL r8, VCPU_PC(r4)
648 PPC_LD(r9, VCPU_SHARED_MSR, r11) 648 PPC_LD(r9, VCPU_SHARED_MSR, r11)
649 PPC_LL r0, VCPU_GPR(R0)(r4) 649 PPC_LL r0, VCPU_GPR(R0)(r4)
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 75dce1ef3bc8..f91b1309a0a8 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
117 117
118 emulated = EMULATE_FAIL; 118 emulated = EMULATE_FAIL;
119 vcpu->arch.regs.msr = vcpu->arch.shared->msr; 119 vcpu->arch.regs.msr = vcpu->arch.shared->msr;
120 vcpu->arch.regs.ccr = vcpu->arch.cr;
121 if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { 120 if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
122 int type = op.type & INSTR_TYPE_MASK; 121 int type = op.type & INSTR_TYPE_MASK;
123 int size = GETSIZE(op.type); 122 int size = GETSIZE(op.type);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index eba5756d5b41..2869a299c4ed 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -594,7 +594,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
594 r = !!(hv_enabled && radix_enabled()); 594 r = !!(hv_enabled && radix_enabled());
595 break; 595 break;
596 case KVM_CAP_PPC_MMU_HASH_V3: 596 case KVM_CAP_PPC_MMU_HASH_V3:
597 r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300)); 597 r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
598 cpu_has_feature(CPU_FTR_HVMODE));
599 break;
600 case KVM_CAP_PPC_NESTED_HV:
601 r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
602 !kvmppc_hv_ops->enable_nested(NULL));
598 break; 603 break;
599#endif 604#endif
600 case KVM_CAP_SYNC_MMU: 605 case KVM_CAP_SYNC_MMU:
@@ -2114,6 +2119,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
2114 r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags); 2119 r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
2115 break; 2120 break;
2116 } 2121 }
2122
2123 case KVM_CAP_PPC_NESTED_HV:
2124 r = -EINVAL;
2125 if (!is_kvmppc_hv_enabled(kvm) ||
2126 !kvm->arch.kvm_ops->enable_nested)
2127 break;
2128 r = kvm->arch.kvm_ops->enable_nested(kvm);
2129 break;
2117#endif 2130#endif
2118 default: 2131 default:
2119 r = -EINVAL; 2132 r = -EINVAL;
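
From userspace, the new capability is reached through the usual KVM_CHECK_EXTENSION / KVM_ENABLE_CAP pair on the VM descriptor. The sketch below is a minimal example and not part of this patch; vm_fd is assumed to be an already-created KVM VM file descriptor and error handling is elided.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Minimal sketch: probe for nested-HV support and turn it on for a VM. */
static int enable_nested_hv(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_PPC_NESTED_HV };

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_NESTED_HV) <= 0)
		return -1;	/* kernel or hardware cannot run nested guests */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
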
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 90e330f21356..0531a1492fdf 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -28,17 +28,25 @@
28 * Save transactional state and TM-related registers. 28 * Save transactional state and TM-related registers.
29 * Called with: 29 * Called with:
30 * - r3 pointing to the vcpu struct 30 * - r3 pointing to the vcpu struct
31 * - r4 points to the MSR with current TS bits: 31 * - r4 containing the MSR with current TS bits:
32 * (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR). 32 * (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
33 * This can modify all checkpointed registers, but 33 * - r5 containing a flag indicating that non-volatile registers
34 * restores r1, r2 before exit. 34 * must be preserved.
35 * If r5 == 0, this can modify all checkpointed registers, but
36 * restores r1, r2 before exit. If r5 != 0, this restores the
37 * MSR TM/FP/VEC/VSX bits to their state on entry.
35 */ 38 */
36_GLOBAL(__kvmppc_save_tm) 39_GLOBAL(__kvmppc_save_tm)
37 mflr r0 40 mflr r0
38 std r0, PPC_LR_STKOFF(r1) 41 std r0, PPC_LR_STKOFF(r1)
42 stdu r1, -SWITCH_FRAME_SIZE(r1)
43
44 mr r9, r3
45 cmpdi cr7, r5, 0
39 46
40 /* Turn on TM. */ 47 /* Turn on TM. */
41 mfmsr r8 48 mfmsr r8
49 mr r10, r8
42 li r0, 1 50 li r0, 1
43 rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG 51 rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
44 ori r8, r8, MSR_FP 52 ori r8, r8, MSR_FP
@@ -51,6 +59,27 @@ _GLOBAL(__kvmppc_save_tm)
51 std r1, HSTATE_SCRATCH2(r13) 59 std r1, HSTATE_SCRATCH2(r13)
52 std r3, HSTATE_SCRATCH1(r13) 60 std r3, HSTATE_SCRATCH1(r13)
53 61
62 /* Save CR on the stack - even if r5 == 0 we need to get cr7 back. */
63 mfcr r6
64 SAVE_GPR(6, r1)
65
66 /* Save DSCR so we can restore it to avoid running with user value */
67 mfspr r7, SPRN_DSCR
68 SAVE_GPR(7, r1)
69
70 /*
71 * We are going to do treclaim., which will modify all checkpointed
72 * registers. Save the non-volatile registers on the stack if
73 * preservation of non-volatile state has been requested.
74 */
75 beq cr7, 3f
76 SAVE_NVGPRS(r1)
77
78 /* MSR[TS] will be 0 (non-transactional) once we do treclaim. */
79 li r0, 0
80 rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
81 SAVE_GPR(10, r1) /* final MSR value */
823:
54#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 83#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
55BEGIN_FTR_SECTION 84BEGIN_FTR_SECTION
56 /* Emulation of the treclaim instruction needs TEXASR before treclaim */ 85 /* Emulation of the treclaim instruction needs TEXASR before treclaim */
@@ -74,22 +103,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
74 std r9, PACATMSCRATCH(r13) 103 std r9, PACATMSCRATCH(r13)
75 ld r9, HSTATE_SCRATCH1(r13) 104 ld r9, HSTATE_SCRATCH1(r13)
76 105
77 /* Get a few more GPRs free. */ 106 /* Save away PPR soon so we don't run with user value. */
78 std r29, VCPU_GPRS_TM(29)(r9) 107 std r0, VCPU_GPRS_TM(0)(r9)
79 std r30, VCPU_GPRS_TM(30)(r9) 108 mfspr r0, SPRN_PPR
80 std r31, VCPU_GPRS_TM(31)(r9)
81
82 /* Save away PPR and DSCR soon so don't run with user values. */
83 mfspr r31, SPRN_PPR
84 HMT_MEDIUM 109 HMT_MEDIUM
85 mfspr r30, SPRN_DSCR
86#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
87 ld r29, HSTATE_DSCR(r13)
88 mtspr SPRN_DSCR, r29
89#endif
90 110
91 /* Save all but r9, r13 & r29-r31 */ 111 /* Reload stack pointer. */
92 reg = 0 112 std r1, VCPU_GPRS_TM(1)(r9)
113 ld r1, HSTATE_SCRATCH2(r13)
114
115 /* Set MSR RI now we have r1 and r13 back. */
116 std r2, VCPU_GPRS_TM(2)(r9)
117 li r2, MSR_RI
118 mtmsrd r2, 1
119
120 /* Reload TOC pointer. */
121 ld r2, PACATOC(r13)
122
123 /* Save all but r0-r2, r9 & r13 */
124 reg = 3
93 .rept 29 125 .rept 29
94 .if (reg != 9) && (reg != 13) 126 .if (reg != 9) && (reg != 13)
95 std reg, VCPU_GPRS_TM(reg)(r9) 127 std reg, VCPU_GPRS_TM(reg)(r9)
@@ -103,33 +135,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
103 ld r4, PACATMSCRATCH(r13) 135 ld r4, PACATMSCRATCH(r13)
104 std r4, VCPU_GPRS_TM(9)(r9) 136 std r4, VCPU_GPRS_TM(9)(r9)
105 137
106 /* Reload stack pointer and TOC. */ 138 /* Restore host DSCR and CR values, after saving guest values */
107 ld r1, HSTATE_SCRATCH2(r13) 139 mfcr r6
108 ld r2, PACATOC(r13) 140 mfspr r7, SPRN_DSCR
109 141 stw r6, VCPU_CR_TM(r9)
110 /* Set MSR RI now we have r1 and r13 back. */ 142 std r7, VCPU_DSCR_TM(r9)
111 li r5, MSR_RI 143 REST_GPR(6, r1)
112 mtmsrd r5, 1 144 REST_GPR(7, r1)
145 mtcr r6
146 mtspr SPRN_DSCR, r7
113 147
114 /* Save away checkpinted SPRs. */ 148 /* Save away checkpointed SPRs. */
115 std r31, VCPU_PPR_TM(r9) 149 std r0, VCPU_PPR_TM(r9)
116 std r30, VCPU_DSCR_TM(r9)
117 mflr r5 150 mflr r5
118 mfcr r6
119 mfctr r7 151 mfctr r7
120 mfspr r8, SPRN_AMR 152 mfspr r8, SPRN_AMR
121 mfspr r10, SPRN_TAR 153 mfspr r10, SPRN_TAR
122 mfxer r11 154 mfxer r11
123 std r5, VCPU_LR_TM(r9) 155 std r5, VCPU_LR_TM(r9)
124 stw r6, VCPU_CR_TM(r9)
125 std r7, VCPU_CTR_TM(r9) 156 std r7, VCPU_CTR_TM(r9)
126 std r8, VCPU_AMR_TM(r9) 157 std r8, VCPU_AMR_TM(r9)
127 std r10, VCPU_TAR_TM(r9) 158 std r10, VCPU_TAR_TM(r9)
128 std r11, VCPU_XER_TM(r9) 159 std r11, VCPU_XER_TM(r9)
129 160
130 /* Restore r12 as trap number. */
131 lwz r12, VCPU_TRAP(r9)
132
133 /* Save FP/VSX. */ 161 /* Save FP/VSX. */
134 addi r3, r9, VCPU_FPRS_TM 162 addi r3, r9, VCPU_FPRS_TM
135 bl store_fp_state 163 bl store_fp_state
@@ -137,6 +165,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
137 bl store_vr_state 165 bl store_vr_state
138 mfspr r6, SPRN_VRSAVE 166 mfspr r6, SPRN_VRSAVE
139 stw r6, VCPU_VRSAVE_TM(r9) 167 stw r6, VCPU_VRSAVE_TM(r9)
168
169 /* Restore non-volatile registers if requested to */
170 beq cr7, 1f
171 REST_NVGPRS(r1)
172 REST_GPR(10, r1)
1401: 1731:
141 /* 174 /*
142 * We need to save these SPRs after the treclaim so that the software 175 * We need to save these SPRs after the treclaim so that the software
@@ -146,12 +179,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
146 */ 179 */
147 mfspr r7, SPRN_TEXASR 180 mfspr r7, SPRN_TEXASR
148 std r7, VCPU_TEXASR(r9) 181 std r7, VCPU_TEXASR(r9)
14911:
150 mfspr r5, SPRN_TFHAR 182 mfspr r5, SPRN_TFHAR
151 mfspr r6, SPRN_TFIAR 183 mfspr r6, SPRN_TFIAR
152 std r5, VCPU_TFHAR(r9) 184 std r5, VCPU_TFHAR(r9)
153 std r6, VCPU_TFIAR(r9) 185 std r6, VCPU_TFIAR(r9)
154 186
187 /* Restore MSR state if requested */
188 beq cr7, 2f
189 mtmsrd r10, 0
1902:
191 addi r1, r1, SWITCH_FRAME_SIZE
155 ld r0, PPC_LR_STKOFF(r1) 192 ld r0, PPC_LR_STKOFF(r1)
156 mtlr r0 193 mtlr r0
157 blr 194 blr
@@ -161,49 +198,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
161 * be invoked from C function by PR KVM only. 198 * be invoked from C function by PR KVM only.
162 */ 199 */
163_GLOBAL(_kvmppc_save_tm_pr) 200_GLOBAL(_kvmppc_save_tm_pr)
164 mflr r5 201 mflr r0
165 std r5, PPC_LR_STKOFF(r1) 202 std r0, PPC_LR_STKOFF(r1)
166 stdu r1, -SWITCH_FRAME_SIZE(r1) 203 stdu r1, -PPC_MIN_STKFRM(r1)
167 SAVE_NVGPRS(r1)
168
169 /* save MSR since TM/math bits might be impacted
170 * by __kvmppc_save_tm().
171 */
172 mfmsr r5
173 SAVE_GPR(5, r1)
174
175 /* also save DSCR/CR/TAR so that it can be recovered later */
176 mfspr r6, SPRN_DSCR
177 SAVE_GPR(6, r1)
178
179 mfcr r7
180 stw r7, _CCR(r1)
181 204
182 mfspr r8, SPRN_TAR 205 mfspr r8, SPRN_TAR
183 SAVE_GPR(8, r1) 206 std r8, PPC_MIN_STKFRM-8(r1)
184 207
208 li r5, 1 /* preserve non-volatile registers */
185 bl __kvmppc_save_tm 209 bl __kvmppc_save_tm
186 210
187 REST_GPR(8, r1) 211 ld r8, PPC_MIN_STKFRM-8(r1)
188 mtspr SPRN_TAR, r8 212 mtspr SPRN_TAR, r8
189 213
190 ld r7, _CCR(r1) 214 addi r1, r1, PPC_MIN_STKFRM
191 mtcr r7 215 ld r0, PPC_LR_STKOFF(r1)
192 216 mtlr r0
193 REST_GPR(6, r1)
194 mtspr SPRN_DSCR, r6
195
196 /* need preserve current MSR's MSR_TS bits */
197 REST_GPR(5, r1)
198 mfmsr r6
199 rldicl r6, r6, 64 - MSR_TS_S_LG, 62
200 rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
201 mtmsrd r5
202
203 REST_NVGPRS(r1)
204 addi r1, r1, SWITCH_FRAME_SIZE
205 ld r5, PPC_LR_STKOFF(r1)
206 mtlr r5
207 blr 217 blr
208 218
209EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr); 219EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
@@ -215,15 +225,21 @@ EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
215 * - r4 is the guest MSR with desired TS bits: 225 * - r4 is the guest MSR with desired TS bits:
216 * For HV KVM, it is VCPU_MSR 226 * For HV KVM, it is VCPU_MSR
217 * For PR KVM, it is provided by caller 227 * For PR KVM, it is provided by caller
218 * This potentially modifies all checkpointed registers. 228 * - r5 containing a flag indicating that non-volatile registers
219 * It restores r1, r2 from the PACA. 229 * must be preserved.
230 * If r5 == 0, this potentially modifies all checkpointed registers, but
231 * restores r1, r2 from the PACA before exit.
232 * If r5 != 0, this restores the MSR TM/FP/VEC/VSX bits to their state on entry.
220 */ 233 */
221_GLOBAL(__kvmppc_restore_tm) 234_GLOBAL(__kvmppc_restore_tm)
222 mflr r0 235 mflr r0
223 std r0, PPC_LR_STKOFF(r1) 236 std r0, PPC_LR_STKOFF(r1)
224 237
238 cmpdi cr7, r5, 0
239
225 /* Turn on TM/FP/VSX/VMX so we can restore them. */ 240 /* Turn on TM/FP/VSX/VMX so we can restore them. */
226 mfmsr r5 241 mfmsr r5
242 mr r10, r5
227 li r6, MSR_TM >> 32 243 li r6, MSR_TM >> 32
228 sldi r6, r6, 32 244 sldi r6, r6, 32
229 or r5, r5, r6 245 or r5, r5, r6
@@ -244,8 +260,7 @@ _GLOBAL(__kvmppc_restore_tm)
244 260
245 mr r5, r4 261 mr r5, r4
246 rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 262 rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
247 beqlr /* TM not active in guest */ 263 beq 9f /* TM not active in guest */
248 std r1, HSTATE_SCRATCH2(r13)
249 264
250 /* Make sure the failure summary is set, otherwise we'll program check 265 /* Make sure the failure summary is set, otherwise we'll program check
251 * when we trechkpt. It's possible that this might have been not set 266 * when we trechkpt. It's possible that this might have been not set
@@ -256,6 +271,26 @@ _GLOBAL(__kvmppc_restore_tm)
256 mtspr SPRN_TEXASR, r7 271 mtspr SPRN_TEXASR, r7
257 272
258 /* 273 /*
274 * Make a stack frame and save non-volatile registers if requested.
275 */
276 stdu r1, -SWITCH_FRAME_SIZE(r1)
277 std r1, HSTATE_SCRATCH2(r13)
278
279 mfcr r6
280 mfspr r7, SPRN_DSCR
281 SAVE_GPR(2, r1)
282 SAVE_GPR(6, r1)
283 SAVE_GPR(7, r1)
284
285 beq cr7, 4f
286 SAVE_NVGPRS(r1)
287
288 /* MSR[TS] will be 1 (suspended) once we do trechkpt */
289 li r0, 1
290 rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
291 SAVE_GPR(10, r1) /* final MSR value */
2924:
293 /*
259 * We need to load up the checkpointed state for the guest. 294 * We need to load up the checkpointed state for the guest.
260 * We need to do this early as it will blow away any GPRs, VSRs and 295 * We need to do this early as it will blow away any GPRs, VSRs and
261 * some SPRs. 296 * some SPRs.
@@ -291,8 +326,6 @@ _GLOBAL(__kvmppc_restore_tm)
291 ld r29, VCPU_DSCR_TM(r3) 326 ld r29, VCPU_DSCR_TM(r3)
292 ld r30, VCPU_PPR_TM(r3) 327 ld r30, VCPU_PPR_TM(r3)
293 328
294 std r2, PACATMSCRATCH(r13) /* Save TOC */
295
296 /* Clear the MSR RI since r1, r13 are all going to be foobar. */ 329 /* Clear the MSR RI since r1, r13 are all going to be foobar. */
297 li r5, 0 330 li r5, 0
298 mtmsrd r5, 1 331 mtmsrd r5, 1
@@ -318,18 +351,31 @@ _GLOBAL(__kvmppc_restore_tm)
318 /* Now let's get back the state we need. */ 351 /* Now let's get back the state we need. */
319 HMT_MEDIUM 352 HMT_MEDIUM
320 GET_PACA(r13) 353 GET_PACA(r13)
321#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
322 ld r29, HSTATE_DSCR(r13)
323 mtspr SPRN_DSCR, r29
324#endif
325 ld r1, HSTATE_SCRATCH2(r13) 354 ld r1, HSTATE_SCRATCH2(r13)
326 ld r2, PACATMSCRATCH(r13) 355 REST_GPR(7, r1)
356 mtspr SPRN_DSCR, r7
327 357
328 /* Set the MSR RI since we have our registers back. */ 358 /* Set the MSR RI since we have our registers back. */
329 li r5, MSR_RI 359 li r5, MSR_RI
330 mtmsrd r5, 1 360 mtmsrd r5, 1
361
362 /* Restore TOC pointer and CR */
363 REST_GPR(2, r1)
364 REST_GPR(6, r1)
365 mtcr r6
366
367 /* Restore non-volatile registers if requested to. */
368 beq cr7, 5f
369 REST_GPR(10, r1)
370 REST_NVGPRS(r1)
371
3725: addi r1, r1, SWITCH_FRAME_SIZE
331 ld r0, PPC_LR_STKOFF(r1) 373 ld r0, PPC_LR_STKOFF(r1)
332 mtlr r0 374 mtlr r0
375
3769: /* Restore MSR bits if requested */
377 beqlr cr7
378 mtmsrd r10, 0
333 blr 379 blr
334 380
335/* 381/*
@@ -337,47 +383,23 @@ _GLOBAL(__kvmppc_restore_tm)
337 * can be invoked from C function by PR KVM only. 383 * can be invoked from C function by PR KVM only.
338 */ 384 */
339_GLOBAL(_kvmppc_restore_tm_pr) 385_GLOBAL(_kvmppc_restore_tm_pr)
340 mflr r5 386 mflr r0
341 std r5, PPC_LR_STKOFF(r1) 387 std r0, PPC_LR_STKOFF(r1)
342 stdu r1, -SWITCH_FRAME_SIZE(r1) 388 stdu r1, -PPC_MIN_STKFRM(r1)
343 SAVE_NVGPRS(r1)
344
345 /* save MSR to avoid TM/math bits change */
346 mfmsr r5
347 SAVE_GPR(5, r1)
348
349 /* also save DSCR/CR/TAR so that it can be recovered later */
350 mfspr r6, SPRN_DSCR
351 SAVE_GPR(6, r1)
352
353 mfcr r7
354 stw r7, _CCR(r1)
355 389
390 /* save TAR so that it can be recovered later */
356 mfspr r8, SPRN_TAR 391 mfspr r8, SPRN_TAR
357 SAVE_GPR(8, r1) 392 std r8, PPC_MIN_STKFRM-8(r1)
358 393
394 li r5, 1
359 bl __kvmppc_restore_tm 395 bl __kvmppc_restore_tm
360 396
361 REST_GPR(8, r1) 397 ld r8, PPC_MIN_STKFRM-8(r1)
362 mtspr SPRN_TAR, r8 398 mtspr SPRN_TAR, r8
363 399
364 ld r7, _CCR(r1) 400 addi r1, r1, PPC_MIN_STKFRM
365 mtcr r7 401 ld r0, PPC_LR_STKOFF(r1)
366 402 mtlr r0
367 REST_GPR(6, r1)
368 mtspr SPRN_DSCR, r6
369
370 /* need preserve current MSR's MSR_TS bits */
371 REST_GPR(5, r1)
372 mfmsr r6
373 rldicl r6, r6, 64 - MSR_TS_S_LG, 62
374 rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
375 mtmsrd r5
376
377 REST_NVGPRS(r1)
378 addi r1, r1, SWITCH_FRAME_SIZE
379 ld r5, PPC_LR_STKOFF(r1)
380 mtlr r5
381 blr 403 blr
382 404
383EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr); 405EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr);
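
For C callers the PR-KVM entry points keep their two-argument shape; they now set the new r5 flag internally, so non-volatile GPRs and the MSR TM/FP/VEC/VSX bits survive the call. The prototypes and call site below are a sketch based on this hunk and the usual asm-prototypes.h declarations, not a quotation of them.

#include <linux/kvm_host.h>
#include <asm/reg.h>

/* Assumed C-visible prototypes for the helpers reworked above. */
void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);

/* Sketch of a PR-KVM call site: per the header comment, PR passes the
 * host MSR; preemption is disabled because the helper uses PACA scratch. */
static void save_tm_example(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	_kvmppc_save_tm_pr(vcpu, mfmsr());
	preempt_enable();
}
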
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
index f3b23759e017..372a82fa2de3 100644
--- a/arch/powerpc/kvm/trace_book3s.h
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -14,7 +14,6 @@
14 {0x400, "INST_STORAGE"}, \ 14 {0x400, "INST_STORAGE"}, \
15 {0x480, "INST_SEGMENT"}, \ 15 {0x480, "INST_SEGMENT"}, \
16 {0x500, "EXTERNAL"}, \ 16 {0x500, "EXTERNAL"}, \
17 {0x501, "EXTERNAL_LEVEL"}, \
18 {0x502, "EXTERNAL_HV"}, \ 17 {0x502, "EXTERNAL_HV"}, \
19 {0x600, "ALIGNMENT"}, \ 18 {0x600, "ALIGNMENT"}, \
20 {0x700, "PROGRAM"}, \ 19 {0x700, "PROGRAM"}, \
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 670286808928..3bf9fc6fd36c 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -3,8 +3,6 @@
3# Makefile for ppc-specific library files.. 3# Makefile for ppc-specific library files..
4# 4#
5 5
6subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
7
8ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 6ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
9 7
10CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) 8CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
@@ -14,6 +12,8 @@ obj-y += string.o alloc.o code-patching.o feature-fixups.o
14 12
15obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o 13obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
16 14
15obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
16
17# See corresponding test in arch/powerpc/Makefile 17# See corresponding test in arch/powerpc/Makefile
18# 64-bit linker creates .sfpr on demand for final link (vmlinux), 18# 64-bit linker creates .sfpr on demand for final link (vmlinux),
19# so it is only needed for modules, and only for older linkers which 19# so it is only needed for modules, and only for older linkers which
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index 06796dec01ea..dedf88a76f58 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -2,7 +2,7 @@
2#include <linux/types.h> 2#include <linux/types.h>
3#include <linux/init.h> 3#include <linux/init.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/bootmem.h> 5#include <linux/memblock.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <asm/setup.h> 7#include <asm/setup.h>
8 8
@@ -14,7 +14,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
14 if (slab_is_available()) 14 if (slab_is_available())
15 p = kzalloc(size, mask); 15 p = kzalloc(size, mask);
16 else { 16 else {
17 p = memblock_virt_alloc(size, 0); 17 p = memblock_alloc(size, SMP_CACHE_BYTES);
18 } 18 }
19 return p; 19 return p;
20} 20}
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 5ffee298745f..89502cbccb1b 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -98,8 +98,7 @@ static int map_patch_area(void *addr, unsigned long text_poke_addr)
98 else 98 else
99 pfn = __pa_symbol(addr) >> PAGE_SHIFT; 99 pfn = __pa_symbol(addr) >> PAGE_SHIFT;
100 100
101 err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), 101 err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
102 pgprot_val(PAGE_KERNEL));
103 102
104 pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); 103 pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
105 if (err) 104 if (err)
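
Several hunks in this patch (code-patching.c here, plus 8xx_mmu.c and dma-noncoherent.c below) are the same mechanical change: map_kernel_page() now takes a pgprot_t rather than a pgprot_val() of one. A hedged view of the new interface, assuming the prototype this series appears to introduce:

#include <linux/types.h>
#include <asm/pgtable.h>

/* Assumed new prototype; callers pass protection constants directly. */
int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);

/* Example: mapping one non-cacheable kernel page. */
static int map_one_nc_page(unsigned long va, phys_addr_t pa)
{
	return map_kernel_page(va, pa, pgprot_noncached(PAGE_KERNEL));
}
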
diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c
new file mode 100644
index 000000000000..407b992fb02f
--- /dev/null
+++ b/arch/powerpc/lib/error-inject.c
@@ -0,0 +1,16 @@
1// SPDX-License-Identifier: GPL-2.0+
2
3#include <linux/error-injection.h>
4#include <linux/kprobes.h>
5#include <linux/uaccess.h>
6
7void override_function_with_return(struct pt_regs *regs)
8{
9 /*
10 * Emulate 'blr'. 'regs' represents the state on entry of a predefined
11 * function in the kernel/module, captured on a kprobe. We don't need
12 * to worry about 32-bit userspace on a 64-bit kernel.
13 */
14 regs->nip = regs->link;
15}
16NOKPROBE_SYMBOL(override_function_with_return);
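
For context, the hook above is only reached for functions that have opted in to error injection. A hedged example of such an opt-in; my_do_thing() is a placeholder, not a real kernel function:

#include <linux/error-injection.h>

/* Placeholder function used only to show the opt-in pattern. */
static int my_do_thing(void)
{
	/* real work elided */
	return 0;
}
/* Allow its return value to be overridden (e.g. forced to an -errno)
 * via the fail_function/BPF machinery, which lands in the new
 * override_function_with_return() above on powerpc. */
ALLOW_ERROR_INJECTION(my_do_thing, ERRNO);
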
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index ec531de99996..3c3be02f33b7 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -40,7 +40,7 @@ _GLOBAL(memset)
40.Lms: PPC_MTOCRF(1,r0) 40.Lms: PPC_MTOCRF(1,r0)
41 mr r6,r3 41 mr r6,r3
42 blt cr1,8f 42 blt cr1,8f
43 beq+ 3f /* if already 8-byte aligned */ 43 beq 3f /* if already 8-byte aligned */
44 subf r5,r0,r5 44 subf r5,r0,r5
45 bf 31,1f 45 bf 31,1f
46 stb r4,0(r6) 46 stb r4,0(r6)
@@ -85,7 +85,7 @@ _GLOBAL(memset)
85 addi r6,r6,8 85 addi r6,r6,8
868: cmpwi r5,0 868: cmpwi r5,0
87 PPC_MTOCRF(1,r5) 87 PPC_MTOCRF(1,r5)
88 beqlr+ 88 beqlr
89 bf 29,9f 89 bf 29,9f
90 stw r4,0(r6) 90 stw r4,0(r6)
91 addi r6,r6,4 91 addi r6,r6,4
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index cf77d755246d..36484a2ef915 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -67,7 +67,7 @@ void __init MMU_init_hw(void)
67 /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ 67 /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
68#ifdef CONFIG_PIN_TLB_DATA 68#ifdef CONFIG_PIN_TLB_DATA
69 unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; 69 unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
70 unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY; 70 unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
71#ifdef CONFIG_PIN_TLB_IMMR 71#ifdef CONFIG_PIN_TLB_IMMR
72 int i = 29; 72 int i = 29;
73#else 73#else
@@ -91,11 +91,10 @@ static void __init mmu_mapin_immr(void)
91{ 91{
92 unsigned long p = PHYS_IMMR_BASE; 92 unsigned long p = PHYS_IMMR_BASE;
93 unsigned long v = VIRT_IMMR_BASE; 93 unsigned long v = VIRT_IMMR_BASE;
94 unsigned long f = pgprot_val(PAGE_KERNEL_NCG);
95 int offset; 94 int offset;
96 95
97 for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) 96 for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
98 map_kernel_page(v + offset, p + offset, f); 97 map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
99} 98}
100 99
101/* Address of instructions to patch */ 100/* Address of instructions to patch */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index cdf6a9960046..ca96e7be4d0e 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -3,10 +3,10 @@
3# Makefile for the linux ppc-specific parts of the memory manager. 3# Makefile for the linux ppc-specific parts of the memory manager.
4# 4#
5 5
6subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
7
8ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 6ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
9 7
8CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
9
10obj-y := fault.o mem.o pgtable.o mmap.o \ 10obj-y := fault.o mem.o pgtable.o mmap.o \
11 init_$(BITS).o pgtable_$(BITS).o \ 11 init_$(BITS).o pgtable_$(BITS).o \
12 init-common.o mmu_context.o drmem.o 12 init-common.o mmu_context.o drmem.o
@@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
15obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o 15obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
16hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o 16hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
17obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o 17obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
18obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o 18obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
19obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o 19obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
20obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o 20obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
21obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o 21obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
@@ -43,5 +43,12 @@ obj-$(CONFIG_HIGHMEM) += highmem.o
43obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o 43obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
44obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o 44obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
45obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o 45obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
46ifdef CONFIG_PPC_PTDUMP
47obj-$(CONFIG_4xx) += dump_linuxpagetables-generic.o
48obj-$(CONFIG_PPC_8xx) += dump_linuxpagetables-8xx.o
49obj-$(CONFIG_PPC_BOOK3E_MMU) += dump_linuxpagetables-generic.o
50obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o
51obj-$(CONFIG_PPC_BOOK3S_64) += dump_linuxpagetables-book3s64.o
52endif
46obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o 53obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
47obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o 54obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 382528475433..b6e7b5952ab5 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -228,7 +228,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
228 do { 228 do {
229 SetPageReserved(page); 229 SetPageReserved(page);
230 map_kernel_page(vaddr, page_to_phys(page), 230 map_kernel_page(vaddr, page_to_phys(page),
231 pgprot_val(pgprot_noncached(PAGE_KERNEL))); 231 pgprot_noncached(PAGE_KERNEL));
232 page++; 232 page++;
233 vaddr += PAGE_SIZE; 233 vaddr += PAGE_SIZE;
234 } while (size -= PAGE_SIZE); 234 } while (size -= PAGE_SIZE);
diff --git a/arch/powerpc/mm/dump_linuxpagetables-8xx.c b/arch/powerpc/mm/dump_linuxpagetables-8xx.c
new file mode 100644
index 000000000000..ab9e3f24db2f
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-8xx.c
@@ -0,0 +1,82 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * From split of dump_linuxpagetables.c
4 * Copyright 2016, Rashmica Gupta, IBM Corp.
5 *
6 */
7#include <linux/kernel.h>
8#include <asm/pgtable.h>
9
10#include "dump_linuxpagetables.h"
11
12static const struct flag_info flag_array[] = {
13 {
14 .mask = _PAGE_SH,
15 .val = 0,
16 .set = "user",
17 .clear = " ",
18 }, {
19 .mask = _PAGE_RO | _PAGE_NA,
20 .val = 0,
21 .set = "rw",
22 }, {
23 .mask = _PAGE_RO | _PAGE_NA,
24 .val = _PAGE_RO,
25 .set = "r ",
26 }, {
27 .mask = _PAGE_RO | _PAGE_NA,
28 .val = _PAGE_NA,
29 .set = " ",
30 }, {
31 .mask = _PAGE_EXEC,
32 .val = _PAGE_EXEC,
33 .set = " X ",
34 .clear = " ",
35 }, {
36 .mask = _PAGE_PRESENT,
37 .val = _PAGE_PRESENT,
38 .set = "present",
39 .clear = " ",
40 }, {
41 .mask = _PAGE_GUARDED,
42 .val = _PAGE_GUARDED,
43 .set = "guarded",
44 .clear = " ",
45 }, {
46 .mask = _PAGE_DIRTY,
47 .val = _PAGE_DIRTY,
48 .set = "dirty",
49 .clear = " ",
50 }, {
51 .mask = _PAGE_ACCESSED,
52 .val = _PAGE_ACCESSED,
53 .set = "accessed",
54 .clear = " ",
55 }, {
56 .mask = _PAGE_NO_CACHE,
57 .val = _PAGE_NO_CACHE,
58 .set = "no cache",
59 .clear = " ",
60 }, {
61 .mask = _PAGE_SPECIAL,
62 .val = _PAGE_SPECIAL,
63 .set = "special",
64 }
65};
66
67struct pgtable_level pg_level[5] = {
68 {
69 }, { /* pgd */
70 .flag = flag_array,
71 .num = ARRAY_SIZE(flag_array),
72 }, { /* pud */
73 .flag = flag_array,
74 .num = ARRAY_SIZE(flag_array),
75 }, { /* pmd */
76 .flag = flag_array,
77 .num = ARRAY_SIZE(flag_array),
78 }, { /* pte */
79 .flag = flag_array,
80 .num = ARRAY_SIZE(flag_array),
81 },
82};
diff --git a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c
new file mode 100644
index 000000000000..ed6fcf78256e
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c
@@ -0,0 +1,120 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * From split of dump_linuxpagetables.c
4 * Copyright 2016, Rashmica Gupta, IBM Corp.
5 *
6 */
7#include <linux/kernel.h>
8#include <asm/pgtable.h>
9
10#include "dump_linuxpagetables.h"
11
12static const struct flag_info flag_array[] = {
13 {
14 .mask = _PAGE_PRIVILEGED,
15 .val = 0,
16 .set = "user",
17 .clear = " ",
18 }, {
19 .mask = _PAGE_READ,
20 .val = _PAGE_READ,
21 .set = "r",
22 .clear = " ",
23 }, {
24 .mask = _PAGE_WRITE,
25 .val = _PAGE_WRITE,
26 .set = "w",
27 .clear = " ",
28 }, {
29 .mask = _PAGE_EXEC,
30 .val = _PAGE_EXEC,
31 .set = " X ",
32 .clear = " ",
33 }, {
34 .mask = _PAGE_PTE,
35 .val = _PAGE_PTE,
36 .set = "pte",
37 .clear = " ",
38 }, {
39 .mask = _PAGE_PRESENT,
40 .val = _PAGE_PRESENT,
41 .set = "valid",
42 .clear = " ",
43 }, {
44 .mask = _PAGE_PRESENT | _PAGE_INVALID,
45 .val = 0,
46 .set = " ",
47 .clear = "present",
48 }, {
49 .mask = H_PAGE_HASHPTE,
50 .val = H_PAGE_HASHPTE,
51 .set = "hpte",
52 .clear = " ",
53 }, {
54 .mask = _PAGE_DIRTY,
55 .val = _PAGE_DIRTY,
56 .set = "dirty",
57 .clear = " ",
58 }, {
59 .mask = _PAGE_ACCESSED,
60 .val = _PAGE_ACCESSED,
61 .set = "accessed",
62 .clear = " ",
63 }, {
64 .mask = _PAGE_NON_IDEMPOTENT,
65 .val = _PAGE_NON_IDEMPOTENT,
66 .set = "non-idempotent",
67 .clear = " ",
68 }, {
69 .mask = _PAGE_TOLERANT,
70 .val = _PAGE_TOLERANT,
71 .set = "tolerant",
72 .clear = " ",
73 }, {
74 .mask = H_PAGE_BUSY,
75 .val = H_PAGE_BUSY,
76 .set = "busy",
77 }, {
78#ifdef CONFIG_PPC_64K_PAGES
79 .mask = H_PAGE_COMBO,
80 .val = H_PAGE_COMBO,
81 .set = "combo",
82 }, {
83 .mask = H_PAGE_4K_PFN,
84 .val = H_PAGE_4K_PFN,
85 .set = "4K_pfn",
86 }, {
87#else /* CONFIG_PPC_64K_PAGES */
88 .mask = H_PAGE_F_GIX,
89 .val = H_PAGE_F_GIX,
90 .set = "f_gix",
91 .is_val = true,
92 .shift = H_PAGE_F_GIX_SHIFT,
93 }, {
94 .mask = H_PAGE_F_SECOND,
95 .val = H_PAGE_F_SECOND,
96 .set = "f_second",
97 }, {
98#endif /* CONFIG_PPC_64K_PAGES */
99 .mask = _PAGE_SPECIAL,
100 .val = _PAGE_SPECIAL,
101 .set = "special",
102 }
103};
104
105struct pgtable_level pg_level[5] = {
106 {
107 }, { /* pgd */
108 .flag = flag_array,
109 .num = ARRAY_SIZE(flag_array),
110 }, { /* pud */
111 .flag = flag_array,
112 .num = ARRAY_SIZE(flag_array),
113 }, { /* pmd */
114 .flag = flag_array,
115 .num = ARRAY_SIZE(flag_array),
116 }, { /* pte */
117 .flag = flag_array,
118 .num = ARRAY_SIZE(flag_array),
119 },
120};
diff --git a/arch/powerpc/mm/dump_linuxpagetables-generic.c b/arch/powerpc/mm/dump_linuxpagetables-generic.c
new file mode 100644
index 000000000000..1e3829ec1348
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-generic.c
@@ -0,0 +1,82 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * From split of dump_linuxpagetables.c
4 * Copyright 2016, Rashmica Gupta, IBM Corp.
5 *
6 */
7#include <linux/kernel.h>
8#include <asm/pgtable.h>
9
10#include "dump_linuxpagetables.h"
11
12static const struct flag_info flag_array[] = {
13 {
14 .mask = _PAGE_USER,
15 .val = _PAGE_USER,
16 .set = "user",
17 .clear = " ",
18 }, {
19 .mask = _PAGE_RW,
20 .val = _PAGE_RW,
21 .set = "rw",
22 .clear = "r ",
23 }, {
24#ifndef CONFIG_PPC_BOOK3S_32
25 .mask = _PAGE_EXEC,
26 .val = _PAGE_EXEC,
27 .set = " X ",
28 .clear = " ",
29 }, {
30#endif
31 .mask = _PAGE_PRESENT,
32 .val = _PAGE_PRESENT,
33 .set = "present",
34 .clear = " ",
35 }, {
36 .mask = _PAGE_GUARDED,
37 .val = _PAGE_GUARDED,
38 .set = "guarded",
39 .clear = " ",
40 }, {
41 .mask = _PAGE_DIRTY,
42 .val = _PAGE_DIRTY,
43 .set = "dirty",
44 .clear = " ",
45 }, {
46 .mask = _PAGE_ACCESSED,
47 .val = _PAGE_ACCESSED,
48 .set = "accessed",
49 .clear = " ",
50 }, {
51 .mask = _PAGE_WRITETHRU,
52 .val = _PAGE_WRITETHRU,
53 .set = "write through",
54 .clear = " ",
55 }, {
56 .mask = _PAGE_NO_CACHE,
57 .val = _PAGE_NO_CACHE,
58 .set = "no cache",
59 .clear = " ",
60 }, {
61 .mask = _PAGE_SPECIAL,
62 .val = _PAGE_SPECIAL,
63 .set = "special",
64 }
65};
66
67struct pgtable_level pg_level[5] = {
68 {
69 }, { /* pgd */
70 .flag = flag_array,
71 .num = ARRAY_SIZE(flag_array),
72 }, { /* pud */
73 .flag = flag_array,
74 .num = ARRAY_SIZE(flag_array),
75 }, { /* pmd */
76 .flag = flag_array,
77 .num = ARRAY_SIZE(flag_array),
78 }, { /* pte */
79 .flag = flag_array,
80 .num = ARRAY_SIZE(flag_array),
81 },
82};
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index 876e2a3c79f2..2b74f8adf4d0 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -27,6 +27,8 @@
27#include <asm/page.h> 27#include <asm/page.h>
28#include <asm/pgalloc.h> 28#include <asm/pgalloc.h>
29 29
30#include "dump_linuxpagetables.h"
31
30#ifdef CONFIG_PPC32 32#ifdef CONFIG_PPC32
31#define KERN_VIRT_START 0 33#define KERN_VIRT_START 0
32#endif 34#endif
@@ -101,159 +103,6 @@ static struct addr_marker address_markers[] = {
101 { -1, NULL }, 103 { -1, NULL },
102}; 104};
103 105
104struct flag_info {
105 u64 mask;
106 u64 val;
107 const char *set;
108 const char *clear;
109 bool is_val;
110 int shift;
111};
112
113static const struct flag_info flag_array[] = {
114 {
115 .mask = _PAGE_USER | _PAGE_PRIVILEGED,
116 .val = _PAGE_USER,
117 .set = "user",
118 .clear = " ",
119 }, {
120 .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
121 .val = _PAGE_RW,
122 .set = "rw",
123 }, {
124 .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
125 .val = _PAGE_RO,
126 .set = "ro",
127 }, {
128#if _PAGE_NA != 0
129 .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
130 .val = _PAGE_RO,
131 .set = "na",
132 }, {
133#endif
134 .mask = _PAGE_EXEC,
135 .val = _PAGE_EXEC,
136 .set = " X ",
137 .clear = " ",
138 }, {
139 .mask = _PAGE_PTE,
140 .val = _PAGE_PTE,
141 .set = "pte",
142 .clear = " ",
143 }, {
144 .mask = _PAGE_PRESENT,
145 .val = _PAGE_PRESENT,
146 .set = "present",
147 .clear = " ",
148 }, {
149#ifdef CONFIG_PPC_BOOK3S_64
150 .mask = H_PAGE_HASHPTE,
151 .val = H_PAGE_HASHPTE,
152#else
153 .mask = _PAGE_HASHPTE,
154 .val = _PAGE_HASHPTE,
155#endif
156 .set = "hpte",
157 .clear = " ",
158 }, {
159#ifndef CONFIG_PPC_BOOK3S_64
160 .mask = _PAGE_GUARDED,
161 .val = _PAGE_GUARDED,
162 .set = "guarded",
163 .clear = " ",
164 }, {
165#endif
166 .mask = _PAGE_DIRTY,
167 .val = _PAGE_DIRTY,
168 .set = "dirty",
169 .clear = " ",
170 }, {
171 .mask = _PAGE_ACCESSED,
172 .val = _PAGE_ACCESSED,
173 .set = "accessed",
174 .clear = " ",
175 }, {
176#ifndef CONFIG_PPC_BOOK3S_64
177 .mask = _PAGE_WRITETHRU,
178 .val = _PAGE_WRITETHRU,
179 .set = "write through",
180 .clear = " ",
181 }, {
182#endif
183#ifndef CONFIG_PPC_BOOK3S_64
184 .mask = _PAGE_NO_CACHE,
185 .val = _PAGE_NO_CACHE,
186 .set = "no cache",
187 .clear = " ",
188 }, {
189#else
190 .mask = _PAGE_NON_IDEMPOTENT,
191 .val = _PAGE_NON_IDEMPOTENT,
192 .set = "non-idempotent",
193 .clear = " ",
194 }, {
195 .mask = _PAGE_TOLERANT,
196 .val = _PAGE_TOLERANT,
197 .set = "tolerant",
198 .clear = " ",
199 }, {
200#endif
201#ifdef CONFIG_PPC_BOOK3S_64
202 .mask = H_PAGE_BUSY,
203 .val = H_PAGE_BUSY,
204 .set = "busy",
205 }, {
206#ifdef CONFIG_PPC_64K_PAGES
207 .mask = H_PAGE_COMBO,
208 .val = H_PAGE_COMBO,
209 .set = "combo",
210 }, {
211 .mask = H_PAGE_4K_PFN,
212 .val = H_PAGE_4K_PFN,
213 .set = "4K_pfn",
214 }, {
215#else /* CONFIG_PPC_64K_PAGES */
216 .mask = H_PAGE_F_GIX,
217 .val = H_PAGE_F_GIX,
218 .set = "f_gix",
219 .is_val = true,
220 .shift = H_PAGE_F_GIX_SHIFT,
221 }, {
222 .mask = H_PAGE_F_SECOND,
223 .val = H_PAGE_F_SECOND,
224 .set = "f_second",
225 }, {
226#endif /* CONFIG_PPC_64K_PAGES */
227#endif
228 .mask = _PAGE_SPECIAL,
229 .val = _PAGE_SPECIAL,
230 .set = "special",
231 }
232};
233
234struct pgtable_level {
235 const struct flag_info *flag;
236 size_t num;
237 u64 mask;
238};
239
240static struct pgtable_level pg_level[] = {
241 {
242 }, { /* pgd */
243 .flag = flag_array,
244 .num = ARRAY_SIZE(flag_array),
245 }, { /* pud */
246 .flag = flag_array,
247 .num = ARRAY_SIZE(flag_array),
248 }, { /* pmd */
249 .flag = flag_array,
250 .num = ARRAY_SIZE(flag_array),
251 }, { /* pte */
252 .flag = flag_array,
253 .num = ARRAY_SIZE(flag_array),
254 },
255};
256
257static void dump_flag_info(struct pg_state *st, const struct flag_info 106static void dump_flag_info(struct pg_state *st, const struct flag_info
258 *flag, u64 pte, int num) 107 *flag, u64 pte, int num)
259{ 108{
@@ -418,12 +267,13 @@ static void walk_pagetables(struct pg_state *st)
418 unsigned int i; 267 unsigned int i;
419 unsigned long addr; 268 unsigned long addr;
420 269
270 addr = st->start_address;
271
421 /* 272 /*
422 * Traverse the linux pagetable structure and dump pages that are in 273 * Traverse the linux pagetable structure and dump pages that are in
423 * the hash pagetable. 274 * the hash pagetable.
424 */ 275 */
425 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { 276 for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
426 addr = KERN_VIRT_START + i * PGDIR_SIZE;
427 if (!pgd_none(*pgd) && !pgd_huge(*pgd)) 277 if (!pgd_none(*pgd) && !pgd_huge(*pgd))
428 /* pgd exists */ 278 /* pgd exists */
429 walk_pud(st, pgd, addr); 279 walk_pud(st, pgd, addr);
@@ -472,9 +322,14 @@ static int ptdump_show(struct seq_file *m, void *v)
472{ 322{
473 struct pg_state st = { 323 struct pg_state st = {
474 .seq = m, 324 .seq = m,
475 .start_address = KERN_VIRT_START,
476 .marker = address_markers, 325 .marker = address_markers,
477 }; 326 };
327
328 if (radix_enabled())
329 st.start_address = PAGE_OFFSET;
330 else
331 st.start_address = KERN_VIRT_START;
332
478 /* Traverse kernel page tables */ 333 /* Traverse kernel page tables */
479 walk_pagetables(&st); 334 walk_pagetables(&st);
480 note_page(&st, 0, 0, 0); 335 note_page(&st, 0, 0, 0);
diff --git a/arch/powerpc/mm/dump_linuxpagetables.h b/arch/powerpc/mm/dump_linuxpagetables.h
new file mode 100644
index 000000000000..5d513636de73
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables.h
@@ -0,0 +1,19 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/types.h>
3
4struct flag_info {
5 u64 mask;
6 u64 val;
7 const char *set;
8 const char *clear;
9 bool is_val;
10 int shift;
11};
12
13struct pgtable_level {
14 const struct flag_info *flag;
15 size_t num;
16 u64 mask;
17};
18
19extern struct pgtable_level pg_level[5];
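
The shared header lets each platform describe its own PTE bits declaratively. Below is a self-contained illustration of the two entry styles; the _EX_* masks are placeholders, not real page-table flags.

#include <linux/types.h>
#include "dump_linuxpagetables.h"

#define _EX_RW		0x002UL		/* placeholder single-bit flag */
#define _EX_GIX		0x7000UL	/* placeholder multi-bit field */
#define _EX_GIX_SHIFT	12

static const struct flag_info example_flags[] = {
	{	/* simple bit: print "rw" when set, "r " when clear */
		.mask	= _EX_RW,
		.val	= _EX_RW,
		.set	= "rw",
		.clear	= "r ",
	}, {	/* field: with .is_val the dumper prints the masked value
		 * shifted right by .shift instead of a set/clear string */
		.mask	= _EX_GIX,
		.val	= _EX_GIX,
		.set	= "gix",
		.is_val	= true,
		.shift	= _EX_GIX_SHIFT,
	},
};
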
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index d51cf5f4e45e..1697e903bbf2 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -103,8 +103,7 @@ static bool store_updates_sp(unsigned int inst)
103 */ 103 */
104 104
105static int 105static int
106__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code, 106__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
107 int pkey)
108{ 107{
109 /* 108 /*
110 * If we are in kernel mode, bail out with a SEGV, this will 109 * If we are in kernel mode, bail out with a SEGV, this will
@@ -114,18 +113,17 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
114 if (!user_mode(regs)) 113 if (!user_mode(regs))
115 return SIGSEGV; 114 return SIGSEGV;
116 115
117 _exception_pkey(SIGSEGV, regs, si_code, address, pkey); 116 _exception(SIGSEGV, regs, si_code, address);
118 117
119 return 0; 118 return 0;
120} 119}
121 120
122static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address) 121static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
123{ 122{
124 return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0); 123 return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
125} 124}
126 125
127static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code, 126static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
128 int pkey)
129{ 127{
130 struct mm_struct *mm = current->mm; 128 struct mm_struct *mm = current->mm;
131 129
@@ -135,54 +133,61 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
135 */ 133 */
136 up_read(&mm->mmap_sem); 134 up_read(&mm->mmap_sem);
137 135
138 return __bad_area_nosemaphore(regs, address, si_code, pkey); 136 return __bad_area_nosemaphore(regs, address, si_code);
139} 137}
140 138
141static noinline int bad_area(struct pt_regs *regs, unsigned long address) 139static noinline int bad_area(struct pt_regs *regs, unsigned long address)
142{ 140{
143 return __bad_area(regs, address, SEGV_MAPERR, 0); 141 return __bad_area(regs, address, SEGV_MAPERR);
144} 142}
145 143
146static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, 144static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
147 int pkey) 145 int pkey)
148{ 146{
149 return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey); 147 /*
148 * If we are in kernel mode, bail out with a SEGV, this will
149 * be caught by the assembly which will restore the non-volatile
150 * registers before calling bad_page_fault()
151 */
152 if (!user_mode(regs))
153 return SIGSEGV;
154
155 _exception_pkey(regs, address, pkey);
156
157 return 0;
150} 158}
151 159
152static noinline int bad_access(struct pt_regs *regs, unsigned long address) 160static noinline int bad_access(struct pt_regs *regs, unsigned long address)
153{ 161{
154 return __bad_area(regs, address, SEGV_ACCERR, 0); 162 return __bad_area(regs, address, SEGV_ACCERR);
155} 163}
156 164
157static int do_sigbus(struct pt_regs *regs, unsigned long address, 165static int do_sigbus(struct pt_regs *regs, unsigned long address,
158 vm_fault_t fault) 166 vm_fault_t fault)
159{ 167{
160 siginfo_t info;
161 unsigned int lsb = 0;
162
163 if (!user_mode(regs)) 168 if (!user_mode(regs))
164 return SIGBUS; 169 return SIGBUS;
165 170
166 current->thread.trap_nr = BUS_ADRERR; 171 current->thread.trap_nr = BUS_ADRERR;
167 clear_siginfo(&info);
168 info.si_signo = SIGBUS;
169 info.si_errno = 0;
170 info.si_code = BUS_ADRERR;
171 info.si_addr = (void __user *)address;
172#ifdef CONFIG_MEMORY_FAILURE 172#ifdef CONFIG_MEMORY_FAILURE
173 if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { 173 if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
174 unsigned int lsb = 0; /* shutup gcc */
175
174 pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", 176 pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
175 current->comm, current->pid, address); 177 current->comm, current->pid, address);
176 info.si_code = BUS_MCEERR_AR; 178
179 if (fault & VM_FAULT_HWPOISON_LARGE)
180 lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
181 if (fault & VM_FAULT_HWPOISON)
182 lsb = PAGE_SHIFT;
183
184 force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb,
185 current);
186 return 0;
177 } 187 }
178 188
179 if (fault & VM_FAULT_HWPOISON_LARGE)
180 lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
181 if (fault & VM_FAULT_HWPOISON)
182 lsb = PAGE_SHIFT;
183#endif 189#endif
184 info.si_addr_lsb = lsb; 190 force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
185 force_sig_info(SIGBUS, &info, current);
186 return 0; 191 return 0;
187} 192}
188 193
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 729f02df8290..aaa28fd918fe 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -115,6 +115,8 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
115 tlbiel_hash_set_isa300(0, is, 0, 2, 1); 115 tlbiel_hash_set_isa300(0, is, 0, 2, 1);
116 116
117 asm volatile("ptesync": : :"memory"); 117 asm volatile("ptesync": : :"memory");
118
119 asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
118} 120}
119 121
120void hash__tlbiel_all(unsigned int action) 122void hash__tlbiel_all(unsigned int action)
@@ -140,8 +142,6 @@ void hash__tlbiel_all(unsigned int action)
140 tlbiel_all_isa206(POWER7_TLB_SETS, is); 142 tlbiel_all_isa206(POWER7_TLB_SETS, is);
141 else 143 else
142 WARN(1, "%s called on pre-POWER7 CPU\n", __func__); 144 WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
143
144 asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
145} 145}
146 146
147static inline unsigned long ___tlbie(unsigned long vpn, int psize, 147static inline unsigned long ___tlbie(unsigned long vpn, int psize,
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f23a89d8e4ce..0cc7fbc3bd1c 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1001,9 +1001,9 @@ void __init hash__early_init_mmu(void)
1001 * 4k use hugepd format, so for hash set them to 1001 * 4k use hugepd format, so for hash set them to

1002 * zero 1002 * zero
1003 */ 1003 */
1004 __pmd_val_bits = 0; 1004 __pmd_val_bits = HASH_PMD_VAL_BITS;
1005 __pud_val_bits = 0; 1005 __pud_val_bits = HASH_PUD_VAL_BITS;
1006 __pgd_val_bits = 0; 1006 __pgd_val_bits = HASH_PGD_VAL_BITS;
1007 1007
1008 __kernel_virt_start = H_KERN_VIRT_START; 1008 __kernel_virt_start = H_KERN_VIRT_START;
1009 __kernel_virt_size = H_KERN_VIRT_SIZE; 1009 __kernel_virt_size = H_KERN_VIRT_SIZE;
@@ -1125,7 +1125,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
1125 if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { 1125 if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
1126 1126
1127 copy_mm_to_paca(mm); 1127 copy_mm_to_paca(mm);
1128 slb_flush_and_rebolt(); 1128 slb_flush_and_restore_bolted();
1129 } 1129 }
1130} 1130}
1131#endif /* CONFIG_PPC_64K_PAGES */ 1131#endif /* CONFIG_PPC_64K_PAGES */
@@ -1197,7 +1197,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
1197 if (user_region) { 1197 if (user_region) {
1198 if (psize != get_paca_psize(ea)) { 1198 if (psize != get_paca_psize(ea)) {
1199 copy_mm_to_paca(mm); 1199 copy_mm_to_paca(mm);
1200 slb_flush_and_rebolt(); 1200 slb_flush_and_restore_bolted();
1201 } 1201 }
1202 } else if (get_paca()->vmalloc_sllp != 1202 } else if (get_paca()->vmalloc_sllp !=
1203 mmu_psize_defs[mmu_vmalloc_psize].sllp) { 1203 mmu_psize_defs[mmu_vmalloc_psize].sllp) {
@@ -1482,7 +1482,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
1482#endif 1482#endif
1483 1483
1484void hash_preload(struct mm_struct *mm, unsigned long ea, 1484void hash_preload(struct mm_struct *mm, unsigned long ea,
1485 unsigned long access, unsigned long trap) 1485 bool is_exec, unsigned long trap)
1486{ 1486{
1487 int hugepage_shift; 1487 int hugepage_shift;
1488 unsigned long vsid; 1488 unsigned long vsid;
@@ -1490,6 +1490,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
1490 pte_t *ptep; 1490 pte_t *ptep;
1491 unsigned long flags; 1491 unsigned long flags;
1492 int rc, ssize, update_flags = 0; 1492 int rc, ssize, update_flags = 0;
1493 unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
1493 1494
1494 BUG_ON(REGION_ID(ea) != USER_REGION_ID); 1495 BUG_ON(REGION_ID(ea) != USER_REGION_ID);
1495 1496
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 01f213d2bcb9..dfbc3b32f09b 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -51,6 +51,12 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
51 new_pmd |= _PAGE_DIRTY; 51 new_pmd |= _PAGE_DIRTY;
52 } while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd))); 52 } while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd)));
53 53
54 /*
 55 * Make sure this is a thp or devmap entry
56 */
57 if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)))
58 return 0;
59
54 rflags = htab_convert_pte_flags(new_pmd); 60 rflags = htab_convert_pte_flags(new_pmd);
55 61
56#if 0 62#if 0
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index b320f5097a06..2e6a8f9345d3 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -62,6 +62,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
62 new_pte |= _PAGE_DIRTY; 62 new_pte |= _PAGE_DIRTY;
63 } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); 63 } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
64 64
65 /* Make sure this is a hugetlb entry */
66 if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
67 return 0;
68
65 rflags = htab_convert_pte_flags(new_pte); 69 rflags = htab_convert_pte_flags(new_pte);
66 if (unlikely(mmu_psize == MMU_PAGE_16G)) 70 if (unlikely(mmu_psize == MMU_PAGE_16G))
67 offset = PTRS_PER_PUD; 71 offset = PTRS_PER_PUD;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e87f9ef9115b..8cf035e68378 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -15,10 +15,10 @@
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/of_fdt.h> 16#include <linux/of_fdt.h>
17#include <linux/memblock.h> 17#include <linux/memblock.h>
18#include <linux/bootmem.h>
19#include <linux/moduleparam.h> 18#include <linux/moduleparam.h>
20#include <linux/swap.h> 19#include <linux/swap.h>
21#include <linux/swapops.h> 20#include <linux/swapops.h>
21#include <linux/kmemleak.h>
22#include <asm/pgtable.h> 22#include <asm/pgtable.h>
23#include <asm/pgalloc.h> 23#include <asm/pgalloc.h>
24#include <asm/tlb.h> 24#include <asm/tlb.h>
@@ -95,7 +95,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
95 break; 95 break;
96 else { 96 else {
97#ifdef CONFIG_PPC_BOOK3S_64 97#ifdef CONFIG_PPC_BOOK3S_64
98 *hpdp = __hugepd(__pa(new) | 98 *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
99 (shift_to_mmu_psize(pshift) << 2)); 99 (shift_to_mmu_psize(pshift) << 2));
100#elif defined(CONFIG_PPC_8xx) 100#elif defined(CONFIG_PPC_8xx)
101 *hpdp = __hugepd(__pa(new) | _PMD_USER | 101 *hpdp = __hugepd(__pa(new) | _PMD_USER |
@@ -112,6 +112,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
112 for (i = i - 1 ; i >= 0; i--, hpdp--) 112 for (i = i - 1 ; i >= 0; i--, hpdp--)
113 *hpdp = __hugepd(0); 113 *hpdp = __hugepd(0);
114 kmem_cache_free(cachep, new); 114 kmem_cache_free(cachep, new);
115 } else {
116 kmemleak_ignore(new);
115 } 117 }
116 spin_unlock(ptl); 118 spin_unlock(ptl);
117 return 0; 119 return 0;
@@ -837,8 +839,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
837 ret_pte = (pte_t *) pmdp; 839 ret_pte = (pte_t *) pmdp;
838 goto out; 840 goto out;
839 } 841 }
840 842 /*
841 if (pmd_huge(pmd)) { 843 * pmd_large check below will handle the swap pmd pte
 844 * we need to do both checks because they are config
845 * dependent.
846 */
847 if (pmd_huge(pmd) || pmd_large(pmd)) {
842 ret_pte = (pte_t *) pmdp; 848 ret_pte = (pte_t *) pmdp;
843 goto out; 849 goto out;
844 } else if (is_hugepd(__hugepd(pmd_val(pmd)))) 850 } else if (is_hugepd(__hugepd(pmd_val(pmd))))
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 04ccb274a620..0a64fffabee1 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -27,12 +27,11 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/stddef.h> 28#include <linux/stddef.h>
29#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/bootmem.h> 30#include <linux/memblock.h>
31#include <linux/highmem.h> 31#include <linux/highmem.h>
32#include <linux/initrd.h> 32#include <linux/initrd.h>
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/suspend.h> 34#include <linux/suspend.h>
35#include <linux/memblock.h>
36#include <linux/hugetlb.h> 35#include <linux/hugetlb.h>
37#include <linux/slab.h> 36#include <linux/slab.h>
38#include <linux/vmalloc.h> 37#include <linux/vmalloc.h>
@@ -309,11 +308,11 @@ void __init paging_init(void)
309 unsigned long end = __fix_to_virt(FIX_HOLE); 308 unsigned long end = __fix_to_virt(FIX_HOLE);
310 309
311 for (; v < end; v += PAGE_SIZE) 310 for (; v < end; v += PAGE_SIZE)
312 map_kernel_page(v, 0, 0); /* XXX gross */ 311 map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
313#endif 312#endif
314 313
315#ifdef CONFIG_HIGHMEM 314#ifdef CONFIG_HIGHMEM
316 map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */ 315 map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */
317 pkmap_page_table = virt_to_kpte(PKMAP_BASE); 316 pkmap_page_table = virt_to_kpte(PKMAP_BASE);
318 317
319 kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); 318 kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
@@ -349,7 +348,7 @@ void __init mem_init(void)
349 348
350 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); 349 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
351 set_max_mapnr(max_pfn); 350 set_max_mapnr(max_pfn);
352 free_all_bootmem(); 351 memblock_free_all();
353 352
354#ifdef CONFIG_HIGHMEM 353#ifdef CONFIG_HIGHMEM
355 { 354 {
@@ -509,7 +508,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
509 * We don't need to worry about _PAGE_PRESENT here because we are 508 * We don't need to worry about _PAGE_PRESENT here because we are
510 * called with either mm->page_table_lock held or ptl lock held 509 * called with either mm->page_table_lock held or ptl lock held
511 */ 510 */
512 unsigned long access, trap; 511 unsigned long trap;
512 bool is_exec;
513 513
514 if (radix_enabled()) { 514 if (radix_enabled()) {
515 prefetch((void *)address); 515 prefetch((void *)address);
@@ -531,16 +531,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
531 trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL; 531 trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
532 switch (trap) { 532 switch (trap) {
533 case 0x300: 533 case 0x300:
534 access = 0UL; 534 is_exec = false;
535 break; 535 break;
536 case 0x400: 536 case 0x400:
537 access = _PAGE_EXEC; 537 is_exec = true;
538 break; 538 break;
539 default: 539 default:
540 return; 540 return;
541 } 541 }
542 542
543 hash_preload(vma->vm_mm, address, access, trap); 543 hash_preload(vma->vm_mm, address, is_exec, trap);
544#endif /* CONFIG_PPC_STD_MMU */ 544#endif /* CONFIG_PPC_STD_MMU */
545#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ 545#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
546 && defined(CONFIG_HUGETLB_PAGE) 546 && defined(CONFIG_HUGETLB_PAGE)
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index dbd8f762140b..510f103d7813 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -53,6 +53,8 @@ int hash__alloc_context_id(void)
53} 53}
54EXPORT_SYMBOL_GPL(hash__alloc_context_id); 54EXPORT_SYMBOL_GPL(hash__alloc_context_id);
55 55
56void slb_setup_new_exec(void);
57
56static int hash__init_new_context(struct mm_struct *mm) 58static int hash__init_new_context(struct mm_struct *mm)
57{ 59{
58 int index; 60 int index;
@@ -84,6 +86,13 @@ static int hash__init_new_context(struct mm_struct *mm)
84 return index; 86 return index;
85} 87}
86 88
89void hash__setup_new_exec(void)
90{
91 slice_setup_new_exec();
92
93 slb_setup_new_exec();
94}
95
87static int radix__init_new_context(struct mm_struct *mm) 96static int radix__init_new_context(struct mm_struct *mm)
88{ 97{
89 unsigned long rts_field; 98 unsigned long rts_field;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4d80239ef83c..2faca46ad720 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -44,7 +44,7 @@
44#include <linux/mm.h> 44#include <linux/mm.h>
45#include <linux/init.h> 45#include <linux/init.h>
46#include <linux/spinlock.h> 46#include <linux/spinlock.h>
47#include <linux/bootmem.h> 47#include <linux/memblock.h>
48#include <linux/notifier.h> 48#include <linux/notifier.h>
49#include <linux/cpu.h> 49#include <linux/cpu.h>
50#include <linux/slab.h> 50#include <linux/slab.h>
@@ -461,10 +461,11 @@ void __init mmu_context_init(void)
461 /* 461 /*
462 * Allocate the maps used by context management 462 * Allocate the maps used by context management
463 */ 463 */
464 context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); 464 context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
465 context_mm = memblock_virt_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0); 465 context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
466 SMP_CACHE_BYTES);
466#ifdef CONFIG_SMP 467#ifdef CONFIG_SMP
467 stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); 468 stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
468 469
469 cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, 470 cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
470 "powerpc/mmu/ctx:prepare", 471 "powerpc/mmu/ctx:prepare",
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index e5d779eed181..8574fbbc45e0 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -22,6 +22,7 @@
22#include <asm/mmu.h> 22#include <asm/mmu.h>
23 23
24#ifdef CONFIG_PPC_MMU_NOHASH 24#ifdef CONFIG_PPC_MMU_NOHASH
25#include <asm/trace.h>
25 26
26/* 27/*
27 * On 40x and 8xx, we directly inline tlbia and tlbivax 28 * On 40x and 8xx, we directly inline tlbia and tlbivax
@@ -30,10 +31,12 @@
30static inline void _tlbil_all(void) 31static inline void _tlbil_all(void)
31{ 32{
32 asm volatile ("sync; tlbia; isync" : : : "memory"); 33 asm volatile ("sync; tlbia; isync" : : : "memory");
34 trace_tlbia(MMU_NO_CONTEXT);
33} 35}
34static inline void _tlbil_pid(unsigned int pid) 36static inline void _tlbil_pid(unsigned int pid)
35{ 37{
36 asm volatile ("sync; tlbia; isync" : : : "memory"); 38 asm volatile ("sync; tlbia; isync" : : : "memory");
39 trace_tlbia(pid);
37} 40}
38#define _tlbil_pid_noind(pid) _tlbil_pid(pid) 41#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
39 42
@@ -55,6 +58,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
55 unsigned int tsize, unsigned int ind) 58 unsigned int tsize, unsigned int ind)
56{ 59{
57 asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); 60 asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
61 trace_tlbie(0, 0, address, pid, 0, 0, 0);
58} 62}
59#elif defined(CONFIG_PPC_BOOK3E) 63#elif defined(CONFIG_PPC_BOOK3E)
60extern void _tlbil_va(unsigned long address, unsigned int pid, 64extern void _tlbil_va(unsigned long address, unsigned int pid,
@@ -82,7 +86,7 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
82#else /* CONFIG_PPC_MMU_NOHASH */ 86#else /* CONFIG_PPC_MMU_NOHASH */
83 87
84extern void hash_preload(struct mm_struct *mm, unsigned long ea, 88extern void hash_preload(struct mm_struct *mm, unsigned long ea,
85 unsigned long access, unsigned long trap); 89 bool is_exec, unsigned long trap);
86 90
87 91
88extern void _tlbie(unsigned long address); 92extern void _tlbie(unsigned long address);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 055b211b7126..3a048e98a132 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -11,7 +11,7 @@
11#define pr_fmt(fmt) "numa: " fmt 11#define pr_fmt(fmt) "numa: " fmt
12 12
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <linux/bootmem.h> 14#include <linux/memblock.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/mmzone.h> 17#include <linux/mmzone.h>
@@ -19,7 +19,6 @@
19#include <linux/nodemask.h> 19#include <linux/nodemask.h>
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/notifier.h> 21#include <linux/notifier.h>
22#include <linux/memblock.h>
23#include <linux/of.h> 22#include <linux/of.h>
24#include <linux/pfn.h> 23#include <linux/pfn.h>
25#include <linux/cpuset.h> 24#include <linux/cpuset.h>
@@ -788,7 +787,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
788 void *nd; 787 void *nd;
789 int tnid; 788 int tnid;
790 789
791 nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); 790 nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
792 nd = __va(nd_pa); 791 nd = __va(nd_pa);
793 792
794 /* report and initialize */ 793 /* report and initialize */
@@ -1521,6 +1520,10 @@ int start_topology_update(void)
1521 } 1520 }
1522 } 1521 }
1523 1522
1523 pr_info("Starting topology update%s%s\n",
1524 (prrn_enabled ? " prrn_enabled" : ""),
1525 (vphn_enabled ? " vphn_enabled" : ""));
1526
1524 return rc; 1527 return rc;
1525} 1528}
1526 1529
@@ -1542,6 +1545,8 @@ int stop_topology_update(void)
1542 rc = del_timer_sync(&topology_timer); 1545 rc = del_timer_sync(&topology_timer);
1543 } 1546 }
1544 1547
1548 pr_info("Stopping topology update\n");
1549
1545 return rc; 1550 return rc;
1546} 1551}
1547 1552
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
index a2298930f990..e0ccf36714b2 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -42,7 +42,7 @@ int __meminit vmemmap_create_mapping(unsigned long start,
42 * thus must have the low bits clear 42 * thus must have the low bits clear
43 */ 43 */
44 for (i = 0; i < page_size; i += PAGE_SIZE) 44 for (i = 0; i < page_size; i += PAGE_SIZE)
45 BUG_ON(map_kernel_page(start + i, phys, flags)); 45 BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags)));
46 46
47 return 0; 47 return 0;
48} 48}
@@ -70,7 +70,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
70 * map_kernel_page adds an entry to the ioremap page table 70 * map_kernel_page adds an entry to the ioremap page table
71 * and adds an entry to the HPT, possibly bolting it 71 * and adds an entry to the HPT, possibly bolting it
72 */ 72 */
73int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) 73int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
74{ 74{
75 pgd_t *pgdp; 75 pgd_t *pgdp;
76 pud_t *pudp; 76 pud_t *pudp;
@@ -89,8 +89,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
89 ptep = pte_alloc_kernel(pmdp, ea); 89 ptep = pte_alloc_kernel(pmdp, ea);
90 if (!ptep) 90 if (!ptep)
91 return -ENOMEM; 91 return -ENOMEM;
92 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
93 __pgprot(flags)));
94 } else { 92 } else {
95 pgdp = pgd_offset_k(ea); 93 pgdp = pgd_offset_k(ea);
96#ifndef __PAGETABLE_PUD_FOLDED 94#ifndef __PAGETABLE_PUD_FOLDED
@@ -113,9 +111,8 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
113 pmd_populate_kernel(&init_mm, pmdp, ptep); 111 pmd_populate_kernel(&init_mm, pmdp, ptep);
114 } 112 }
115 ptep = pte_offset_kernel(pmdp, ea); 113 ptep = pte_offset_kernel(pmdp, ea);
116 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
117 __pgprot(flags)));
118 } 114 }
115 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
119 116
120 smp_wmb(); 117 smp_wmb();
121 return 0; 118 return 0;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 01d7c0f7c4f0..9f93c9f985c5 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -69,9 +69,14 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
69 pmd_t *pmdp, pmd_t pmd) 69 pmd_t *pmdp, pmd_t pmd)
70{ 70{
71#ifdef CONFIG_DEBUG_VM 71#ifdef CONFIG_DEBUG_VM
72 WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); 72 /*
73 * Make sure hardware valid bit is not set. We don't do
74 * tlb flush for this update.
75 */
76
77 WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
73 assert_spin_locked(pmd_lockptr(mm, pmdp)); 78 assert_spin_locked(pmd_lockptr(mm, pmdp));
74 WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd))); 79 WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
75#endif 80#endif
76 trace_hugepage_set_pmd(addr, pmd_val(pmd)); 81 trace_hugepage_set_pmd(addr, pmd_val(pmd));
77 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); 82 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
@@ -106,7 +111,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
106{ 111{
107 unsigned long old_pmd; 112 unsigned long old_pmd;
108 113
109 old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); 114 old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
110 flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); 115 flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
111 /* 116 /*
 112 * This ensures that generic code that relies on IRQ disabling 117 * This ensures that generic code that relies on IRQ disabling
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 692bfc9e372c..c08d49046a96 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -142,7 +142,7 @@ void hash__vmemmap_remove_mapping(unsigned long start,
142 * map_kernel_page adds an entry to the ioremap page table 142 * map_kernel_page adds an entry to the ioremap page table
143 * and adds an entry to the HPT, possibly bolting it 143 * and adds an entry to the HPT, possibly bolting it
144 */ 144 */
145int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) 145int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
146{ 146{
147 pgd_t *pgdp; 147 pgd_t *pgdp;
148 pud_t *pudp; 148 pud_t *pudp;
@@ -161,8 +161,7 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flag
161 ptep = pte_alloc_kernel(pmdp, ea); 161 ptep = pte_alloc_kernel(pmdp, ea);
162 if (!ptep) 162 if (!ptep)
163 return -ENOMEM; 163 return -ENOMEM;
164 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 164 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
165 __pgprot(flags)));
166 } else { 165 } else {
167 /* 166 /*
168 * If the mm subsystem is not fully up, we cannot create a 167 * If the mm subsystem is not fully up, we cannot create a
@@ -170,7 +169,7 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flag
170 * entry in the hardware page table. 169 * entry in the hardware page table.
171 * 170 *
172 */ 171 */
173 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, 172 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot),
174 mmu_io_psize, mmu_kernel_ssize)) { 173 mmu_io_psize, mmu_kernel_ssize)) {
175 printk(KERN_ERR "Failed to do bolted mapping IO " 174 printk(KERN_ERR "Failed to do bolted mapping IO "
176 "memory at %016lx !\n", pa); 175 "memory at %016lx !\n", pa);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c879979faa73..931156069a81 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -241,9 +241,8 @@ void radix__mark_initmem_nx(void)
241} 241}
242#endif /* CONFIG_STRICT_KERNEL_RWX */ 242#endif /* CONFIG_STRICT_KERNEL_RWX */
243 243
244static inline void __meminit print_mapping(unsigned long start, 244static inline void __meminit
245 unsigned long end, 245print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
246 unsigned long size)
247{ 246{
248 char buf[10]; 247 char buf[10];
249 248
@@ -252,7 +251,17 @@ static inline void __meminit print_mapping(unsigned long start,
252 251
253 string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf)); 252 string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
254 253
255 pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf); 254 pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
255 exec ? " (exec)" : "");
256}
257
258static unsigned long next_boundary(unsigned long addr, unsigned long end)
259{
260#ifdef CONFIG_STRICT_KERNEL_RWX
261 if (addr < __pa_symbol(__init_begin))
262 return __pa_symbol(__init_begin);
263#endif
264 return end;
256} 265}
257 266
258static int __meminit create_physical_mapping(unsigned long start, 267static int __meminit create_physical_mapping(unsigned long start,
@@ -260,13 +269,8 @@ static int __meminit create_physical_mapping(unsigned long start,
260 int nid) 269 int nid)
261{ 270{
262 unsigned long vaddr, addr, mapping_size = 0; 271 unsigned long vaddr, addr, mapping_size = 0;
272 bool prev_exec, exec = false;
263 pgprot_t prot; 273 pgprot_t prot;
264 unsigned long max_mapping_size;
265#ifdef CONFIG_STRICT_KERNEL_RWX
266 int split_text_mapping = 1;
267#else
268 int split_text_mapping = 0;
269#endif
270 int psize; 274 int psize;
271 275
272 start = _ALIGN_UP(start, PAGE_SIZE); 276 start = _ALIGN_UP(start, PAGE_SIZE);
@@ -274,14 +278,12 @@ static int __meminit create_physical_mapping(unsigned long start,
274 unsigned long gap, previous_size; 278 unsigned long gap, previous_size;
275 int rc; 279 int rc;
276 280
277 gap = end - addr; 281 gap = next_boundary(addr, end) - addr;
278 previous_size = mapping_size; 282 previous_size = mapping_size;
279 max_mapping_size = PUD_SIZE; 283 prev_exec = exec;
280 284
281retry:
282 if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && 285 if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
283 mmu_psize_defs[MMU_PAGE_1G].shift && 286 mmu_psize_defs[MMU_PAGE_1G].shift) {
284 PUD_SIZE <= max_mapping_size) {
285 mapping_size = PUD_SIZE; 287 mapping_size = PUD_SIZE;
286 psize = MMU_PAGE_1G; 288 psize = MMU_PAGE_1G;
287 } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && 289 } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
@@ -293,32 +295,21 @@ retry:
293 psize = mmu_virtual_psize; 295 psize = mmu_virtual_psize;
294 } 296 }
295 297
296 if (split_text_mapping && (mapping_size == PUD_SIZE) &&
297 (addr <= __pa_symbol(__init_begin)) &&
298 (addr + mapping_size) >= __pa_symbol(_stext)) {
299 max_mapping_size = PMD_SIZE;
300 goto retry;
301 }
302
303 if (split_text_mapping && (mapping_size == PMD_SIZE) &&
304 (addr <= __pa_symbol(__init_begin)) &&
305 (addr + mapping_size) >= __pa_symbol(_stext)) {
306 mapping_size = PAGE_SIZE;
307 psize = mmu_virtual_psize;
308 }
309
310 if (mapping_size != previous_size) {
311 print_mapping(start, addr, previous_size);
312 start = addr;
313 }
314
315 vaddr = (unsigned long)__va(addr); 298 vaddr = (unsigned long)__va(addr);
316 299
317 if (overlaps_kernel_text(vaddr, vaddr + mapping_size) || 300 if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
318 overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) 301 overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
319 prot = PAGE_KERNEL_X; 302 prot = PAGE_KERNEL_X;
320 else 303 exec = true;
304 } else {
321 prot = PAGE_KERNEL; 305 prot = PAGE_KERNEL;
306 exec = false;
307 }
308
309 if (mapping_size != previous_size || exec != prev_exec) {
310 print_mapping(start, addr, previous_size, prev_exec);
311 start = addr;
312 }
322 313
323 rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end); 314 rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
324 if (rc) 315 if (rc)
@@ -327,7 +318,7 @@ retry:
327 update_page_count(psize, 1); 318 update_page_count(psize, 1);
328 } 319 }
329 320
330 print_mapping(start, addr, mapping_size); 321 print_mapping(start, addr, mapping_size, exec);
331 return 0; 322 return 0;
332} 323}
333 324
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index d71c7777669c..010e1c616cb2 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -44,20 +44,13 @@ static inline int is_exec_fault(void)
44static inline int pte_looks_normal(pte_t pte) 44static inline int pte_looks_normal(pte_t pte)
45{ 45{
46 46
47#if defined(CONFIG_PPC_BOOK3S_64) 47 if (pte_present(pte) && !pte_special(pte)) {
48 if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
49 if (pte_ci(pte)) 48 if (pte_ci(pte))
50 return 0; 49 return 0;
51 if (pte_user(pte)) 50 if (pte_user(pte))
52 return 1; 51 return 1;
53 } 52 }
54 return 0; 53 return 0;
55#else
56 return (pte_val(pte) &
57 (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
58 _PAGE_PRIVILEGED)) ==
59 (_PAGE_PRESENT | _PAGE_USER);
60#endif
61} 54}
62 55
63static struct page *maybe_pte_to_page(pte_t pte) 56static struct page *maybe_pte_to_page(pte_t pte)
@@ -73,7 +66,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
73 return page; 66 return page;
74} 67}
75 68
76#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 69#ifdef CONFIG_PPC_BOOK3S
77 70
78/* Server-style MMU handles coherency when hashing if HW exec permission 71/* Server-style MMU handles coherency when hashing if HW exec permission
 79 * is supported per page (currently 64-bit only). If not, then we always 72 * is supported per page (currently 64-bit only). If not, then we always
@@ -106,7 +99,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
106 return pte; 99 return pte;
107} 100}
108 101
109#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */ 102#else /* CONFIG_PPC_BOOK3S */
110 103
111/* Embedded type MMU with HW exec support. This is a bit more complicated 104/* Embedded type MMU with HW exec support. This is a bit more complicated
112 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so 105 * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
@@ -117,7 +110,7 @@ static pte_t set_pte_filter(pte_t pte)
117 struct page *pg; 110 struct page *pg;
118 111
119 /* No exec permission in the first place, move on */ 112 /* No exec permission in the first place, move on */
120 if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte)) 113 if (!pte_exec(pte) || !pte_looks_normal(pte))
121 return pte; 114 return pte;
122 115
123 /* If you set _PAGE_EXEC on weird pages you're on your own */ 116 /* If you set _PAGE_EXEC on weird pages you're on your own */
@@ -137,7 +130,7 @@ static pte_t set_pte_filter(pte_t pte)
137 } 130 }
138 131
139 /* Else, we filter out _PAGE_EXEC */ 132 /* Else, we filter out _PAGE_EXEC */
140 return __pte(pte_val(pte) & ~_PAGE_EXEC); 133 return pte_exprotect(pte);
141} 134}
142 135
143static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, 136static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
@@ -150,7 +143,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
150 * if necessary. Also if _PAGE_EXEC is already set, same deal, 143 * if necessary. Also if _PAGE_EXEC is already set, same deal,
151 * we just bail out 144 * we just bail out
152 */ 145 */
153 if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault()) 146 if (dirty || pte_exec(pte) || !is_exec_fault())
154 return pte; 147 return pte;
155 148
156#ifdef CONFIG_DEBUG_VM 149#ifdef CONFIG_DEBUG_VM
@@ -176,10 +169,10 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
176 set_bit(PG_arch_1, &pg->flags); 169 set_bit(PG_arch_1, &pg->flags);
177 170
178 bail: 171 bail:
179 return __pte(pte_val(pte) | _PAGE_EXEC); 172 return pte_mkexec(pte);
180} 173}
181 174
182#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */ 175#endif /* CONFIG_PPC_BOOK3S */
183 176
184/* 177/*
185 * set_pte stores a linux PTE into the linux page table. 178 * set_pte stores a linux PTE into the linux page table.
@@ -188,14 +181,13 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
188 pte_t pte) 181 pte_t pte)
189{ 182{
190 /* 183 /*
191 * When handling numa faults, we already have the pte marked 184 * Make sure hardware valid bit is not set. We don't do
192 * _PAGE_PRESENT, but we can be sure that it is not in hpte. 185 * tlb flush for this update.
193 * Hence we can use set_pte_at for them.
194 */ 186 */
195 VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep)); 187 VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
196 188
197 /* Add the pte bit when trying to set a pte */ 189 /* Add the pte bit when trying to set a pte */
198 pte = __pte(pte_val(pte) | _PAGE_PTE); 190 pte = pte_mkpte(pte);
199 191
200 /* Note: mm->context.id might not yet have been assigned as 192 /* Note: mm->context.id might not yet have been assigned as
201 * this context might not have been activated yet when this 193 * this context might not have been activated yet when this
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 120a49bfb9c6..bda3c6f1bd32 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -50,7 +50,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
50 if (slab_is_available()) { 50 if (slab_is_available()) {
51 pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); 51 pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
52 } else { 52 } else {
53 pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); 53 pte = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
54 if (pte) 54 if (pte)
55 clear_page(pte); 55 clear_page(pte);
56 } 56 }
@@ -76,56 +76,69 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
76void __iomem * 76void __iomem *
77ioremap(phys_addr_t addr, unsigned long size) 77ioremap(phys_addr_t addr, unsigned long size)
78{ 78{
79 return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED, 79 pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
80 __builtin_return_address(0)); 80
81 return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
81} 82}
82EXPORT_SYMBOL(ioremap); 83EXPORT_SYMBOL(ioremap);
83 84
84void __iomem * 85void __iomem *
85ioremap_wc(phys_addr_t addr, unsigned long size) 86ioremap_wc(phys_addr_t addr, unsigned long size)
86{ 87{
87 return __ioremap_caller(addr, size, _PAGE_NO_CACHE, 88 pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
88 __builtin_return_address(0)); 89
90 return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
89} 91}
90EXPORT_SYMBOL(ioremap_wc); 92EXPORT_SYMBOL(ioremap_wc);
91 93
92void __iomem * 94void __iomem *
95ioremap_wt(phys_addr_t addr, unsigned long size)
96{
97 pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
98
99 return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
100}
101EXPORT_SYMBOL(ioremap_wt);
102
103void __iomem *
104ioremap_coherent(phys_addr_t addr, unsigned long size)
105{
106 pgprot_t prot = pgprot_cached(PAGE_KERNEL);
107
108 return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
109}
110EXPORT_SYMBOL(ioremap_coherent);
111
112void __iomem *
93ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) 113ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
94{ 114{
115 pte_t pte = __pte(flags);
116
95 /* writeable implies dirty for kernel addresses */ 117 /* writeable implies dirty for kernel addresses */
96 if ((flags & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO) 118 if (pte_write(pte))
97 flags |= _PAGE_DIRTY | _PAGE_HWWRITE; 119 pte = pte_mkdirty(pte);
98 120
99 /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ 121 /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
100 flags &= ~(_PAGE_USER | _PAGE_EXEC); 122 pte = pte_exprotect(pte);
101 flags |= _PAGE_PRIVILEGED; 123 pte = pte_mkprivileged(pte);
102 124
103 return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); 125 return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
104} 126}
105EXPORT_SYMBOL(ioremap_prot); 127EXPORT_SYMBOL(ioremap_prot);
106 128
107void __iomem * 129void __iomem *
108__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) 130__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
109{ 131{
110 return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); 132 return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
111} 133}
112 134
113void __iomem * 135void __iomem *
114__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, 136__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
115 void *caller)
116{ 137{
117 unsigned long v, i; 138 unsigned long v, i;
118 phys_addr_t p; 139 phys_addr_t p;
119 int err; 140 int err;
120 141
121 /* Make sure we have the base flags */
122 if ((flags & _PAGE_PRESENT) == 0)
123 flags |= pgprot_val(PAGE_KERNEL);
124
125 /* Non-cacheable page cannot be coherent */
126 if (flags & _PAGE_NO_CACHE)
127 flags &= ~_PAGE_COHERENT;
128
129 /* 142 /*
130 * Choose an address to map it to. 143 * Choose an address to map it to.
131 * Once the vmalloc system is running, we use it. 144 * Once the vmalloc system is running, we use it.
@@ -183,7 +196,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
183 196
184 err = 0; 197 err = 0;
185 for (i = 0; i < size && err == 0; i += PAGE_SIZE) 198 for (i = 0; i < size && err == 0; i += PAGE_SIZE)
186 err = map_kernel_page(v+i, p+i, flags); 199 err = map_kernel_page(v + i, p + i, prot);
187 if (err) { 200 if (err) {
188 if (slab_is_available()) 201 if (slab_is_available())
189 vunmap((void *)v); 202 vunmap((void *)v);
@@ -209,7 +222,7 @@ void iounmap(volatile void __iomem *addr)
209} 222}
210EXPORT_SYMBOL(iounmap); 223EXPORT_SYMBOL(iounmap);
211 224
212int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) 225int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
213{ 226{
214 pmd_t *pd; 227 pmd_t *pd;
215 pte_t *pg; 228 pte_t *pg;
@@ -224,10 +237,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
224 /* The PTE should never be already set nor present in the 237 /* The PTE should never be already set nor present in the
225 * hash table 238 * hash table
226 */ 239 */
227 BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) && 240 BUG_ON((pte_present(*pg) | pte_hashpte(*pg)) && pgprot_val(prot));
228 flags); 241 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, prot));
229 set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
230 __pgprot(flags)));
231 } 242 }
232 smp_wmb(); 243 smp_wmb();
233 return err; 244 return err;
@@ -238,7 +249,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
238 */ 249 */
239static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) 250static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
240{ 251{
241 unsigned long v, s, f; 252 unsigned long v, s;
242 phys_addr_t p; 253 phys_addr_t p;
243 int ktext; 254 int ktext;
244 255
@@ -248,11 +259,10 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
248 for (; s < top; s += PAGE_SIZE) { 259 for (; s < top; s += PAGE_SIZE) {
249 ktext = ((char *)v >= _stext && (char *)v < etext) || 260 ktext = ((char *)v >= _stext && (char *)v < etext) ||
250 ((char *)v >= _sinittext && (char *)v < _einittext); 261 ((char *)v >= _sinittext && (char *)v < _einittext);
251 f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL); 262 map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
252 map_kernel_page(v, p, f);
253#ifdef CONFIG_PPC_STD_MMU_32 263#ifdef CONFIG_PPC_STD_MMU_32
254 if (ktext) 264 if (ktext)
255 hash_preload(&init_mm, v, 0, 0x300); 265 hash_preload(&init_mm, v, false, 0x300);
256#endif 266#endif
257 v += PAGE_SIZE; 267 v += PAGE_SIZE;
258 p += PAGE_SIZE; 268 p += PAGE_SIZE;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 53e9eeecd5d4..fb1375c07e8c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -113,17 +113,12 @@ unsigned long ioremap_bot = IOREMAP_BASE;
113 * __ioremap_at - Low level function to establish the page tables 113 * __ioremap_at - Low level function to establish the page tables
114 * for an IO mapping 114 * for an IO mapping
115 */ 115 */
116void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, 116void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
117 unsigned long flags)
118{ 117{
119 unsigned long i; 118 unsigned long i;
120 119
121 /* Make sure we have the base flags */
122 if ((flags & _PAGE_PRESENT) == 0)
123 flags |= pgprot_val(PAGE_KERNEL);
124
125 /* We don't support the 4K PFN hack with ioremap */ 120 /* We don't support the 4K PFN hack with ioremap */
126 if (flags & H_PAGE_4K_PFN) 121 if (pgprot_val(prot) & H_PAGE_4K_PFN)
127 return NULL; 122 return NULL;
128 123
129 WARN_ON(pa & ~PAGE_MASK); 124 WARN_ON(pa & ~PAGE_MASK);
@@ -131,7 +126,7 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
131 WARN_ON(size & ~PAGE_MASK); 126 WARN_ON(size & ~PAGE_MASK);
132 127
133 for (i = 0; i < size; i += PAGE_SIZE) 128 for (i = 0; i < size; i += PAGE_SIZE)
134 if (map_kernel_page((unsigned long)ea+i, pa+i, flags)) 129 if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
135 return NULL; 130 return NULL;
136 131
137 return (void __iomem *)ea; 132 return (void __iomem *)ea;
@@ -152,7 +147,7 @@ void __iounmap_at(void *ea, unsigned long size)
152} 147}
153 148
154void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, 149void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
155 unsigned long flags, void *caller) 150 pgprot_t prot, void *caller)
156{ 151{
157 phys_addr_t paligned; 152 phys_addr_t paligned;
158 void __iomem *ret; 153 void __iomem *ret;
@@ -182,11 +177,11 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
182 return NULL; 177 return NULL;
183 178
184 area->phys_addr = paligned; 179 area->phys_addr = paligned;
185 ret = __ioremap_at(paligned, area->addr, size, flags); 180 ret = __ioremap_at(paligned, area->addr, size, prot);
186 if (!ret) 181 if (!ret)
187 vunmap(area->addr); 182 vunmap(area->addr);
188 } else { 183 } else {
189 ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags); 184 ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
190 if (ret) 185 if (ret)
191 ioremap_bot += size; 186 ioremap_bot += size;
192 } 187 }
@@ -199,49 +194,59 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
199void __iomem * __ioremap(phys_addr_t addr, unsigned long size, 194void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
200 unsigned long flags) 195 unsigned long flags)
201{ 196{
202 return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); 197 return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
203} 198}
204 199
205void __iomem * ioremap(phys_addr_t addr, unsigned long size) 200void __iomem * ioremap(phys_addr_t addr, unsigned long size)
206{ 201{
207 unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0))); 202 pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
208 void *caller = __builtin_return_address(0); 203 void *caller = __builtin_return_address(0);
209 204
210 if (ppc_md.ioremap) 205 if (ppc_md.ioremap)
211 return ppc_md.ioremap(addr, size, flags, caller); 206 return ppc_md.ioremap(addr, size, prot, caller);
212 return __ioremap_caller(addr, size, flags, caller); 207 return __ioremap_caller(addr, size, prot, caller);
213} 208}
214 209
215void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) 210void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
216{ 211{
217 unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0))); 212 pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
213 void *caller = __builtin_return_address(0);
214
215 if (ppc_md.ioremap)
216 return ppc_md.ioremap(addr, size, prot, caller);
217 return __ioremap_caller(addr, size, prot, caller);
218}
219
220void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
221{
222 pgprot_t prot = pgprot_cached(PAGE_KERNEL);
218 void *caller = __builtin_return_address(0); 223 void *caller = __builtin_return_address(0);
219 224
220 if (ppc_md.ioremap) 225 if (ppc_md.ioremap)
221 return ppc_md.ioremap(addr, size, flags, caller); 226 return ppc_md.ioremap(addr, size, prot, caller);
222 return __ioremap_caller(addr, size, flags, caller); 227 return __ioremap_caller(addr, size, prot, caller);
223} 228}
224 229
225void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, 230void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
226 unsigned long flags) 231 unsigned long flags)
227{ 232{
233 pte_t pte = __pte(flags);
228 void *caller = __builtin_return_address(0); 234 void *caller = __builtin_return_address(0);
229 235
230 /* writeable implies dirty for kernel addresses */ 236 /* writeable implies dirty for kernel addresses */
231 if (flags & _PAGE_WRITE) 237 if (pte_write(pte))
232 flags |= _PAGE_DIRTY; 238 pte = pte_mkdirty(pte);
233 239
234 /* we don't want to let _PAGE_EXEC leak out */ 240 /* we don't want to let _PAGE_EXEC leak out */
235 flags &= ~_PAGE_EXEC; 241 pte = pte_exprotect(pte);
236 /* 242 /*
237 * Force kernel mapping. 243 * Force kernel mapping.
238 */ 244 */
239 flags &= ~_PAGE_USER; 245 pte = pte_mkprivileged(pte);
240 flags |= _PAGE_PRIVILEGED;
241 246
242 if (ppc_md.ioremap) 247 if (ppc_md.ioremap)
243 return ppc_md.ioremap(addr, size, flags, caller); 248 return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller);
244 return __ioremap_caller(addr, size, flags, caller); 249 return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
245} 250}
246 251
247 252
@@ -306,7 +311,7 @@ struct page *pud_page(pud_t pud)
306 */ 311 */
307struct page *pmd_page(pmd_t pmd) 312struct page *pmd_page(pmd_t pmd)
308{ 313{
309 if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) 314 if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
310 return pte_page(pmd_pte(pmd)); 315 return pte_page(pmd_pte(pmd));
311 return virt_to_page(pmd_page_vaddr(pmd)); 316 return virt_to_page(pmd_page_vaddr(pmd));
312} 317}
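Illustrative aside (not part of the diff above): with the conversion to pgprot_t the powerpc ioremap family now includes ioremap_coherent() alongside the existing uncached and write-combining variants. The sketch below shows how a driver might pick between them; it is a hypothetical in-tree module, and the physical addresses and sizes are made up for the example.

#include <linux/io.h>
#include <linux/module.h>

static void __iomem *regs, *framebuf, *shared;

static int __init ioremap_demo_init(void)
{
	regs = ioremap(0xc0000000, 0x1000);		/* uncached, guarded MMIO */
	if (!regs)
		return -ENOMEM;

	framebuf = ioremap_wc(0xd0000000, 0x100000);	/* write-combining */
	if (!framebuf)
		goto out_regs;

	shared = ioremap_coherent(0xe0000000, 0x1000);	/* cacheable, coherent */
	if (!shared)
		goto out_fb;

	return 0;

out_fb:
	iounmap(framebuf);
out_regs:
	iounmap(regs);
	return -ENOMEM;
}

static void __exit ioremap_demo_exit(void)
{
	iounmap(shared);
	iounmap(framebuf);
	iounmap(regs);
}

module_init(ioremap_demo_init);
module_exit(ioremap_demo_exit);
MODULE_LICENSE("GPL");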
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index bea6c544e38f..f6f575bae3bc 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -163,7 +163,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
163 * Preload a translation in the hash table 163 * Preload a translation in the hash table
164 */ 164 */
165void hash_preload(struct mm_struct *mm, unsigned long ea, 165void hash_preload(struct mm_struct *mm, unsigned long ea,
166 unsigned long access, unsigned long trap) 166 bool is_exec, unsigned long trap)
167{ 167{
168 pmd_t *pmd; 168 pmd_t *pmd;
169 169
@@ -224,7 +224,7 @@ void __init MMU_init_hw(void)
224 * Find some memory for the hash table. 224 * Find some memory for the hash table.
225 */ 225 */
226 if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); 226 if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
227 Hash = __va(memblock_alloc(Hash_size, Hash_size)); 227 Hash = __va(memblock_phys_alloc(Hash_size, Hash_size));
228 memset(Hash, 0, Hash_size); 228 memset(Hash, 0, Hash_size);
229 _SDR1 = __pa(Hash) | SDR1_LOW_BITS; 229 _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
230 230
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 9f574e59d178..c3fdf2969d9f 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,6 +14,7 @@
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 */ 15 */
16 16
17#include <asm/asm-prototypes.h>
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
18#include <asm/mmu.h> 19#include <asm/mmu.h>
19#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
@@ -30,11 +31,10 @@
30 31
31enum slb_index { 32enum slb_index {
32 LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ 33 LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */
33 VMALLOC_INDEX = 1, /* Kernel virtual map (0xd000000000000000) */ 34 KSTACK_INDEX = 1, /* Kernel stack map */
34 KSTACK_INDEX = 2, /* Kernel stack map */
35}; 35};
36 36
37extern void slb_allocate(unsigned long ea); 37static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
38 38
39#define slb_esid_mask(ssize) \ 39#define slb_esid_mask(ssize) \
40 (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) 40 (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
@@ -45,13 +45,43 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
45 return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; 45 return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
46} 46}
47 47
48static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, 48static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
49 unsigned long flags) 49 unsigned long flags)
50{ 50{
51 return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags | 51 return (vsid << slb_vsid_shift(ssize)) | flags |
52 ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); 52 ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
53} 53}
54 54
55static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
56 unsigned long flags)
57{
58 return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
59}
60
61static void assert_slb_exists(unsigned long ea)
62{
63#ifdef CONFIG_DEBUG_VM
64 unsigned long tmp;
65
66 WARN_ON_ONCE(mfmsr() & MSR_EE);
67
68 asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
69 WARN_ON(tmp == 0);
70#endif
71}
72
73static void assert_slb_notexists(unsigned long ea)
74{
75#ifdef CONFIG_DEBUG_VM
76 unsigned long tmp;
77
78 WARN_ON_ONCE(mfmsr() & MSR_EE);
79
80 asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
81 WARN_ON(tmp != 0);
82#endif
83}
84
55static inline void slb_shadow_update(unsigned long ea, int ssize, 85static inline void slb_shadow_update(unsigned long ea, int ssize,
56 unsigned long flags, 86 unsigned long flags,
57 enum slb_index index) 87 enum slb_index index)
@@ -84,6 +114,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
84 */ 114 */
85 slb_shadow_update(ea, ssize, flags, index); 115 slb_shadow_update(ea, ssize, flags, index);
86 116
117 assert_slb_notexists(ea);
87 asm volatile("slbmte %0,%1" : 118 asm volatile("slbmte %0,%1" :
88 : "r" (mk_vsid_data(ea, ssize, flags)), 119 : "r" (mk_vsid_data(ea, ssize, flags)),
89 "r" (mk_esid_data(ea, ssize, index)) 120 "r" (mk_esid_data(ea, ssize, index))
@@ -105,17 +136,20 @@ void __slb_restore_bolted_realmode(void)
105 : "r" (be64_to_cpu(p->save_area[index].vsid)), 136 : "r" (be64_to_cpu(p->save_area[index].vsid)),
106 "r" (be64_to_cpu(p->save_area[index].esid))); 137 "r" (be64_to_cpu(p->save_area[index].esid)));
107 } 138 }
139
140 assert_slb_exists(local_paca->kstack);
108} 141}
109 142
110/* 143/*
111 * Insert the bolted entries into an empty SLB. 144 * Insert the bolted entries into an empty SLB.
112 * This is not the same as rebolt because the bolted segments are not
113 * changed, just loaded from the shadow area.
114 */ 145 */
115void slb_restore_bolted_realmode(void) 146void slb_restore_bolted_realmode(void)
116{ 147{
117 __slb_restore_bolted_realmode(); 148 __slb_restore_bolted_realmode();
118 get_paca()->slb_cache_ptr = 0; 149 get_paca()->slb_cache_ptr = 0;
150
151 get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
152 get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
119} 153}
120 154
121/* 155/*
@@ -123,113 +157,262 @@ void slb_restore_bolted_realmode(void)
123 */ 157 */
124void slb_flush_all_realmode(void) 158void slb_flush_all_realmode(void)
125{ 159{
126 /*
127 * This flushes all SLB entries including 0, so it must be realmode.
128 */
129 asm volatile("slbmte %0,%0; slbia" : : "r" (0)); 160 asm volatile("slbmte %0,%0; slbia" : : "r" (0));
130} 161}
131 162
132static void __slb_flush_and_rebolt(void) 163/*
 164 * This flushes non-bolted entries; it can be run in virtual mode. Must
165 * be called with interrupts disabled.
166 */
167void slb_flush_and_restore_bolted(void)
133{ 168{
134 /* If you change this make sure you change SLB_NUM_BOLTED 169 struct slb_shadow *p = get_slb_shadow();
135 * and PR KVM appropriately too. */
136 unsigned long linear_llp, vmalloc_llp, lflags, vflags;
137 unsigned long ksp_esid_data, ksp_vsid_data;
138 170
139 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; 171 BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
140 vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
141 lflags = SLB_VSID_KERNEL | linear_llp;
142 vflags = SLB_VSID_KERNEL | vmalloc_llp;
143 172
144 ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_INDEX); 173 WARN_ON(!irqs_disabled());
145 if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) { 174
146 ksp_esid_data &= ~SLB_ESID_V; 175 /*
147 ksp_vsid_data = 0; 176 * We can't take a PMU exception in the following code, so hard
148 slb_shadow_clear(KSTACK_INDEX); 177 * disable interrupts.
149 } else { 178 */
150 /* Update stack entry; others don't change */ 179 hard_irq_disable();
151 slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_INDEX);
152 ksp_vsid_data =
153 be64_to_cpu(get_slb_shadow()->save_area[KSTACK_INDEX].vsid);
154 }
155 180
156 /* We need to do this all in asm, so we're sure we don't touch
157 * the stack between the slbia and rebolting it. */
158 asm volatile("isync\n" 181 asm volatile("isync\n"
159 "slbia\n" 182 "slbia\n"
160 /* Slot 1 - first VMALLOC segment */ 183 "slbmte %0, %1\n"
161 "slbmte %0,%1\n" 184 "isync\n"
162 /* Slot 2 - kernel stack */ 185 :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
163 "slbmte %2,%3\n" 186 "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
164 "isync"
165 :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)),
166 "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, VMALLOC_INDEX)),
167 "r"(ksp_vsid_data),
168 "r"(ksp_esid_data)
169 : "memory"); 187 : "memory");
188 assert_slb_exists(get_paca()->kstack);
189
190 get_paca()->slb_cache_ptr = 0;
191
192 get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
193 get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
170} 194}
171 195
172void slb_flush_and_rebolt(void) 196void slb_save_contents(struct slb_entry *slb_ptr)
173{ 197{
198 int i;
199 unsigned long e, v;
174 200
175 WARN_ON(!irqs_disabled()); 201 /* Save slb_cache_ptr value. */
202 get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
203
204 if (!slb_ptr)
205 return;
206
207 for (i = 0; i < mmu_slb_size; i++) {
208 asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i));
209 asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i));
210 slb_ptr->esid = e;
211 slb_ptr->vsid = v;
212 slb_ptr++;
213 }
214}
215
216void slb_dump_contents(struct slb_entry *slb_ptr)
217{
218 int i, n;
219 unsigned long e, v;
220 unsigned long llp;
221
222 if (!slb_ptr)
223 return;
224
225 pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
226 pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr);
227
228 for (i = 0; i < mmu_slb_size; i++) {
229 e = slb_ptr->esid;
230 v = slb_ptr->vsid;
231 slb_ptr++;
232
233 if (!e && !v)
234 continue;
235
236 pr_err("%02d %016lx %016lx\n", i, e, v);
237
238 if (!(e & SLB_ESID_V)) {
239 pr_err("\n");
240 continue;
241 }
242 llp = v & SLB_VSID_LLP;
243 if (v & SLB_VSID_B_1T) {
244 pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
245 GET_ESID_1T(e),
246 (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
247 } else {
248 pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
249 GET_ESID(e),
250 (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
251 }
252 }
253 pr_err("----------------------------------\n");
254
 255 /* Dump slb cache entries as well. */
256 pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
257 pr_err("Valid SLB cache entries:\n");
258 n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
259 for (i = 0; i < n; i++)
260 pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
261 pr_err("Rest of SLB cache entries:\n");
262 for (i = n; i < SLB_CACHE_ENTRIES; i++)
263 pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
264}
176 265
266void slb_vmalloc_update(void)
267{
177 /* 268 /*
178 * We can't take a PMU exception in the following code, so hard 269 * vmalloc is not bolted, so just have to flush non-bolted.
179 * disable interrupts.
180 */ 270 */
181 hard_irq_disable(); 271 slb_flush_and_restore_bolted();
272}
182 273
183 __slb_flush_and_rebolt(); 274static bool preload_hit(struct thread_info *ti, unsigned long esid)
184 get_paca()->slb_cache_ptr = 0; 275{
276 unsigned char i;
277
278 for (i = 0; i < ti->slb_preload_nr; i++) {
279 unsigned char idx;
280
281 idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
282 if (esid == ti->slb_preload_esid[idx])
283 return true;
284 }
285 return false;
185} 286}
186 287
187void slb_vmalloc_update(void) 288static bool preload_add(struct thread_info *ti, unsigned long ea)
188{ 289{
189 unsigned long vflags; 290 unsigned char idx;
291 unsigned long esid;
292
293 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
294 /* EAs are stored >> 28 so 256MB segments don't need clearing */
295 if (ea & ESID_MASK_1T)
296 ea &= ESID_MASK_1T;
297 }
298
299 esid = ea >> SID_SHIFT;
190 300
191 vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp; 301 if (preload_hit(ti, esid))
192 slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX); 302 return false;
193 slb_flush_and_rebolt(); 303
304 idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
305 ti->slb_preload_esid[idx] = esid;
306 if (ti->slb_preload_nr == SLB_PRELOAD_NR)
307 ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
308 else
309 ti->slb_preload_nr++;
310
311 return true;
194} 312}
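The preload helpers above store each segment as a shifted ESID rather than a full EA. A minimal sketch of that reduction, assuming the usual 256MB/1TB segment shifts (illustrative names, not kernel code):

#define SID_SHIFT	28	/* 256MB segments */
#define SID_SHIFT_1T	40	/* 1TB segments */
#define ESID_MASK_1T	(~((1UL << SID_SHIFT_1T) - 1))

/* EAs are stored >> 28; addresses in 1TB segments are first rounded down to 1TB. */
static unsigned long ea_to_preload_esid(unsigned long ea, int has_1t_segments)
{
	if (has_1t_segments && (ea & ESID_MASK_1T))
		ea &= ESID_MASK_1T;
	return ea >> SID_SHIFT;
}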
195 313
196/* Helper function to compare esids. There are four cases to handle. 314static void preload_age(struct thread_info *ti)
197 * 1. The system is not 1T segment size capable. Use the GET_ESID compare.
198 * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
199 * 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match.
200 * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
201 */
202static inline int esids_match(unsigned long addr1, unsigned long addr2)
203{ 315{
204 int esid_1t_count; 316 if (!ti->slb_preload_nr)
317 return;
318 ti->slb_preload_nr--;
319 ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
320}
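Taken together, preload_hit/preload_add/preload_age implement a small FIFO in thread_info. A standalone sketch of that ring-buffer behaviour, with SLB_PRELOAD_NR assumed to be a small fixed size as in the patch (names are illustrative):

#define SLB_PRELOAD_NR	16	/* assumed size for this sketch */

struct preload_fifo {
	unsigned long esid[SLB_PRELOAD_NR];
	unsigned char tail;	/* index of the oldest entry */
	unsigned char nr;	/* number of valid entries */
};

/* Append an ESID; when full, the oldest slot is overwritten and tail advances. */
static void fifo_add(struct preload_fifo *f, unsigned long esid)
{
	unsigned char idx = (f->tail + f->nr) % SLB_PRELOAD_NR;

	f->esid[idx] = esid;
	if (f->nr == SLB_PRELOAD_NR)
		f->tail = (f->tail + 1) % SLB_PRELOAD_NR;
	else
		f->nr++;
}

/* Drop the oldest entry, as switch_slb() does once every 256 context switches. */
static void fifo_age(struct preload_fifo *f)
{
	if (!f->nr)
		return;
	f->nr--;
	f->tail = (f->tail + 1) % SLB_PRELOAD_NR;
}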
205 321
206 /* System is not 1T segment size capable. */ 322void slb_setup_new_exec(void)
207 if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) 323{
208 return (GET_ESID(addr1) == GET_ESID(addr2)); 324 struct thread_info *ti = current_thread_info();
325 struct mm_struct *mm = current->mm;
326 unsigned long exec = 0x10000000;
209 327
210 esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) + 328 WARN_ON(irqs_disabled());
211 ((addr2 >> SID_SHIFT_1T) != 0));
212 329
213 /* both addresses are < 1T */ 330 /*
 214 if (esid_1t_count == 0) 331 * preload cache can only be used to determine whether an SLB
 215 return (GET_ESID(addr1) == GET_ESID(addr2)); 332 * entry exists provided it has not started to overflow.
333 */
334 if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
335 return;
216 336
217 /* One address < 1T, the other > 1T. Not a match */ 337 hard_irq_disable();
218 if (esid_1t_count == 1)
219 return 0;
220 338
221 /* Both addresses are > 1T. */ 339 /*
222 return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2)); 340 * We have no good place to clear the slb preload cache on exec,
341 * flush_thread is about the earliest arch hook but that happens
 342 * after we switch to the mm and have already preloaded the SLBEs.
343 *
344 * For the most part that's probably okay to use entries from the
345 * previous exec, they will age out if unused. It may turn out to
346 * be an advantage to clear the cache before switching to it,
347 * however.
348 */
349
350 /*
351 * preload some userspace segments into the SLB.
352 * Almost all 32 and 64bit PowerPC executables are linked at
353 * 0x10000000 so it makes sense to preload this segment.
354 */
355 if (!is_kernel_addr(exec)) {
356 if (preload_add(ti, exec))
357 slb_allocate_user(mm, exec);
358 }
359
360 /* Libraries and mmaps. */
361 if (!is_kernel_addr(mm->mmap_base)) {
362 if (preload_add(ti, mm->mmap_base))
363 slb_allocate_user(mm, mm->mmap_base);
364 }
365
366 /* see switch_slb */
367 asm volatile("isync" : : : "memory");
368
369 local_irq_enable();
223} 370}
224 371
372void preload_new_slb_context(unsigned long start, unsigned long sp)
373{
374 struct thread_info *ti = current_thread_info();
375 struct mm_struct *mm = current->mm;
376 unsigned long heap = mm->start_brk;
377
378 WARN_ON(irqs_disabled());
379
380 /* see above */
381 if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
382 return;
383
384 hard_irq_disable();
385
386 /* Userspace entry address. */
387 if (!is_kernel_addr(start)) {
388 if (preload_add(ti, start))
389 slb_allocate_user(mm, start);
390 }
391
392 /* Top of stack, grows down. */
393 if (!is_kernel_addr(sp)) {
394 if (preload_add(ti, sp))
395 slb_allocate_user(mm, sp);
396 }
397
398 /* Bottom of heap, grows up. */
399 if (heap && !is_kernel_addr(heap)) {
400 if (preload_add(ti, heap))
401 slb_allocate_user(mm, heap);
402 }
403
404 /* see switch_slb */
405 asm volatile("isync" : : : "memory");
406
407 local_irq_enable();
408}
409
410
225/* Flush all user entries from the segment table of the current processor. */ 411/* Flush all user entries from the segment table of the current processor. */
226void switch_slb(struct task_struct *tsk, struct mm_struct *mm) 412void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
227{ 413{
228 unsigned long offset; 414 struct thread_info *ti = task_thread_info(tsk);
229 unsigned long slbie_data = 0; 415 unsigned char i;
230 unsigned long pc = KSTK_EIP(tsk);
231 unsigned long stack = KSTK_ESP(tsk);
232 unsigned long exec_base;
233 416
234 /* 417 /*
235 * We need interrupts hard-disabled here, not just soft-disabled, 418 * We need interrupts hard-disabled here, not just soft-disabled,
@@ -238,91 +421,107 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
238 * which would update the slb_cache/slb_cache_ptr fields in the PACA. 421 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
239 */ 422 */
240 hard_irq_disable(); 423 hard_irq_disable();
241 offset = get_paca()->slb_cache_ptr; 424 asm volatile("isync" : : : "memory");
242 if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && 425 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
243 offset <= SLB_CACHE_ENTRIES) { 426 /*
244 int i; 427 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
245 asm volatile("isync" : : : "memory"); 428 * associated lookaside structures, which matches what
246 for (i = 0; i < offset; i++) { 429 * switch_slb wants. So ARCH_300 does not use the slb
247 slbie_data = (unsigned long)get_paca()->slb_cache[i] 430 * cache.
248 << SID_SHIFT; /* EA */ 431 */
249 slbie_data |= user_segment_size(slbie_data) 432 asm volatile(PPC_SLBIA(3));
250 << SLBIE_SSIZE_SHIFT;
251 slbie_data |= SLBIE_C; /* C set for user addresses */
252 asm volatile("slbie %0" : : "r" (slbie_data));
253 }
254 asm volatile("isync" : : : "memory");
255 } else { 433 } else {
256 __slb_flush_and_rebolt(); 434 unsigned long offset = get_paca()->slb_cache_ptr;
257 } 435
436 if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
437 offset <= SLB_CACHE_ENTRIES) {
438 unsigned long slbie_data = 0;
439
440 for (i = 0; i < offset; i++) {
441 unsigned long ea;
442
443 ea = (unsigned long)
444 get_paca()->slb_cache[i] << SID_SHIFT;
445 /*
446 * Could assert_slb_exists here, but hypervisor
447 * or machine check could have come in and
448 * removed the entry at this point.
449 */
450
451 slbie_data = ea;
452 slbie_data |= user_segment_size(slbie_data)
453 << SLBIE_SSIZE_SHIFT;
454 slbie_data |= SLBIE_C; /* user slbs have C=1 */
455 asm volatile("slbie %0" : : "r" (slbie_data));
456 }
457
458 /* Workaround POWER5 < DD2.1 issue */
459 if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
460 asm volatile("slbie %0" : : "r" (slbie_data));
461
462 } else {
463 struct slb_shadow *p = get_slb_shadow();
464 unsigned long ksp_esid_data =
465 be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
466 unsigned long ksp_vsid_data =
467 be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
468
469 asm volatile(PPC_SLBIA(1) "\n"
470 "slbmte %0,%1\n"
471 "isync"
472 :: "r"(ksp_vsid_data),
473 "r"(ksp_esid_data));
474
475 get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
476 }
258 477
259 /* Workaround POWER5 < DD2.1 issue */ 478 get_paca()->slb_cache_ptr = 0;
260 if (offset == 1 || offset > SLB_CACHE_ENTRIES) 479 }
261 asm volatile("slbie %0" : : "r" (slbie_data)); 480 get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
262 481
263 get_paca()->slb_cache_ptr = 0;
264 copy_mm_to_paca(mm); 482 copy_mm_to_paca(mm);
265 483
266 /* 484 /*
267 * preload some userspace segments into the SLB. 485 * We gradually age out SLBs after a number of context switches to
268 * Almost all 32 and 64bit PowerPC executables are linked at 486 * reduce reload overhead of unused entries (like we do with FP/VEC
269 * 0x10000000 so it makes sense to preload this segment. 487 * reload). Each time we wrap 256 switches, take an entry out of the
488 * SLB preload cache.
270 */ 489 */
271 exec_base = 0x10000000; 490 tsk->thread.load_slb++;
272 491 if (!tsk->thread.load_slb) {
273 if (is_kernel_addr(pc) || is_kernel_addr(stack) || 492 unsigned long pc = KSTK_EIP(tsk);
274 is_kernel_addr(exec_base))
275 return;
276 493
277 slb_allocate(pc); 494 preload_age(ti);
495 preload_add(ti, pc);
496 }
278 497
279 if (!esids_match(pc, stack)) 498 for (i = 0; i < ti->slb_preload_nr; i++) {
280 slb_allocate(stack); 499 unsigned char idx;
500 unsigned long ea;
281 501
282 if (!esids_match(pc, exec_base) && 502 idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
283 !esids_match(stack, exec_base)) 503 ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
284 slb_allocate(exec_base);
285}
286 504
287static inline void patch_slb_encoding(unsigned int *insn_addr, 505 slb_allocate_user(mm, ea);
288 unsigned int immed) 506 }
289{
290 507
291 /* 508 /*
292 * This function patches either an li or a cmpldi instruction with 509 * Synchronize slbmte preloads with possible subsequent user memory
293 * a new immediate value. This relies on the fact that both li 510 * address accesses by the kernel (user mode won't happen until
294 * (which is actually addi) and cmpldi both take a 16-bit immediate 511 * rfid, which is safe).
295 * value, and it is situated in the same location in the instruction,
296 * ie. bits 16-31 (Big endian bit order) or the lower 16 bits.
297 * The signedness of the immediate operand differs between the two
298 * instructions however this code is only ever patching a small value,
299 * much less than 1 << 15, so we can get away with it.
300 * To patch the value we read the existing instruction, clear the
301 * immediate value, and or in our new value, then write the instruction
302 * back.
303 */ 512 */
304 unsigned int insn = (*insn_addr & 0xffff0000) | immed; 513 asm volatile("isync" : : : "memory");
305 patch_instruction(insn_addr, insn);
306} 514}
307 515
308extern u32 slb_miss_kernel_load_linear[];
309extern u32 slb_miss_kernel_load_io[];
310extern u32 slb_compare_rr_to_size[];
311extern u32 slb_miss_kernel_load_vmemmap[];
312
313void slb_set_size(u16 size) 516void slb_set_size(u16 size)
314{ 517{
315 if (mmu_slb_size == size)
316 return;
317
318 mmu_slb_size = size; 518 mmu_slb_size = size;
319 patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
320} 519}
321 520
322void slb_initialize(void) 521void slb_initialize(void)
323{ 522{
324 unsigned long linear_llp, vmalloc_llp, io_llp; 523 unsigned long linear_llp, vmalloc_llp, io_llp;
325 unsigned long lflags, vflags; 524 unsigned long lflags;
326 static int slb_encoding_inited; 525 static int slb_encoding_inited;
327#ifdef CONFIG_SPARSEMEM_VMEMMAP 526#ifdef CONFIG_SPARSEMEM_VMEMMAP
328 unsigned long vmemmap_llp; 527 unsigned long vmemmap_llp;
@@ -338,34 +537,24 @@ void slb_initialize(void)
338#endif 537#endif
339 if (!slb_encoding_inited) { 538 if (!slb_encoding_inited) {
340 slb_encoding_inited = 1; 539 slb_encoding_inited = 1;
341 patch_slb_encoding(slb_miss_kernel_load_linear,
342 SLB_VSID_KERNEL | linear_llp);
343 patch_slb_encoding(slb_miss_kernel_load_io,
344 SLB_VSID_KERNEL | io_llp);
345 patch_slb_encoding(slb_compare_rr_to_size,
346 mmu_slb_size);
347
348 pr_devel("SLB: linear LLP = %04lx\n", linear_llp); 540 pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
349 pr_devel("SLB: io LLP = %04lx\n", io_llp); 541 pr_devel("SLB: io LLP = %04lx\n", io_llp);
350
351#ifdef CONFIG_SPARSEMEM_VMEMMAP 542#ifdef CONFIG_SPARSEMEM_VMEMMAP
352 patch_slb_encoding(slb_miss_kernel_load_vmemmap,
353 SLB_VSID_KERNEL | vmemmap_llp);
354 pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp); 543 pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
355#endif 544#endif
356 } 545 }
357 546
358 get_paca()->stab_rr = SLB_NUM_BOLTED; 547 get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
548 get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
549 get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
359 550
360 lflags = SLB_VSID_KERNEL | linear_llp; 551 lflags = SLB_VSID_KERNEL | linear_llp;
361 vflags = SLB_VSID_KERNEL | vmalloc_llp;
362 552
363 /* Invalidate the entire SLB (even entry 0) & all the ERATS */ 553 /* Invalidate the entire SLB (even entry 0) & all the ERATS */
364 asm volatile("isync":::"memory"); 554 asm volatile("isync":::"memory");
365 asm volatile("slbmte %0,%0"::"r" (0) : "memory"); 555 asm volatile("slbmte %0,%0"::"r" (0) : "memory");
366 asm volatile("isync; slbia; isync":::"memory"); 556 asm volatile("isync; slbia; isync":::"memory");
367 create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX); 557 create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
368 create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX);
369 558
370 /* For the boot cpu, we're running on the stack in init_thread_union, 559 /* For the boot cpu, we're running on the stack in init_thread_union,
371 * which is in the first segment of the linear mapping, and also 560 * which is in the first segment of the linear mapping, and also
@@ -381,122 +570,259 @@ void slb_initialize(void)
381 asm volatile("isync":::"memory"); 570 asm volatile("isync":::"memory");
382} 571}
383 572
384static void insert_slb_entry(unsigned long vsid, unsigned long ea, 573static void slb_cache_update(unsigned long esid_data)
385 int bpsize, int ssize)
386{ 574{
387 unsigned long flags, vsid_data, esid_data;
388 enum slb_index index;
389 int slb_cache_index; 575 int slb_cache_index;
390 576
391 /* 577 if (cpu_has_feature(CPU_FTR_ARCH_300))
392 * We are irq disabled, hence should be safe to access PACA. 578 return; /* ISAv3.0B and later does not use slb_cache */
393 */
394 VM_WARN_ON(!irqs_disabled());
395
396 /*
397 * We can't take a PMU exception in the following code, so hard
398 * disable interrupts.
399 */
400 hard_irq_disable();
401
402 index = get_paca()->stab_rr;
403
404 /*
405 * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
406 */
407 if (index < (mmu_slb_size - 1))
408 index++;
409 else
410 index = SLB_NUM_BOLTED;
411
412 get_paca()->stab_rr = index;
413
414 flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
415 vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
416 ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
417 esid_data = mk_esid_data(ea, ssize, index);
418
419 /*
420 * No need for an isync before or after this slbmte. The exception
421 * we enter with and the rfid we exit with are context synchronizing.
422 * Also we only handle user segments here.
423 */
424 asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
425 : "memory");
426 579
427 /* 580 /*
428 * Now update slb cache entries 581 * Now update slb cache entries
429 */ 582 */
430 slb_cache_index = get_paca()->slb_cache_ptr; 583 slb_cache_index = local_paca->slb_cache_ptr;
431 if (slb_cache_index < SLB_CACHE_ENTRIES) { 584 if (slb_cache_index < SLB_CACHE_ENTRIES) {
432 /* 585 /*
433 * We have space in slb cache for optimized switch_slb(). 586 * We have space in slb cache for optimized switch_slb().
434 * Top 36 bits from esid_data as per ISA 587 * Top 36 bits from esid_data as per ISA
435 */ 588 */
436 get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28; 589 local_paca->slb_cache[slb_cache_index++] = esid_data >> 28;
437 get_paca()->slb_cache_ptr++; 590 local_paca->slb_cache_ptr++;
438 } else { 591 } else {
439 /* 592 /*
440 * Our cache is full and the current cache content strictly 593 * Our cache is full and the current cache content strictly
 441 * doesn't indicate the active SLB contents. Bump the ptr 594 * doesn't indicate the active SLB contents. Bump the ptr
442 * so that switch_slb() will ignore the cache. 595 * so that switch_slb() will ignore the cache.
443 */ 596 */
444 get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; 597 local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
445 } 598 }
446} 599}
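The cache update above depends on an overflow convention: while slb_cache_ptr stays within SLB_CACHE_ENTRIES the cache lists every non-bolted user entry, and anything beyond that tells switch_slb() to fall back to a full flush. A hedged model of that convention (the entry count is assumed for the sketch):

#define SLB_CACHE_ENTRIES	8	/* assumed size for this sketch */

struct slb_cache_model {
	unsigned int esid[SLB_CACHE_ENTRIES];	/* EA >> 28, one per insert */
	unsigned int ptr;			/* next free slot, or overflow mark */
};

/* Record a user SLB insert; past the end, mark the cache as unusable. */
static void cache_note_insert(struct slb_cache_model *c, unsigned long esid_data)
{
	if (c->ptr < SLB_CACHE_ENTRIES)
		c->esid[c->ptr++] = esid_data >> 28;
	else
		c->ptr = SLB_CACHE_ENTRIES + 1;
}

/* switch_slb() may use per-entry slbie only while this holds. */
static int cache_describes_slb(const struct slb_cache_model *c)
{
	return c->ptr <= SLB_CACHE_ENTRIES;
}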
447 600
448static void handle_multi_context_slb_miss(int context_id, unsigned long ea) 601static enum slb_index alloc_slb_index(bool kernel)
449{ 602{
450 struct mm_struct *mm = current->mm; 603 enum slb_index index;
451 unsigned long vsid;
452 int bpsize;
453 604
454 /* 605 /*
455 * We are always above 1TB, hence use high user segment size. 606 * The allocation bitmaps can become out of synch with the SLB
607 * when the _switch code does slbie when bolting a new stack
608 * segment and it must not be anywhere else in the SLB. This leaves
609 * a kernel allocated entry that is unused in the SLB. With very
610 * large systems or small segment sizes, the bitmaps could slowly
611 * fill with these entries. They will eventually be cleared out
612 * by the round robin allocator in that case, so it's probably not
613 * worth accounting for.
456 */ 614 */
457 vsid = get_vsid(context_id, ea, mmu_highuser_ssize); 615
458 bpsize = get_slice_psize(mm, ea); 616 /*
 459 insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); 617 * SLBs beyond 32 entries are allocated with stab_rr only.
 618 * POWER7/8/9 have 32 SLB entries; this could be expanded if a
619 * future CPU has more.
620 */
621 if (local_paca->slb_used_bitmap != U32_MAX) {
622 index = ffz(local_paca->slb_used_bitmap);
623 local_paca->slb_used_bitmap |= 1U << index;
624 if (kernel)
625 local_paca->slb_kern_bitmap |= 1U << index;
626 } else {
627 /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
628 index = local_paca->stab_rr;
629 if (index < (mmu_slb_size - 1))
630 index++;
631 else
632 index = SLB_NUM_BOLTED;
633 local_paca->stab_rr = index;
634 if (index < 32) {
635 if (kernel)
636 local_paca->slb_kern_bitmap |= 1U << index;
637 else
638 local_paca->slb_kern_bitmap &= ~(1U << index);
639 }
640 }
641 BUG_ON(index < SLB_NUM_BOLTED);
642
643 return index;
460} 644}
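alloc_slb_index() prefers the first free bit of slb_used_bitmap and only falls back to round-robin replacement once all tracked slots are in use. A compact sketch of that selection, assuming the patch's 32-entry bitmaps and using __builtin_ctz in place of ffz() (illustrative only):

#define SLB_NUM_BOLTED	2	/* as in the patch */

static int pick_slb_slot(unsigned int *used, unsigned int *kern, int kernel,
			 unsigned int *rr, unsigned int nr_slots)
{
	int index;

	if (*used != 0xffffffffu) {
		index = __builtin_ctz(~*used);	/* first zero bit, i.e. ffz() */
		*used |= 1u << index;
		if (kernel)
			*kern |= 1u << index;
	} else {
		/* Round-robin replacement, restarting at SLB_NUM_BOLTED. */
		index = (*rr < nr_slots - 1) ? *rr + 1 : SLB_NUM_BOLTED;
		*rr = index;
		if (index < 32) {
			if (kernel)
				*kern |= 1u << index;
			else
				*kern &= ~(1u << index);
		}
	}
	return index;
}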
461 645
462void slb_miss_large_addr(struct pt_regs *regs) 646static long slb_insert_entry(unsigned long ea, unsigned long context,
647 unsigned long flags, int ssize, bool kernel)
463{ 648{
464 enum ctx_state prev_state = exception_enter(); 649 unsigned long vsid;
465 unsigned long ea = regs->dar; 650 unsigned long vsid_data, esid_data;
466 int context; 651 enum slb_index index;
467 652
468 if (REGION_ID(ea) != USER_REGION_ID) 653 vsid = get_vsid(context, ea, ssize);
469 goto slb_bad_addr; 654 if (!vsid)
655 return -EFAULT;
470 656
471 /* 657 /*
 472 * Are we beyond what the page table layout supports? 658 * There must not be a kernel SLB fault in alloc_slb_index or before
659 * slbmte here or the allocation bitmaps could get out of whack with
660 * the SLB.
661 *
662 * User SLB faults or preloads take this path which might get inlined
663 * into the caller, so add compiler barriers here to ensure unsafe
664 * memory accesses do not come between.
473 */ 665 */
474 if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) 666 barrier();
475 goto slb_bad_addr;
476 667
477 /* Lower address should have been handled by asm code */ 668 index = alloc_slb_index(kernel);
478 if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) 669
479 goto slb_bad_addr; 670 vsid_data = __mk_vsid_data(vsid, ssize, flags);
671 esid_data = mk_esid_data(ea, ssize, index);
672
673 /*
674 * No need for an isync before or after this slbmte. The exception
675 * we enter with and the rfid we exit with are context synchronizing.
676 * User preloads should add isync afterwards in case the kernel
677 * accesses user memory before it returns to userspace with rfid.
678 */
679 assert_slb_notexists(ea);
680 asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
681
682 barrier();
683
684 if (!kernel)
685 slb_cache_update(esid_data);
686
687 return 0;
688}
689
690static long slb_allocate_kernel(unsigned long ea, unsigned long id)
691{
692 unsigned long context;
693 unsigned long flags;
694 int ssize;
695
696 if (id == KERNEL_REGION_ID) {
697
 698 /* We only support up to MAX_PHYSMEM_BITS */
699 if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
700 return -EFAULT;
701
702 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
703
704#ifdef CONFIG_SPARSEMEM_VMEMMAP
705 } else if (id == VMEMMAP_REGION_ID) {
706
707 if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
708 return -EFAULT;
709
710 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
711#endif
712 } else if (id == VMALLOC_REGION_ID) {
713
714 if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
715 return -EFAULT;
716
717 if (ea < H_VMALLOC_END)
718 flags = get_paca()->vmalloc_sllp;
719 else
720 flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
721 } else {
722 return -EFAULT;
723 }
724
725 ssize = MMU_SEGSIZE_1T;
726 if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
727 ssize = MMU_SEGSIZE_256M;
728
729 context = get_kernel_context(ea);
730 return slb_insert_entry(ea, context, flags, ssize, true);
731}
732
733static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
734{
735 unsigned long context;
736 unsigned long flags;
737 int bpsize;
738 int ssize;
480 739
481 /* 740 /*
 482 * consider this a bad access if we take an SLB miss 741 * consider this a bad access if we take an SLB miss
483 * on an address above addr limit. 742 * on an address above addr limit.
484 */ 743 */
485 if (ea >= current->mm->context.slb_addr_limit) 744 if (ea >= mm->context.slb_addr_limit)
486 goto slb_bad_addr; 745 return -EFAULT;
487 746
488 context = get_ea_context(&current->mm->context, ea); 747 context = get_user_context(&mm->context, ea);
489 if (!context) 748 if (!context)
490 goto slb_bad_addr; 749 return -EFAULT;
750
751 if (unlikely(ea >= H_PGTABLE_RANGE)) {
752 WARN_ON(1);
753 return -EFAULT;
754 }
491 755
492 handle_multi_context_slb_miss(context, ea); 756 ssize = user_segment_size(ea);
493 exception_exit(prev_state);
494 return;
495 757
496slb_bad_addr: 758 bpsize = get_slice_psize(mm, ea);
497 if (user_mode(regs)) 759 flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
498 _exception(SIGSEGV, regs, SEGV_BNDERR, ea); 760
499 else 761 return slb_insert_entry(ea, context, flags, ssize, false);
500 bad_page_fault(regs, ea, SIGSEGV); 762}
501 exception_exit(prev_state); 763
764long do_slb_fault(struct pt_regs *regs, unsigned long ea)
765{
766 unsigned long id = REGION_ID(ea);
767
768 /* IRQs are not reconciled here, so can't check irqs_disabled */
769 VM_WARN_ON(mfmsr() & MSR_EE);
770
771 if (unlikely(!(regs->msr & MSR_RI)))
772 return -EINVAL;
773
774 /*
775 * SLB kernel faults must be very careful not to touch anything
776 * that is not bolted. E.g., PACA and global variables are okay,
777 * mm->context stuff is not.
778 *
779 * SLB user faults can access all of kernel memory, but must be
780 * careful not to touch things like IRQ state because it is not
781 * "reconciled" here. The difficulty is that we must use
782 * fast_exception_return to return from kernel SLB faults without
783 * looking at possible non-bolted memory. We could test user vs
784 * kernel faults in the interrupt handler asm and do a full fault,
785 * reconcile, ret_from_except for user faults which would make them
786 * first class kernel code. But for performance it's probably nicer
787 * if they go via fast_exception_return too.
788 */
789 if (id >= KERNEL_REGION_ID) {
790 long err;
791#ifdef CONFIG_DEBUG_VM
792 /* Catch recursive kernel SLB faults. */
793 BUG_ON(local_paca->in_kernel_slb_handler);
794 local_paca->in_kernel_slb_handler = 1;
795#endif
796 err = slb_allocate_kernel(ea, id);
797#ifdef CONFIG_DEBUG_VM
798 local_paca->in_kernel_slb_handler = 0;
799#endif
800 return err;
801 } else {
802 struct mm_struct *mm = current->mm;
803 long err;
804
805 if (unlikely(!mm))
806 return -EFAULT;
807
808 err = slb_allocate_user(mm, ea);
809 if (!err)
810 preload_add(current_thread_info(), ea);
811
812 return err;
813 }
814}
815
816void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
817{
818 if (err == -EFAULT) {
819 if (user_mode(regs))
820 _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
821 else
822 bad_page_fault(regs, ea, SIGSEGV);
823 } else if (err == -EINVAL) {
824 unrecoverable_exception(regs);
825 } else {
826 BUG();
827 }
502} 828}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
deleted file mode 100644
index 4ac5057ad439..000000000000
--- a/arch/powerpc/mm/slb_low.S
+++ /dev/null
@@ -1,335 +0,0 @@
1/*
2 * Low-level SLB routines
3 *
4 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
5 *
6 * Based on earlier C version:
7 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
8 * Copyright (c) 2001 Dave Engebretsen
9 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#include <asm/processor.h>
18#include <asm/ppc_asm.h>
19#include <asm/asm-offsets.h>
20#include <asm/cputable.h>
21#include <asm/page.h>
22#include <asm/mmu.h>
23#include <asm/pgtable.h>
24#include <asm/firmware.h>
25#include <asm/feature-fixups.h>
26
27/*
28 * This macro generates asm code to compute the VSID scramble
29 * function. Used in slb_allocate() and do_stab_bolted. The function
30 * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
31 *
32 * rt = register containing the proto-VSID and into which the
33 * VSID will be stored
34 * rx = scratch register (clobbered)
35 * rf = flags
36 *
37 * - rt and rx must be different registers
38 * - The answer will end up in the low VSID_BITS bits of rt. The higher
39 * bits may contain other garbage, so you may need to mask the
40 * result.
41 */
42#define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \
43 lis rx,VSID_MULTIPLIER_##size@h; \
44 ori rx,rx,VSID_MULTIPLIER_##size@l; \
45 mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
46/* \
47 * powermac get slb fault before feature fixup, so make 65 bit part \
48 * the default part of feature fixup \
49 */ \
50BEGIN_MMU_FTR_SECTION \
51 srdi rx,rt,VSID_BITS_65_##size; \
52 clrldi rt,rt,(64-VSID_BITS_65_##size); \
53 add rt,rt,rx; \
54 addi rx,rt,1; \
55 srdi rx,rx,VSID_BITS_65_##size; \
56 add rt,rt,rx; \
57 rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
58MMU_FTR_SECTION_ELSE \
59 srdi rx,rt,VSID_BITS_##size; \
60 clrldi rt,rt,(64-VSID_BITS_##size); \
61 add rt,rt,rx; /* add high and low bits */ \
62 addi rx,rt,1; \
63 srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \
64 add rt,rt,rx; \
65 rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
66ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
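For readers following the deleted assembly, ASM_VSID_SCRAMBLE computes (protovsid * VSID_MULTIPLIER) % VSID_MODULUS without a divide, the modulus being 2^VSID_BITS - 1. A rough C restatement of the folding trick, under that assumption and ignoring the flag-merging rldimi and the 65-bit VA variant:

/* x % (2^bits - 1) via one fold plus a carry fix-up, as the macro does. */
static unsigned long vsid_scramble(unsigned long protovsid,
				   unsigned long multiplier,
				   unsigned int vsid_bits)
{
	unsigned long mask = (1UL << vsid_bits) - 1;
	unsigned long x = protovsid * multiplier;

	x = (x & mask) + (x >> vsid_bits);	/* add high and low parts */
	x += (x + 1) >> vsid_bits;		/* fold the final carry */
	return x & mask;			/* low VSID_BITS bits hold the VSID */
}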
67
68
69/* void slb_allocate(unsigned long ea);
70 *
71 * Create an SLB entry for the given EA (user or kernel).
72 * r3 = faulting address, r13 = PACA
73 * r9, r10, r11 are clobbered by this function
74 * r3 is preserved.
75 * No other registers are examined or changed.
76 */
77_GLOBAL(slb_allocate)
78 /*
79 * Check if the address falls within the range of the first context, or
80 * if we may need to handle multi context. For the first context we
81 * allocate the slb entry via the fast path below. For large address we
82 * branch out to C-code and see if additional contexts have been
83 * allocated.
84 * The test here is:
85 * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
86 */
87 rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
88 bne- 8f
89
90 srdi r9,r3,60 /* get region */
91 srdi r10,r3,SID_SHIFT /* get esid */
92 cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */
93
94 /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
95 blt cr7,0f /* user or kernel? */
96
97 /* Check if hitting the linear mapping or some other kernel space
98 */
99 bne cr7,1f
100
101 /* Linear mapping encoding bits, the "li" instruction below will
102 * be patched by the kernel at boot
103 */
104.globl slb_miss_kernel_load_linear
105slb_miss_kernel_load_linear:
106 li r11,0
107 /*
108 * context = (ea >> 60) - (0xc - 1)
109 * r9 = region id.
110 */
111 subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET
112
113BEGIN_FTR_SECTION
114 b .Lslb_finish_load
115END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
116 b .Lslb_finish_load_1T
117
1181:
119#ifdef CONFIG_SPARSEMEM_VMEMMAP
120 cmpldi cr0,r9,0xf
121 bne 1f
122/* Check virtual memmap region. To be patched at kernel boot */
123.globl slb_miss_kernel_load_vmemmap
124slb_miss_kernel_load_vmemmap:
125 li r11,0
126 b 6f
1271:
128#endif /* CONFIG_SPARSEMEM_VMEMMAP */
129
130 /*
131 * r10 contains the ESID, which is the original faulting EA shifted
132 * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28)
133 * which is 0xd00038000. That can't be used as an immediate, even if we
134 * ignored the 0xd, so we have to load it into a register, and we only
135 * have one register free. So we must load all of (H_VMALLOC_END >> 28)
136 * into a register and compare ESID against that.
137 */
138 lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000
139 ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800
140 // Rotate left 4, then mask with 0xffffffff0
141 rldic r11,r11,4,28 // r11 = 0xd00038000
142 cmpld r10,r11 // if r10 >= r11
143 bge 5f // goto io_mapping
144
145 /*
146 * vmalloc mapping gets the encoding from the PACA as the mapping
147 * can be demoted from 64K -> 4K dynamically on some machines.
148 */
149 lhz r11,PACAVMALLOCSLLP(r13)
150 b 6f
1515:
152 /* IO mapping */
153.globl slb_miss_kernel_load_io
154slb_miss_kernel_load_io:
155 li r11,0
1566:
157 /*
158 * context = (ea >> 60) - (0xc - 1)
159 * r9 = region id.
160 */
161 subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET
162
163BEGIN_FTR_SECTION
164 b .Lslb_finish_load
165END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
166 b .Lslb_finish_load_1T
167
1680: /*
169 * For userspace addresses, make sure this is region 0.
170 */
171 cmpdi r9, 0
172 bne- 8f
173 /*
174 * user space make sure we are within the allowed limit
175 */
176 ld r11,PACA_SLB_ADDR_LIMIT(r13)
177 cmpld r3,r11
178 bge- 8f
179
180 /* when using slices, we extract the psize off the slice bitmaps
181 * and then we need to get the sllp encoding off the mmu_psize_defs
182 * array.
183 *
184 * XXX This is a bit inefficient especially for the normal case,
185 * so we should try to implement a fast path for the standard page
186 * size using the old sllp value so we avoid the array. We cannot
187 * really do dynamic patching unfortunately as processes might flip
188 * between 4k and 64k standard page size
189 */
190#ifdef CONFIG_PPC_MM_SLICES
 191 /* r10 has esid */
192 cmpldi r10,16
193 /* below SLICE_LOW_TOP */
194 blt 5f
195 /*
196 * Handle hpsizes,
197 * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
198 */
199 srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
200 addi r9,r11,PACAHIGHSLICEPSIZE
201 lbzx r9,r13,r9 /* r9 is hpsizes[r11] */
202 /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
203 rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
204 b 6f
205
2065:
207 /*
208 * Handle lpsizes
209 * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
210 */
211 srdi r11,r10,1 /* index */
212 addi r9,r11,PACALOWSLICESPSIZE
213 lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
214 rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
2156:
216 sldi r11,r11,2 /* index * 4 */
217 /* Extract the psize and multiply to get an array offset */
218 srd r9,r9,r11
219 andi. r9,r9,0xf
220 mulli r9,r9,MMUPSIZEDEFSIZE
221
222 /* Now get to the array and obtain the sllp
223 */
224 ld r11,PACATOC(r13)
225 ld r11,mmu_psize_defs@got(r11)
226 add r11,r11,r9
227 ld r11,MMUPSIZESLLP(r11)
228 ori r11,r11,SLB_VSID_USER
229#else
230 /* paca context sllp already contains the SLB_VSID_USER bits */
231 lhz r11,PACACONTEXTSLLP(r13)
232#endif /* CONFIG_PPC_MM_SLICES */
233
234 ld r9,PACACONTEXTID(r13)
235BEGIN_FTR_SECTION
236 cmpldi r10,0x1000
237 bge .Lslb_finish_load_1T
238END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
239 b .Lslb_finish_load
240
2418: /* invalid EA - return an error indication */
242 crset 4*cr0+eq /* indicate failure */
243 blr
244
245/*
246 * Finish loading of an SLB entry and return
247 *
248 * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
249 */
250.Lslb_finish_load:
251 rldimi r10,r9,ESID_BITS,0
252 ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
253 /* r3 = EA, r11 = VSID data */
254 /*
255 * Find a slot, round robin. Previously we tried to find a
256 * free slot first but that took too long. Unfortunately we
 257 * don't have any LRU information to help us choose a slot.
258 */
259
260 mr r9,r3
261
262 /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
2637: ld r10,PACASTABRR(r13)
264 addi r10,r10,1
265 /* This gets soft patched on boot. */
266.globl slb_compare_rr_to_size
267slb_compare_rr_to_size:
268 cmpldi r10,0
269
270 blt+ 4f
271 li r10,SLB_NUM_BOLTED
272
2734:
274 std r10,PACASTABRR(r13)
275
2763:
277 rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */
278 oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */
279
280 /* r9 = ESID data, r11 = VSID data */
281
282 /*
283 * No need for an isync before or after this slbmte. The exception
284 * we enter with and the rfid we exit with are context synchronizing.
285 */
286 slbmte r11,r10
287
288 /* we're done for kernel addresses */
289 crclr 4*cr0+eq /* set result to "success" */
290 bgelr cr7
291
292 /* Update the slb cache */
293 lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
294 cmpldi r9,SLB_CACHE_ENTRIES
295 bge 1f
296
297 /* still room in the slb cache */
298 sldi r11,r9,2 /* r11 = offset * sizeof(u32) */
299 srdi r10,r10,28 /* get the 36 bits of the ESID */
300 add r11,r11,r13 /* r11 = (u32 *)paca + offset */
301 stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
302 addi r9,r9,1 /* offset++ */
303 b 2f
3041: /* offset >= SLB_CACHE_ENTRIES */
305 li r9,SLB_CACHE_ENTRIES+1
3062:
307 sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
308 crclr 4*cr0+eq /* set result to "success" */
309 blr
310
311/*
312 * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
313 *
314 * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
315 */
316.Lslb_finish_load_1T:
317 srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
318 rldimi r10,r9,ESID_BITS_1T,0
319 ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
320
321 li r10,MMU_SEGSIZE_1T
322 rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
323
324 /* r3 = EA, r11 = VSID data */
325 clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */
326 b 7b
327
328
329_ASM_NOKPROBE_SYMBOL(slb_allocate)
330_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
331_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
332_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
333#ifdef CONFIG_SPARSEMEM_VMEMMAP
334_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap)
335#endif
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 205fe557ca10..06898c13901d 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -31,6 +31,7 @@
31#include <linux/spinlock.h> 31#include <linux/spinlock.h>
32#include <linux/export.h> 32#include <linux/export.h>
33#include <linux/hugetlb.h> 33#include <linux/hugetlb.h>
34#include <linux/sched/mm.h>
34#include <asm/mman.h> 35#include <asm/mman.h>
35#include <asm/mmu.h> 36#include <asm/mmu.h>
36#include <asm/copro.h> 37#include <asm/copro.h>
@@ -61,6 +62,13 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) {
61 62
62#endif 63#endif
63 64
65static inline bool slice_addr_is_low(unsigned long addr)
66{
67 u64 tmp = (u64)addr;
68
69 return tmp < SLICE_LOW_TOP;
70}
71
64static void slice_range_to_mask(unsigned long start, unsigned long len, 72static void slice_range_to_mask(unsigned long start, unsigned long len,
65 struct slice_mask *ret) 73 struct slice_mask *ret)
66{ 74{
@@ -70,7 +78,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
70 if (SLICE_NUM_HIGH) 78 if (SLICE_NUM_HIGH)
71 bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); 79 bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
72 80
73 if (start < SLICE_LOW_TOP) { 81 if (slice_addr_is_low(start)) {
74 unsigned long mend = min(end, 82 unsigned long mend = min(end,
75 (unsigned long)(SLICE_LOW_TOP - 1)); 83 (unsigned long)(SLICE_LOW_TOP - 1));
76 84
@@ -78,7 +86,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
78 - (1u << GET_LOW_SLICE_INDEX(start)); 86 - (1u << GET_LOW_SLICE_INDEX(start));
79 } 87 }
80 88
81 if ((start + len) > SLICE_LOW_TOP) { 89 if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
82 unsigned long start_index = GET_HIGH_SLICE_INDEX(start); 90 unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
83 unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); 91 unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
84 unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; 92 unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
@@ -133,7 +141,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
133 if (!slice_low_has_vma(mm, i)) 141 if (!slice_low_has_vma(mm, i))
134 ret->low_slices |= 1u << i; 142 ret->low_slices |= 1u << i;
135 143
136 if (high_limit <= SLICE_LOW_TOP) 144 if (slice_addr_is_low(high_limit - 1))
137 return; 145 return;
138 146
139 for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) 147 for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
@@ -182,7 +190,7 @@ static bool slice_check_range_fits(struct mm_struct *mm,
182 unsigned long end = start + len - 1; 190 unsigned long end = start + len - 1;
183 u64 low_slices = 0; 191 u64 low_slices = 0;
184 192
185 if (start < SLICE_LOW_TOP) { 193 if (slice_addr_is_low(start)) {
186 unsigned long mend = min(end, 194 unsigned long mend = min(end,
187 (unsigned long)(SLICE_LOW_TOP - 1)); 195 (unsigned long)(SLICE_LOW_TOP - 1));
188 196
@@ -192,7 +200,7 @@ static bool slice_check_range_fits(struct mm_struct *mm,
192 if ((low_slices & available->low_slices) != low_slices) 200 if ((low_slices & available->low_slices) != low_slices)
193 return false; 201 return false;
194 202
195 if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { 203 if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
196 unsigned long start_index = GET_HIGH_SLICE_INDEX(start); 204 unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
197 unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); 205 unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
198 unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; 206 unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
@@ -219,7 +227,7 @@ static void slice_flush_segments(void *parm)
219 copy_mm_to_paca(current->active_mm); 227 copy_mm_to_paca(current->active_mm);
220 228
221 local_irq_save(flags); 229 local_irq_save(flags);
222 slb_flush_and_rebolt(); 230 slb_flush_and_restore_bolted();
223 local_irq_restore(flags); 231 local_irq_restore(flags);
224#endif 232#endif
225} 233}
@@ -303,7 +311,7 @@ static bool slice_scan_available(unsigned long addr,
303 int end, unsigned long *boundary_addr) 311 int end, unsigned long *boundary_addr)
304{ 312{
305 unsigned long slice; 313 unsigned long slice;
306 if (addr < SLICE_LOW_TOP) { 314 if (slice_addr_is_low(addr)) {
307 slice = GET_LOW_SLICE_INDEX(addr); 315 slice = GET_LOW_SLICE_INDEX(addr);
308 *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; 316 *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
309 return !!(available->low_slices & (1u << slice)); 317 return !!(available->low_slices & (1u << slice));
@@ -706,7 +714,7 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
706 714
707 VM_BUG_ON(radix_enabled()); 715 VM_BUG_ON(radix_enabled());
708 716
709 if (addr < SLICE_LOW_TOP) { 717 if (slice_addr_is_low(addr)) {
710 psizes = mm->context.low_slices_psize; 718 psizes = mm->context.low_slices_psize;
711 index = GET_LOW_SLICE_INDEX(addr); 719 index = GET_LOW_SLICE_INDEX(addr);
712 } else { 720 } else {
@@ -757,6 +765,20 @@ void slice_init_new_context_exec(struct mm_struct *mm)
757 bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); 765 bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
758} 766}
759 767
768#ifdef CONFIG_PPC_BOOK3S_64
769void slice_setup_new_exec(void)
770{
771 struct mm_struct *mm = current->mm;
772
773 slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);
774
775 if (!is_32bit_task())
776 return;
777
778 mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
779}
780#endif
781
760void slice_set_range_psize(struct mm_struct *mm, unsigned long start, 782void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
761 unsigned long len, unsigned int psize) 783 unsigned long len, unsigned int psize)
762{ 784{
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index fef3e1eb3a19..6a23b9ebd2a1 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -366,6 +366,7 @@ static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
366 __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB); 366 __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
367 367
368 asm volatile("ptesync": : :"memory"); 368 asm volatile("ptesync": : :"memory");
369 asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
369} 370}
370 371
371 372
@@ -833,6 +834,15 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
833/* 834/*
834 * Flush partition scoped translations from LPID (=LPIDR) 835 * Flush partition scoped translations from LPID (=LPIDR)
835 */ 836 */
837void radix__flush_tlb_lpid(unsigned int lpid)
838{
839 _tlbie_lpid(lpid, RIC_FLUSH_ALL);
840}
841EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
842
843/*
844 * Flush partition scoped translations from LPID (=LPIDR)
845 */
836void radix__local_flush_tlb_lpid(unsigned int lpid) 846void radix__local_flush_tlb_lpid(unsigned int lpid)
837{ 847{
838 _tlbiel_lpid(lpid, RIC_FLUSH_ALL); 848 _tlbiel_lpid(lpid, RIC_FLUSH_ALL);
@@ -1007,7 +1017,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
1007 goto local; 1017 goto local;
1008 } 1018 }
1009 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1019 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1010 goto local;
1011 } else { 1020 } else {
1012local: 1021local:
1013 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1022 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 15fe5f0c8665..ae5d568e267f 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -503,6 +503,9 @@ static void setup_page_sizes(void)
503 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 503 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
504 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 504 struct mmu_psize_def *def = &mmu_psize_defs[psize];
505 505
506 if (!def->shift)
507 continue;
508
506 if (tlb1ps & (1U << (def->shift - 10))) { 509 if (tlb1ps & (1U << (def->shift - 10))) {
507 def->flags |= MMU_PAGE_SIZE_DIRECT; 510 def->flags |= MMU_PAGE_SIZE_DIRECT;
508 511
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 7a7834c39f64..8d26d7416481 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -1,5 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
3 2
4ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 3ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
5 4
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index ad054dd0d666..5df6290d1ccc 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -7,7 +7,7 @@
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8**/ 8**/
9 9
10#include <linux/compat_time.h> 10#include <linux/time.h>
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <asm/processor.h> 13#include <asm/processor.h>
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 82986d2acd9b..ab26df5bacb9 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -1,5 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
3 2
4obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o 3obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
5 4
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 1fafc32b12a0..6954636b16d1 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1392,7 +1392,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
1392 if (ret) 1392 if (ret)
1393 goto err_free_cpuhp_mem; 1393 goto err_free_cpuhp_mem;
1394 1394
1395 pr_info("%s performance monitor hardware support registered\n", 1395 pr_debug("%s performance monitor hardware support registered\n",
1396 pmu_ptr->pmu.name); 1396 pmu_ptr->pmu.name);
1397 1397
1398 return 0; 1398 return 0;
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 7963658dbc22..6dbae9884ec4 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -238,6 +238,7 @@ static int power7_marked_instr_event(u64 event)
238 case 6: 238 case 6:
239 if (psel == 0x64) 239 if (psel == 0x64)
240 return pmc >= 3; 240 return pmc >= 3;
241 break;
241 case 8: 242 case 8:
242 return unit == 0xd; 243 return unit == 0xd;
243 } 244 }
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index 60254a321a91..2a9d66254ffc 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -2,7 +2,6 @@
2config ACADIA 2config ACADIA
3 bool "Acadia" 3 bool "Acadia"
4 depends on 40x 4 depends on 40x
5 default n
6 select PPC40x_SIMPLE 5 select PPC40x_SIMPLE
7 select 405EZ 6 select 405EZ
8 help 7 help
@@ -11,7 +10,6 @@ config ACADIA
11config EP405 10config EP405
12 bool "EP405/EP405PC" 11 bool "EP405/EP405PC"
13 depends on 40x 12 depends on 40x
14 default n
15 select 405GP 13 select 405GP
16 select PCI 14 select PCI
17 help 15 help
@@ -20,7 +18,6 @@ config EP405
20config HOTFOOT 18config HOTFOOT
21 bool "Hotfoot" 19 bool "Hotfoot"
22 depends on 40x 20 depends on 40x
23 default n
24 select PPC40x_SIMPLE 21 select PPC40x_SIMPLE
25 select PCI 22 select PCI
26 help 23 help
@@ -29,7 +26,6 @@ config HOTFOOT
29config KILAUEA 26config KILAUEA
30 bool "Kilauea" 27 bool "Kilauea"
31 depends on 40x 28 depends on 40x
32 default n
33 select 405EX 29 select 405EX
34 select PPC40x_SIMPLE 30 select PPC40x_SIMPLE
35 select PPC4xx_PCI_EXPRESS 31 select PPC4xx_PCI_EXPRESS
@@ -41,7 +37,6 @@ config KILAUEA
41config MAKALU 37config MAKALU
42 bool "Makalu" 38 bool "Makalu"
43 depends on 40x 39 depends on 40x
44 default n
45 select 405EX 40 select 405EX
46 select PCI 41 select PCI
47 select PPC4xx_PCI_EXPRESS 42 select PPC4xx_PCI_EXPRESS
@@ -62,7 +57,6 @@ config WALNUT
62config XILINX_VIRTEX_GENERIC_BOARD 57config XILINX_VIRTEX_GENERIC_BOARD
63 bool "Generic Xilinx Virtex board" 58 bool "Generic Xilinx Virtex board"
64 depends on 40x 59 depends on 40x
65 default n
66 select XILINX_VIRTEX_II_PRO 60 select XILINX_VIRTEX_II_PRO
67 select XILINX_VIRTEX_4_FX 61 select XILINX_VIRTEX_4_FX
68 select XILINX_INTC 62 select XILINX_INTC
@@ -80,7 +74,6 @@ config XILINX_VIRTEX_GENERIC_BOARD
80config OBS600 74config OBS600
81 bool "OpenBlockS 600" 75 bool "OpenBlockS 600"
82 depends on 40x 76 depends on 40x
83 default n
84 select 405EX 77 select 405EX
85 select PPC40x_SIMPLE 78 select PPC40x_SIMPLE
86 help 79 help
@@ -90,7 +83,6 @@ config OBS600
90config PPC40x_SIMPLE 83config PPC40x_SIMPLE
91 bool "Simple PowerPC 40x board support" 84 bool "Simple PowerPC 40x board support"
92 depends on 40x 85 depends on 40x
93 default n
94 help 86 help
95 This option enables the simple PowerPC 40x platform support. 87 This option enables the simple PowerPC 40x platform support.
96 88
@@ -156,7 +148,6 @@ config IBM405_ERR51
156config APM8018X 148config APM8018X
157 bool "APM8018X" 149 bool "APM8018X"
158 depends on 40x 150 depends on 40x
159 default n
160 select PPC40x_SIMPLE 151 select PPC40x_SIMPLE
161 help 152 help
162 This option enables support for the AppliedMicro APM8018X evaluation 153 This option enables support for the AppliedMicro APM8018X evaluation
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index a6011422b861..f024efd5a4c2 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -2,7 +2,6 @@
2config PPC_47x 2config PPC_47x
3 bool "Support for 47x variant" 3 bool "Support for 47x variant"
4 depends on 44x 4 depends on 44x
5 default n
6 select MPIC 5 select MPIC
7 help 6 help
8 This option enables support for the 47x family of processors and is 7 This option enables support for the 47x family of processors and is
@@ -11,7 +10,6 @@ config PPC_47x
11config BAMBOO 10config BAMBOO
12 bool "Bamboo" 11 bool "Bamboo"
13 depends on 44x 12 depends on 44x
14 default n
15 select PPC44x_SIMPLE 13 select PPC44x_SIMPLE
16 select 440EP 14 select 440EP
17 select PCI 15 select PCI
@@ -21,7 +19,6 @@ config BAMBOO
21config BLUESTONE 19config BLUESTONE
22 bool "Bluestone" 20 bool "Bluestone"
23 depends on 44x 21 depends on 44x
24 default n
25 select PPC44x_SIMPLE 22 select PPC44x_SIMPLE
26 select APM821xx 23 select APM821xx
27 select PCI_MSI 24 select PCI_MSI
@@ -44,7 +41,6 @@ config EBONY
44config SAM440EP 41config SAM440EP
45 bool "Sam440ep" 42 bool "Sam440ep"
46 depends on 44x 43 depends on 44x
47 default n
48 select 440EP 44 select 440EP
49 select PCI 45 select PCI
50 help 46 help
@@ -53,7 +49,6 @@ config SAM440EP
53config SEQUOIA 49config SEQUOIA
54 bool "Sequoia" 50 bool "Sequoia"
55 depends on 44x 51 depends on 44x
56 default n
57 select PPC44x_SIMPLE 52 select PPC44x_SIMPLE
58 select 440EPX 53 select 440EPX
59 help 54 help
@@ -62,7 +57,6 @@ config SEQUOIA
62config TAISHAN 57config TAISHAN
63 bool "Taishan" 58 bool "Taishan"
64 depends on 44x 59 depends on 44x
65 default n
66 select PPC44x_SIMPLE 60 select PPC44x_SIMPLE
67 select 440GX 61 select 440GX
68 select PCI 62 select PCI
@@ -73,7 +67,6 @@ config TAISHAN
73config KATMAI 67config KATMAI
74 bool "Katmai" 68 bool "Katmai"
75 depends on 44x 69 depends on 44x
76 default n
77 select PPC44x_SIMPLE 70 select PPC44x_SIMPLE
78 select 440SPe 71 select 440SPe
79 select PCI 72 select PCI
@@ -86,7 +79,6 @@ config KATMAI
86config RAINIER 79config RAINIER
87 bool "Rainier" 80 bool "Rainier"
88 depends on 44x 81 depends on 44x
89 default n
90 select PPC44x_SIMPLE 82 select PPC44x_SIMPLE
91 select 440GRX 83 select 440GRX
92 select PCI 84 select PCI
@@ -96,7 +88,6 @@ config RAINIER
96config WARP 88config WARP
97 bool "PIKA Warp" 89 bool "PIKA Warp"
98 depends on 44x 90 depends on 44x
99 default n
100 select 440EP 91 select 440EP
101 help 92 help
102 This option enables support for the PIKA Warp(tm) Appliance. The Warp 93 This option enables support for the PIKA Warp(tm) Appliance. The Warp
@@ -109,7 +100,6 @@ config WARP
109config ARCHES 100config ARCHES
110 bool "Arches" 101 bool "Arches"
111 depends on 44x 102 depends on 44x
112 default n
113 select PPC44x_SIMPLE 103 select PPC44x_SIMPLE
114 select 460EX # Odd since it uses 460GT but the effects are the same 104 select 460EX # Odd since it uses 460GT but the effects are the same
115 select PCI 105 select PCI
@@ -120,7 +110,6 @@ config ARCHES
120config CANYONLANDS 110config CANYONLANDS
121 bool "Canyonlands" 111 bool "Canyonlands"
122 depends on 44x 112 depends on 44x
123 default n
124 select 460EX 113 select 460EX
125 select PCI 114 select PCI
126 select PPC4xx_PCI_EXPRESS 115 select PPC4xx_PCI_EXPRESS
@@ -134,7 +123,6 @@ config CANYONLANDS
134config GLACIER 123config GLACIER
135 bool "Glacier" 124 bool "Glacier"
136 depends on 44x 125 depends on 44x
137 default n
138 select PPC44x_SIMPLE 126 select PPC44x_SIMPLE
139 select 460EX # Odd since it uses 460GT but the effects are the same 127 select 460EX # Odd since it uses 460GT but the effects are the same
140 select PCI 128 select PCI
@@ -147,7 +135,6 @@ config GLACIER
147config REDWOOD 135config REDWOOD
148 bool "Redwood" 136 bool "Redwood"
149 depends on 44x 137 depends on 44x
150 default n
151 select PPC44x_SIMPLE 138 select PPC44x_SIMPLE
152 select 460SX 139 select 460SX
153 select PCI 140 select PCI
@@ -160,7 +147,6 @@ config REDWOOD
160config EIGER 147config EIGER
161 bool "Eiger" 148 bool "Eiger"
162 depends on 44x 149 depends on 44x
163 default n
164 select PPC44x_SIMPLE 150 select PPC44x_SIMPLE
165 select 460SX 151 select 460SX
166 select PCI 152 select PCI
@@ -172,7 +158,6 @@ config EIGER
172config YOSEMITE 158config YOSEMITE
173 bool "Yosemite" 159 bool "Yosemite"
174 depends on 44x 160 depends on 44x
175 default n
176 select PPC44x_SIMPLE 161 select PPC44x_SIMPLE
177 select 440EP 162 select 440EP
178 select PCI 163 select PCI
@@ -182,7 +167,6 @@ config YOSEMITE
182config ISS4xx 167config ISS4xx
183 bool "ISS 4xx Simulator" 168 bool "ISS 4xx Simulator"
184 depends on (44x || 40x) 169 depends on (44x || 40x)
185 default n
186 select 405GP if 40x 170 select 405GP if 40x
187 select 440GP if 44x && !PPC_47x 171 select 440GP if 44x && !PPC_47x
188 select PPC_FPU 172 select PPC_FPU
@@ -193,7 +177,6 @@ config ISS4xx
193config CURRITUCK 177config CURRITUCK
194 bool "IBM Currituck (476fpe) Support" 178 bool "IBM Currituck (476fpe) Support"
195 depends on PPC_47x 179 depends on PPC_47x
196 default n
197 select SWIOTLB 180 select SWIOTLB
198 select 476FPE 181 select 476FPE
199 select PPC4xx_PCI_EXPRESS 182 select PPC4xx_PCI_EXPRESS
@@ -203,7 +186,6 @@ config CURRITUCK
203config FSP2 186config FSP2
204 bool "IBM FSP2 (476fpe) Support" 187 bool "IBM FSP2 (476fpe) Support"
205 depends on PPC_47x 188 depends on PPC_47x
206 default n
207 select 476FPE 189 select 476FPE
208 select IBM_EMAC_EMAC4 if IBM_EMAC 190 select IBM_EMAC_EMAC4 if IBM_EMAC
209 select IBM_EMAC_RGMII if IBM_EMAC 191 select IBM_EMAC_RGMII if IBM_EMAC
@@ -215,7 +197,6 @@ config FSP2
215config AKEBONO 197config AKEBONO
216 bool "IBM Akebono (476gtr) Support" 198 bool "IBM Akebono (476gtr) Support"
217 depends on PPC_47x 199 depends on PPC_47x
218 default n
219 select SWIOTLB 200 select SWIOTLB
220 select 476FPE 201 select 476FPE
221 select PPC4xx_PCI_EXPRESS 202 select PPC4xx_PCI_EXPRESS
@@ -241,7 +222,6 @@ config AKEBONO
241config ICON 222config ICON
242 bool "Icon" 223 bool "Icon"
243 depends on 44x 224 depends on 44x
244 default n
245 select PPC44x_SIMPLE 225 select PPC44x_SIMPLE
246 select 440SPe 226 select 440SPe
247 select PCI 227 select PCI
@@ -252,7 +232,6 @@ config ICON
252config XILINX_VIRTEX440_GENERIC_BOARD 232config XILINX_VIRTEX440_GENERIC_BOARD
253 bool "Generic Xilinx Virtex 5 FXT board support" 233 bool "Generic Xilinx Virtex 5 FXT board support"
254 depends on 44x 234 depends on 44x
255 default n
256 select XILINX_VIRTEX_5_FXT 235 select XILINX_VIRTEX_5_FXT
257 select XILINX_INTC 236 select XILINX_INTC
258 help 237 help
@@ -280,7 +259,6 @@ config XILINX_ML510
280config PPC44x_SIMPLE 259config PPC44x_SIMPLE
281 bool "Simple PowerPC 44x board support" 260 bool "Simple PowerPC 44x board support"
282 depends on 44x 261 depends on 44x
283 default n
284 help 262 help
285 This option enables the simple PowerPC 44x platform support. 263 This option enables the simple PowerPC 44x platform support.
286 264
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
index 04f0c73a9b4f..7a507f775308 100644
--- a/arch/powerpc/platforms/44x/fsp2.c
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -210,15 +210,15 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
210 for_each_compatible_node(np, NULL, compat) { 210 for_each_compatible_node(np, NULL, compat) {
211 irq = irq_of_parse_and_map(np, 0); 211 irq = irq_of_parse_and_map(np, 0);
212 if (irq == NO_IRQ) { 212 if (irq == NO_IRQ) {
213 pr_err("device tree node %s is missing a interrupt", 213 pr_err("device tree node %pOFn is missing a interrupt",
214 np->name); 214 np);
215 return; 215 return;
216 } 216 }
217 217
218 rc = request_irq(irq, errirq_handler, 0, np->name, np); 218 rc = request_irq(irq, errirq_handler, 0, np->name, np);
219 if (rc) { 219 if (rc) {
220 pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d", 220 pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d",
221 np->full_name, rc); 221 np, rc);
222 return; 222 return;
223 } 223 }
224 } 224 }
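
Note: the fsp2.c hunk switches the messages to the %pOFn and %pOF printk specifiers, which take a struct device_node pointer and print the node name or the full device-tree path respectively, replacing the open-coded np->name / np->full_name. A minimal sketch of the idiom; the function name "my_probe" is purely illustrative:

	#include <linux/of.h>
	#include <linux/printk.h>

	static void my_probe(struct device_node *np)
	{
		/* %pOFn prints the node name, %pOF the full device-tree path */
		pr_err("node %pOFn (%pOF) is missing an interrupt\n", np, np);
	}
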
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 69d9f60d9fe5..f5bbd4563342 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -113,7 +113,6 @@ static void __init ocm_init_node(int count, struct device_node *node)
113 int len; 113 int len;
114 114
115 struct resource rsrc; 115 struct resource rsrc;
116 int ioflags;
117 116
118 ocm = ocm_get_node(count); 117 ocm = ocm_get_node(count);
119 118
@@ -179,9 +178,8 @@ static void __init ocm_init_node(int count, struct device_node *node)
179 178
180 /* ioremap the non-cached region */ 179 /* ioremap the non-cached region */
181 if (ocm->nc.memtotal) { 180 if (ocm->nc.memtotal) {
182 ioflags = _PAGE_NO_CACHE | _PAGE_GUARDED | _PAGE_EXEC;
183 ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal, 181 ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal,
184 ioflags); 182 _PAGE_EXEC | PAGE_KERNEL_NCG);
185 183
186 if (!ocm->nc.virt) { 184 if (!ocm->nc.virt) {
187 printk(KERN_ERR 185 printk(KERN_ERR
@@ -195,9 +193,8 @@ static void __init ocm_init_node(int count, struct device_node *node)
195 /* ioremap the cached region */ 193 /* ioremap the cached region */
196 194
197 if (ocm->c.memtotal) { 195 if (ocm->c.memtotal) {
198 ioflags = _PAGE_EXEC;
199 ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal, 196 ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal,
200 ioflags); 197 _PAGE_EXEC | PAGE_KERNEL);
201 198
202 if (!ocm->c.virt) { 199 if (!ocm->c.virt) {
203 printk(KERN_ERR 200 printk(KERN_ERR
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c
index 5e36508b2a70..1844bf502fcf 100644
--- a/arch/powerpc/platforms/4xx/soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -200,7 +200,7 @@ void ppc4xx_reset_system(char *cmd)
200 u32 reset_type = DBCR0_RST_SYSTEM; 200 u32 reset_type = DBCR0_RST_SYSTEM;
201 const u32 *prop; 201 const u32 *prop;
202 202
203 np = of_find_node_by_type(NULL, "cpu"); 203 np = of_get_cpu_node(0, NULL);
204 if (np) { 204 if (np) {
205 prop = of_get_property(np, "reset-type", NULL); 205 prop = of_get_property(np, "reset-type", NULL);
206 206
diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig
index 6e04099361b9..1947a88bc69f 100644
--- a/arch/powerpc/platforms/82xx/Kconfig
+++ b/arch/powerpc/platforms/82xx/Kconfig
@@ -51,7 +51,6 @@ endif
51 51
52config PQ2ADS 52config PQ2ADS
53 bool 53 bool
54 default n
55 54
56config 8260 55config 8260
57 bool 56 bool
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 7e966f4cf19a..fff72425727a 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -216,8 +216,8 @@ static int smp_85xx_start_cpu(int cpu)
216 216
217 /* Map the spin table */ 217 /* Map the spin table */
218 if (ioremappable) 218 if (ioremappable)
219 spin_table = ioremap_prot(*cpu_rel_addr, 219 spin_table = ioremap_coherent(*cpu_rel_addr,
220 sizeof(struct epapr_spin_table), _PAGE_COHERENT); 220 sizeof(struct epapr_spin_table));
221 else 221 else
222 spin_table = phys_to_virt(*cpu_rel_addr); 222 spin_table = phys_to_virt(*cpu_rel_addr);
223 223
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 027c42d8966c..f1c805c8adbc 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -66,7 +66,7 @@ static int __init get_freq(char *name, unsigned long *val)
66 int found = 0; 66 int found = 0;
67 67
68 /* The cpu node should have timebase and clock frequency properties */ 68 /* The cpu node should have timebase and clock frequency properties */
69 cpu = of_find_node_by_type(NULL, "cpu"); 69 cpu = of_get_cpu_node(0, NULL);
70 70
71 if (cpu) { 71 if (cpu) {
72 fp = of_get_property(cpu, name, NULL); 72 fp = of_get_property(cpu, name, NULL);
@@ -147,8 +147,9 @@ void __init mpc8xx_calibrate_decr(void)
147 * we have to enable the timebase). The decrementer interrupt 147 * we have to enable the timebase). The decrementer interrupt
148 * is wired into the vector table, nothing to do here for that. 148 * is wired into the vector table, nothing to do here for that.
149 */ 149 */
150 cpu = of_find_node_by_type(NULL, "cpu"); 150 cpu = of_get_cpu_node(0, NULL);
151 virq= irq_of_parse_and_map(cpu, 0); 151 virq= irq_of_parse_and_map(cpu, 0);
152 of_node_put(cpu);
152 irq = virq_to_hw(virq); 153 irq = virq_to_hw(virq);
153 154
154 sys_tmr2 = immr_map(im_sit); 155 sys_tmr2 = immr_map(im_sit);
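
Note: both m8xx_setup.c hunks replace the of_find_node_by_type(NULL, "cpu") lookup with of_get_cpu_node(0, NULL), which returns the device node for logical CPU 0, and the second hunk adds the previously missing of_node_put(). A minimal sketch of the pattern, using the same "clock-frequency" property as get_freq() above:

	#include <linux/of.h>

	static unsigned long read_cpu0_freq(void)
	{
		struct device_node *cpu;
		const u32 *fp;
		unsigned long freq = 0;

		cpu = of_get_cpu_node(0, NULL);		/* takes a reference */
		if (cpu) {
			fp = of_get_property(cpu, "clock-frequency", NULL);
			if (fp)
				freq = *fp;
			of_node_put(cpu);		/* drop it when done */
		}
		return freq;
	}
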
diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c
index 402016705a39..9944fc303df0 100644
--- a/arch/powerpc/platforms/8xx/machine_check.c
+++ b/arch/powerpc/platforms/8xx/machine_check.c
@@ -18,9 +18,9 @@ int machine_check_8xx(struct pt_regs *regs)
18 pr_err("Machine check in kernel mode.\n"); 18 pr_err("Machine check in kernel mode.\n");
19 pr_err("Caused by (from SRR1=%lx): ", reason); 19 pr_err("Caused by (from SRR1=%lx): ", reason);
20 if (reason & 0x40000000) 20 if (reason & 0x40000000)
21 pr_err("Fetch error at address %lx\n", regs->nip); 21 pr_cont("Fetch error at address %lx\n", regs->nip);
22 else 22 else
23 pr_err("Data access error at address %lx\n", regs->dar); 23 pr_cont("Data access error at address %lx\n", regs->dar);
24 24
25#ifdef CONFIG_PCI 25#ifdef CONFIG_PCI
26 /* the qspan pci read routines can cause machine checks -- Cort 26 /* the qspan pci read routines can cause machine checks -- Cort
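
Note: machine_check_8xx() already emits "Caused by (from SRR1=...): " with pr_err(); switching the second half to pr_cont() appends the cause to that same log line instead of starting a new one with its own prefix. A short sketch of the idiom:

	#include <linux/printk.h>

	static void report_reason(unsigned long reason, unsigned long nip)
	{
		pr_err("Caused by (from SRR1=%lx): ", reason);
		if (reason & 0x40000000)
			pr_cont("Fetch error at address %lx\n", nip);	/* same line */
		else
			pr_cont("Data access error\n");
	}
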
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 14ef17e10ec9..260a56b7602d 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -23,7 +23,6 @@ source "arch/powerpc/platforms/amigaone/Kconfig"
23 23
24config KVM_GUEST 24config KVM_GUEST
25 bool "KVM Guest support" 25 bool "KVM Guest support"
26 default n
27 select EPAPR_PARAVIRT 26 select EPAPR_PARAVIRT
28 ---help--- 27 ---help---
29 This option enables various optimizations for running under the KVM 28 This option enables various optimizations for running under the KVM
@@ -34,7 +33,6 @@ config KVM_GUEST
34 33
35config EPAPR_PARAVIRT 34config EPAPR_PARAVIRT
36 bool "ePAPR para-virtualization support" 35 bool "ePAPR para-virtualization support"
37 default n
38 help 36 help
39 Enables ePAPR para-virtualization support for guests. 37 Enables ePAPR para-virtualization support for guests.
40 38
@@ -74,7 +72,6 @@ config PPC_DT_CPU_FTRS
74config UDBG_RTAS_CONSOLE 72config UDBG_RTAS_CONSOLE
75 bool "RTAS based debug console" 73 bool "RTAS based debug console"
76 depends on PPC_RTAS 74 depends on PPC_RTAS
77 default n
78 75
79config PPC_SMP_MUXED_IPI 76config PPC_SMP_MUXED_IPI
80 bool 77 bool
@@ -86,16 +83,13 @@ config PPC_SMP_MUXED_IPI
86 83
87config IPIC 84config IPIC
88 bool 85 bool
89 default n
90 86
91config MPIC 87config MPIC
92 bool 88 bool
93 default n
94 89
95config MPIC_TIMER 90config MPIC_TIMER
96 bool "MPIC Global Timer" 91 bool "MPIC Global Timer"
97 depends on MPIC && FSL_SOC 92 depends on MPIC && FSL_SOC
98 default n
99 help 93 help
100 The MPIC global timer is a hardware timer inside the 94 The MPIC global timer is a hardware timer inside the
101 Freescale PIC complying with OpenPIC standard. When the 95 Freescale PIC complying with OpenPIC standard. When the
@@ -107,7 +101,6 @@ config MPIC_TIMER
107config FSL_MPIC_TIMER_WAKEUP 101config FSL_MPIC_TIMER_WAKEUP
108 tristate "Freescale MPIC global timer wakeup driver" 102 tristate "Freescale MPIC global timer wakeup driver"
109 depends on FSL_SOC && MPIC_TIMER && PM 103 depends on FSL_SOC && MPIC_TIMER && PM
110 default n
111 help 104 help
112 The driver provides a way to wake up the system by MPIC 105 The driver provides a way to wake up the system by MPIC
113 timer. 106 timer.
@@ -115,43 +108,35 @@ config FSL_MPIC_TIMER_WAKEUP
115 108
116config PPC_EPAPR_HV_PIC 109config PPC_EPAPR_HV_PIC
117 bool 110 bool
118 default n
119 select EPAPR_PARAVIRT 111 select EPAPR_PARAVIRT
120 112
121config MPIC_WEIRD 113config MPIC_WEIRD
122 bool 114 bool
123 default n
124 115
125config MPIC_MSGR 116config MPIC_MSGR
126 bool "MPIC message register support" 117 bool "MPIC message register support"
127 depends on MPIC 118 depends on MPIC
128 default n
129 help 119 help
130 Enables support for the MPIC message registers. These 120 Enables support for the MPIC message registers. These
131 registers are used for inter-processor communication. 121 registers are used for inter-processor communication.
132 122
133config PPC_I8259 123config PPC_I8259
134 bool 124 bool
135 default n
136 125
137config U3_DART 126config U3_DART
138 bool 127 bool
139 depends on PPC64 128 depends on PPC64
140 default n
141 129
142config PPC_RTAS 130config PPC_RTAS
143 bool 131 bool
144 default n
145 132
146config RTAS_ERROR_LOGGING 133config RTAS_ERROR_LOGGING
147 bool 134 bool
148 depends on PPC_RTAS 135 depends on PPC_RTAS
149 default n
150 136
151config PPC_RTAS_DAEMON 137config PPC_RTAS_DAEMON
152 bool 138 bool
153 depends on PPC_RTAS 139 depends on PPC_RTAS
154 default n
155 140
156config RTAS_PROC 141config RTAS_PROC
157 bool "Proc interface to RTAS" 142 bool "Proc interface to RTAS"
@@ -164,11 +149,9 @@ config RTAS_FLASH
164 149
165config MMIO_NVRAM 150config MMIO_NVRAM
166 bool 151 bool
167 default n
168 152
169config MPIC_U3_HT_IRQS 153config MPIC_U3_HT_IRQS
170 bool 154 bool
171 default n
172 155
173config MPIC_BROKEN_REGREAD 156config MPIC_BROKEN_REGREAD
174 bool 157 bool
@@ -187,15 +170,12 @@ config EEH
187 170
188config PPC_MPC106 171config PPC_MPC106
189 bool 172 bool
190 default n
191 173
192config PPC_970_NAP 174config PPC_970_NAP
193 bool 175 bool
194 default n
195 176
196config PPC_P7_NAP 177config PPC_P7_NAP
197 bool 178 bool
198 default n
199 179
200config PPC_INDIRECT_PIO 180config PPC_INDIRECT_PIO
201 bool 181 bool
@@ -295,7 +275,6 @@ config CPM2
295 275
296config FSL_ULI1575 276config FSL_ULI1575
297 bool 277 bool
298 default n
299 select GENERIC_ISA_DMA 278 select GENERIC_ISA_DMA
300 help 279 help
301 Supports for the ULI1575 PCIe south bridge that exists on some 280 Supports for the ULI1575 PCIe south bridge that exists on some
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 6c6a7c72cae4..f4e2c5729374 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,7 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2config PPC64 2config PPC64
3 bool "64-bit kernel" 3 bool "64-bit kernel"
4 default n
5 select ZLIB_DEFLATE 4 select ZLIB_DEFLATE
6 help 5 help
7 This option selects whether a 32-bit or a 64-bit kernel 6 This option selects whether a 32-bit or a 64-bit kernel
@@ -72,6 +71,7 @@ config PPC_BOOK3S_64
72 select PPC_HAVE_PMU_SUPPORT 71 select PPC_HAVE_PMU_SUPPORT
73 select SYS_SUPPORTS_HUGETLBFS 72 select SYS_SUPPORTS_HUGETLBFS
74 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 73 select HAVE_ARCH_TRANSPARENT_HUGEPAGE
74 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
75 select ARCH_SUPPORTS_NUMA_BALANCING 75 select ARCH_SUPPORTS_NUMA_BALANCING
76 select IRQ_WORK 76 select IRQ_WORK
77 77
@@ -368,7 +368,6 @@ config PPC_MM_SLICES
368 bool 368 bool
369 default y if PPC_BOOK3S_64 369 default y if PPC_BOOK3S_64
370 default y if PPC_8xx && HUGETLB_PAGE 370 default y if PPC_8xx && HUGETLB_PAGE
371 default n
372 371
373config PPC_HAVE_PMU_SUPPORT 372config PPC_HAVE_PMU_SUPPORT
374 bool 373 bool
@@ -382,7 +381,6 @@ config PPC_PERF_CTRS
382config FORCE_SMP 381config FORCE_SMP
383 # Allow platforms to force SMP=y by selecting this 382 # Allow platforms to force SMP=y by selecting this
384 bool 383 bool
385 default n
386 select SMP 384 select SMP
387 385
388config SMP 386config SMP
@@ -423,7 +421,6 @@ config CHECK_CACHE_COHERENCY
423 421
424config PPC_DOORBELL 422config PPC_DOORBELL
425 bool 423 bool
426 default n
427 424
428endmenu 425endmenu
429 426
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index e46bb7ea710f..143d4417f6cc 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -1,7 +1,5 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2 2
3subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
4
5obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o 3obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o
6 4
7obj-$(CONFIG_PPC_PMAC) += powermac/ 5obj-$(CONFIG_PPC_PMAC) += powermac/
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 9f5958f16923..4b2f114f3116 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -1,7 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2config PPC_CELL 2config PPC_CELL
3 bool 3 bool
4 default n
5 4
6config PPC_CELL_COMMON 5config PPC_CELL_COMMON
7 bool 6 bool
@@ -22,7 +21,6 @@ config PPC_CELL_NATIVE
22 select IBM_EMAC_RGMII if IBM_EMAC 21 select IBM_EMAC_RGMII if IBM_EMAC
23 select IBM_EMAC_ZMII if IBM_EMAC #test only 22 select IBM_EMAC_ZMII if IBM_EMAC #test only
24 select IBM_EMAC_TAH if IBM_EMAC #test only 23 select IBM_EMAC_TAH if IBM_EMAC #test only
25 default n
26 24
27config PPC_IBM_CELL_BLADE 25config PPC_IBM_CELL_BLADE
28 bool "IBM Cell Blade" 26 bool "IBM Cell Blade"
@@ -54,7 +52,6 @@ config SPU_FS
54 52
55config SPU_BASE 53config SPU_BASE
56 bool 54 bool
57 default n
58 select PPC_COPRO_BASE 55 select PPC_COPRO_BASE
59 56
60config CBE_RAS 57config CBE_RAS
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
index 882944c36ef5..5d8e8b6bb1cc 100644
--- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -49,7 +49,7 @@ static int calc_freq(struct spu_gov_info_struct *info)
49 cpu = info->policy->cpu; 49 cpu = info->policy->cpu;
50 busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus); 50 busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
51 51
52 CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1); 52 info->busy_spus = calc_load(info->busy_spus, EXP, busy_spus * FIXED_1);
53 pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n", 53 pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
54 cpu, busy_spus, info->busy_spus); 54 cpu, busy_spus, info->busy_spus);
55 55
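
Note: CALC_LOAD was a macro that updated a fixed-point exponentially decaying average in place; calc_load() from <linux/sched/loadavg.h> returns the new value instead, which is what the hunk above assigns back to info->busy_spus. A rough sketch of the arithmetic involved (rounding details may differ slightly from the in-kernel helper):

	/* Fixed-point exponential average, roughly what calc_load() computes.
	 * FIXED_1 and the EXP_* constants come from <linux/sched/loadavg.h>. */
	static unsigned long avg_update(unsigned long load, unsigned long exp,
					unsigned long active)
	{
		return (load * exp + active * (FIXED_1 - exp)) / FIXED_1;
	}
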
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 0c45cdbac4cf..7f12c7b78c0f 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -50,11 +50,11 @@ struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
50EXPORT_SYMBOL_GPL(cbe_spu_info); 50EXPORT_SYMBOL_GPL(cbe_spu_info);
51 51
52/* 52/*
53 * The spufs fault-handling code needs to call force_sig_info to raise signals 53 * The spufs fault-handling code needs to call force_sig_fault to raise signals
54 * on DMA errors. Export it here to avoid general kernel-wide access to this 54 * on DMA errors. Export it here to avoid general kernel-wide access to this
55 * function 55 * function
56 */ 56 */
57EXPORT_SYMBOL_GPL(force_sig_info); 57EXPORT_SYMBOL_GPL(force_sig_fault);
58 58
59/* 59/*
60 * Protects cbe_spu_info and spu->number. 60 * Protects cbe_spu_info and spu->number.
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 5c409c98cca8..f7e36373f6e0 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -180,35 +180,22 @@ out:
180 180
181static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) 181static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
182{ 182{
183 struct of_phandle_args oirq;
184 int ret;
185 int i; 183 int i;
186 184
187 for (i=0; i < 3; i++) { 185 for (i=0; i < 3; i++) {
188 ret = of_irq_parse_one(np, i, &oirq); 186 spu->irqs[i] = irq_of_parse_and_map(np, i);
189 if (ret) { 187 if (!spu->irqs[i])
190 pr_debug("spu_new: failed to get irq %d\n", i);
191 goto err;
192 }
193 ret = -EINVAL;
194 pr_debug(" irq %d no 0x%x on %pOF\n", i, oirq.args[0],
195 oirq.np);
196 spu->irqs[i] = irq_create_of_mapping(&oirq);
197 if (!spu->irqs[i]) {
198 pr_debug("spu_new: failed to map it !\n");
199 goto err; 188 goto err;
200 }
201 } 189 }
202 return 0; 190 return 0;
203 191
204err: 192err:
205 pr_debug("failed to map irq %x for spu %s\n", *oirq.args, 193 pr_debug("failed to map irq %x for spu %s\n", i, spu->name);
206 spu->name);
207 for (; i >= 0; i--) { 194 for (; i >= 0; i--) {
208 if (spu->irqs[i]) 195 if (spu->irqs[i])
209 irq_dispose_mapping(spu->irqs[i]); 196 irq_dispose_mapping(spu->irqs[i]);
210 } 197 }
211 return ret; 198 return -EINVAL;
212} 199}
213 200
214static int spu_map_resource(struct spu *spu, int nr, 201static int spu_map_resource(struct spu *spu, int nr,
@@ -295,8 +282,8 @@ static int __init of_enumerate_spus(int (*fn)(void *data))
295 for_each_node_by_type(node, "spe") { 282 for_each_node_by_type(node, "spe") {
296 ret = fn(node); 283 ret = fn(node);
297 if (ret) { 284 if (ret) {
298 printk(KERN_WARNING "%s: Error initializing %s\n", 285 printk(KERN_WARNING "%s: Error initializing %pOFn\n",
299 __func__, node->name); 286 __func__, node);
300 of_node_put(node); 287 of_node_put(node);
301 break; 288 break;
302 } 289 }
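
Note: the spu_map_interrupts() rework replaces the of_irq_parse_one() + irq_create_of_mapping() pair with irq_of_parse_and_map(), which performs both steps and returns 0 when the interrupt cannot be parsed or mapped. A minimal sketch of the resulting pattern, with the cleanup loop mirroring the hunk above:

	#include <linux/of_irq.h>
	#include <linux/irqdomain.h>

	static int map_three_irqs(struct device_node *np, unsigned int irqs[3])
	{
		int i;

		for (i = 0; i < 3; i++) {
			irqs[i] = irq_of_parse_and_map(np, i);	/* 0 means failure */
			if (!irqs[i])
				goto err;
		}
		return 0;
	err:
		for (; i >= 0; i--)
			if (irqs[i])
				irq_dispose_mapping(irqs[i]);
		return -EINVAL;
	}
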
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 83cf58daaa79..971ac43b5d60 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -36,42 +36,32 @@
36static void spufs_handle_event(struct spu_context *ctx, 36static void spufs_handle_event(struct spu_context *ctx,
37 unsigned long ea, int type) 37 unsigned long ea, int type)
38{ 38{
39 siginfo_t info;
40
41 if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { 39 if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
42 ctx->event_return |= type; 40 ctx->event_return |= type;
43 wake_up_all(&ctx->stop_wq); 41 wake_up_all(&ctx->stop_wq);
44 return; 42 return;
45 } 43 }
46 44
47 clear_siginfo(&info);
48
49 switch (type) { 45 switch (type) {
50 case SPE_EVENT_INVALID_DMA: 46 case SPE_EVENT_INVALID_DMA:
51 info.si_signo = SIGBUS; 47 force_sig_fault(SIGBUS, BUS_OBJERR, NULL, current);
52 info.si_code = BUS_OBJERR;
53 break; 48 break;
54 case SPE_EVENT_SPE_DATA_STORAGE: 49 case SPE_EVENT_SPE_DATA_STORAGE:
55 info.si_signo = SIGSEGV;
56 info.si_addr = (void __user *)ea;
57 info.si_code = SEGV_ACCERR;
58 ctx->ops->restart_dma(ctx); 50 ctx->ops->restart_dma(ctx);
51 force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea,
52 current);
59 break; 53 break;
60 case SPE_EVENT_DMA_ALIGNMENT: 54 case SPE_EVENT_DMA_ALIGNMENT:
61 info.si_signo = SIGBUS;
62 /* DAR isn't set for an alignment fault :( */ 55 /* DAR isn't set for an alignment fault :( */
63 info.si_code = BUS_ADRALN; 56 force_sig_fault(SIGBUS, BUS_ADRALN, NULL, current);
64 break; 57 break;
65 case SPE_EVENT_SPE_ERROR: 58 case SPE_EVENT_SPE_ERROR:
66 info.si_signo = SIGILL; 59 force_sig_fault(
67 info.si_addr = (void __user *)(unsigned long) 60 SIGILL, ILL_ILLOPC,
68 ctx->ops->npc_read(ctx) - 4; 61 (void __user *)(unsigned long)
69 info.si_code = ILL_ILLOPC; 62 ctx->ops->npc_read(ctx) - 4, current);
70 break; 63 break;
71 } 64 }
72
73 if (info.si_signo)
74 force_sig_info(info.si_signo, &info, current);
75} 65}
76 66
77int spufs_handle_class0(struct spu_context *ctx) 67int spufs_handle_class0(struct spu_context *ctx)
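
Note: spufs_handle_event() drops the hand-built siginfo_t in favour of force_sig_fault(), which takes the signal number, si_code and fault address directly (the task argument is still present in this kernel version, as the hunk shows). A minimal sketch of one of the conversions:

	#include <linux/sched/signal.h>

	static void raise_dma_error(unsigned long ea)
	{
		/* Equivalent of filling siginfo with SIGSEGV/SEGV_ACCERR by hand */
		force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea, current);
	}
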
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index c9ef3c532169..9fcccb4490b9 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -987,9 +987,9 @@ static void spu_calc_load(void)
987 unsigned long active_tasks; /* fixed-point */ 987 unsigned long active_tasks; /* fixed-point */
988 988
989 active_tasks = count_active_contexts() * FIXED_1; 989 active_tasks = count_active_contexts() * FIXED_1;
990 CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); 990 spu_avenrun[0] = calc_load(spu_avenrun[0], EXP_1, active_tasks);
991 CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); 991 spu_avenrun[1] = calc_load(spu_avenrun[1], EXP_5, active_tasks);
992 CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); 992 spu_avenrun[2] = calc_load(spu_avenrun[2], EXP_15, active_tasks);
993} 993}
994 994
995static void spusched_wake(struct timer_list *unused) 995static void spusched_wake(struct timer_list *unused)
@@ -1071,9 +1071,6 @@ void spuctx_switch_state(struct spu_context *ctx,
1071 } 1071 }
1072} 1072}
1073 1073
1074#define LOAD_INT(x) ((x) >> FSHIFT)
1075#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
1076
1077static int show_spu_loadavg(struct seq_file *s, void *private) 1074static int show_spu_loadavg(struct seq_file *s, void *private)
1078{ 1075{
1079 int a, b, c; 1076 int a, b, c;
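
Note: the local LOAD_INT/LOAD_FRAC copies are dropped because the same macros are available from <linux/sched/loadavg.h>; they split a fixed-point average into an integer part and a two-digit fractional part for display. A small sketch of how a show_spu_loadavg()-style printout uses them; "show_one_load" is a hypothetical helper:

	#include <linux/sched/loadavg.h>
	#include <linux/seq_file.h>

	/* Print one fixed-point load value as "X.YZ". */
	static void show_one_load(struct seq_file *s, unsigned long avenrun)
	{
		unsigned long v = avenrun + FIXED_1 / 200;	/* round to nearest 0.01 */

		seq_printf(s, "%lu.%02lu", LOAD_INT(v), LOAD_FRAC(v));
	}
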
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 403523c061ba..ecf703ee3a76 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -112,7 +112,7 @@ static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible)
112 } 112 }
113 error = of_address_to_resource(np, 0, &res); 113 error = of_address_to_resource(np, 0, &res);
114 if (error) { 114 if (error) {
115 pr_err("no valid reg found for %s\n", np->name); 115 pr_err("no valid reg found for %pOFn\n", np);
116 goto out_put; 116 goto out_put;
117 } 117 }
118 118
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
index 376d0be36b66..2601fac50354 100644
--- a/arch/powerpc/platforms/maple/Kconfig
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -13,7 +13,6 @@ config PPC_MAPLE
13 select PPC_RTAS 13 select PPC_RTAS
14 select MMIO_NVRAM 14 select MMIO_NVRAM
15 select ATA_NONSTANDARD if ATA 15 select ATA_NONSTANDARD if ATA
16 default n
17 help 16 help
18 This option enables support for the Maple 970FX Evaluation Board. 17 This option enables support for the Maple 970FX Evaluation Board.
19 For more information, refer to <http://www.970eval.com> 18 For more information, refer to <http://www.970eval.com>
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
index d458a791d35b..98e3bc22bebc 100644
--- a/arch/powerpc/platforms/pasemi/Kconfig
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -2,7 +2,6 @@
2config PPC_PASEMI 2config PPC_PASEMI
3 depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN 3 depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
4 bool "PA Semi SoC-based platforms" 4 bool "PA Semi SoC-based platforms"
5 default n
6 select MPIC 5 select MPIC
7 select PCI 6 select PCI
8 select PPC_UDBG_16550 7 select PPC_UDBG_16550
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index c80f72c370ae..53384eb42a76 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -576,7 +576,7 @@ int pasemi_dma_init(void)
576 res.start = 0xfd800000; 576 res.start = 0xfd800000;
577 res.end = res.start + 0x1000; 577 res.end = res.start + 0x1000;
578 } 578 }
579 dma_status = __ioremap(res.start, resource_size(&res), 0); 579 dma_status = ioremap_cache(res.start, resource_size(&res));
580 pci_dev_put(iob_pdev); 580 pci_dev_put(iob_pdev);
581 581
582 for (i = 0; i < MAX_TXCH; i++) 582 for (i = 0; i < MAX_TXCH; i++)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index f06c83f321e6..f2971522fb4a 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -213,7 +213,7 @@ static int __init iob_init(struct device_node *dn)
213 pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base); 213 pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base);
214 214
215 /* Allocate a spare page to map all invalid IOTLB pages. */ 215 /* Allocate a spare page to map all invalid IOTLB pages. */
216 tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); 216 tmp = memblock_phys_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
217 if (!tmp) 217 if (!tmp)
218 panic("IOBMAP: Cannot allocate spare page!"); 218 panic("IOBMAP: Cannot allocate spare page!");
219 /* Empty l1 is marked invalid */ 219 /* Empty l1 is marked invalid */
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index f2839eed0f89..923bfb340433 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,9 +1,10 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2CFLAGS_bootx_init.o += -fPIC 2CFLAGS_bootx_init.o += -fPIC
3CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector)
3 4
4ifdef CONFIG_FUNCTION_TRACER 5ifdef CONFIG_FUNCTION_TRACER
5# Do not trace early boot code 6# Do not trace early boot code
6CFLAGS_REMOVE_bootx_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 7CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
7endif 8endif
8 9
9obj-y += pic.o setup.o time.o feature.o pci.o \ 10obj-y += pic.o setup.o time.o feature.o pci.o \
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 4eb8cb38fc69..ed2f54b3f173 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1049,7 +1049,6 @@ core99_reset_cpu(struct device_node *node, long param, long value)
1049 unsigned long flags; 1049 unsigned long flags;
1050 struct macio_chip *macio; 1050 struct macio_chip *macio;
1051 struct device_node *np; 1051 struct device_node *np;
1052 struct device_node *cpus;
1053 const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0, 1052 const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0,
1054 KL_GPIO_RESET_CPU1, 1053 KL_GPIO_RESET_CPU1,
1055 KL_GPIO_RESET_CPU2, 1054 KL_GPIO_RESET_CPU2,
@@ -1059,10 +1058,7 @@ core99_reset_cpu(struct device_node *node, long param, long value)
1059 if (macio->type != macio_keylargo) 1058 if (macio->type != macio_keylargo)
1060 return -ENODEV; 1059 return -ENODEV;
1061 1060
1062 cpus = of_find_node_by_path("/cpus"); 1061 for_each_of_cpu_node(np) {
1063 if (cpus == NULL)
1064 return -ENODEV;
1065 for (np = cpus->child; np != NULL; np = np->sibling) {
1066 const u32 *num = of_get_property(np, "reg", NULL); 1062 const u32 *num = of_get_property(np, "reg", NULL);
1067 const u32 *rst = of_get_property(np, "soft-reset", NULL); 1063 const u32 *rst = of_get_property(np, "soft-reset", NULL);
1068 if (num == NULL || rst == NULL) 1064 if (num == NULL || rst == NULL)
@@ -1072,7 +1068,6 @@ core99_reset_cpu(struct device_node *node, long param, long value)
1072 break; 1068 break;
1073 } 1069 }
1074 } 1070 }
1075 of_node_put(cpus);
1076 if (np == NULL || reset_io == 0) 1071 if (np == NULL || reset_io == 0)
1077 reset_io = dflt_reset_lines[param]; 1072 reset_io = dflt_reset_lines[param];
1078 1073
@@ -1504,16 +1499,12 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
1504 unsigned long flags; 1499 unsigned long flags;
1505 struct macio_chip *macio; 1500 struct macio_chip *macio;
1506 struct device_node *np; 1501 struct device_node *np;
1507 struct device_node *cpus;
1508 1502
1509 macio = &macio_chips[0]; 1503 macio = &macio_chips[0];
1510 if (macio->type != macio_keylargo2 && macio->type != macio_shasta) 1504 if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
1511 return -ENODEV; 1505 return -ENODEV;
1512 1506
1513 cpus = of_find_node_by_path("/cpus"); 1507 for_each_of_cpu_node(np) {
1514 if (cpus == NULL)
1515 return -ENODEV;
1516 for (np = cpus->child; np != NULL; np = np->sibling) {
1517 const u32 *num = of_get_property(np, "reg", NULL); 1508 const u32 *num = of_get_property(np, "reg", NULL);
1518 const u32 *rst = of_get_property(np, "soft-reset", NULL); 1509 const u32 *rst = of_get_property(np, "soft-reset", NULL);
1519 if (num == NULL || rst == NULL) 1510 if (num == NULL || rst == NULL)
@@ -1523,7 +1514,6 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
1523 break; 1514 break;
1524 } 1515 }
1525 } 1516 }
1526 of_node_put(cpus);
1527 if (np == NULL || reset_io == 0) 1517 if (np == NULL || reset_io == 0)
1528 return -ENODEV; 1518 return -ENODEV;
1529 1519
@@ -2515,31 +2505,26 @@ found:
2515 * supposed to be set when not supported, but I'm not very confident 2505 * supposed to be set when not supported, but I'm not very confident
2516 * that all Apple OF revs did it properly, I do it the paranoid way. 2506 * that all Apple OF revs did it properly, I do it the paranoid way.
2517 */ 2507 */
2518 while (uninorth_base && uninorth_rev > 3) { 2508 if (uninorth_base && uninorth_rev > 3) {
2519 struct device_node *cpus = of_find_node_by_path("/cpus");
2520 struct device_node *np; 2509 struct device_node *np;
2521 2510
2522 if (!cpus || !cpus->child) { 2511 for_each_of_cpu_node(np) {
2523 printk(KERN_WARNING "Can't find CPU(s) in device tree !\n"); 2512 int cpu_count = 1;
2524 of_node_put(cpus); 2513
2525 break; 2514 /* Nap mode not supported on SMP */
2526 } 2515 if (of_get_property(np, "flush-on-lock", NULL) ||
2527 np = cpus->child; 2516 (cpu_count > 1)) {
2528 /* Nap mode not supported on SMP */ 2517 powersave_nap = 0;
2529 if (np->sibling) { 2518 of_node_put(np);
2530 of_node_put(cpus); 2519 break;
2531 break; 2520 }
2532 } 2521
2533 /* Nap mode not supported if flush-on-lock property is present */ 2522 cpu_count++;
2534 if (of_get_property(np, "flush-on-lock", NULL)) { 2523 powersave_nap = 1;
2535 of_node_put(cpus);
2536 break;
2537 } 2524 }
2538 of_node_put(cpus);
2539 powersave_nap = 1;
2540 printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
2541 break;
2542 } 2525 }
2526 if (powersave_nap)
2527 printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
2543 2528
2544 /* On CPUs that support it (750FX), lowspeed by default during 2529 /* On CPUs that support it (750FX), lowspeed by default during
2545 * NAP mode 2530 * NAP mode
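
Note: the feature.c loops replace the manual walk of the /cpus children with for_each_of_cpu_node(), which iterates every CPU node and handles the reference counting between iterations; only a node kept past an early exit needs an explicit of_node_put(), as the hunks above do before break. A minimal sketch of the idiom, using the "soft-reset" property from the hunk:

	#include <linux/of.h>

	static struct device_node *find_cpu_with_soft_reset(void)
	{
		struct device_node *np;

		for_each_of_cpu_node(np) {
			if (of_get_property(np, "soft-reset", NULL))
				return np;	/* reference held; caller must of_node_put() */
		}
		return NULL;
	}
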
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index 60b03a1703d1..ae54d7fe68f3 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -18,7 +18,7 @@
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/adb.h> 19#include <linux/adb.h>
20#include <linux/pmu.h> 20#include <linux/pmu.h>
21#include <linux/bootmem.h> 21#include <linux/memblock.h>
22#include <linux/completion.h> 22#include <linux/completion.h>
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <asm/sections.h> 24#include <asm/sections.h>
@@ -513,7 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr)
513 printk(KERN_ERR "nvram: no address\n"); 513 printk(KERN_ERR "nvram: no address\n");
514 return -EINVAL; 514 return -EINVAL;
515 } 515 }
516 nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); 516 nvram_image = memblock_alloc(NVRAM_SIZE, SMP_CACHE_BYTES);
517 nvram_data = ioremap(addr, NVRAM_SIZE*2); 517 nvram_data = ioremap(addr, NVRAM_SIZE*2);
518 nvram_naddrs = 1; /* Make sure we get the correct case */ 518 nvram_naddrs = 1; /* Make sure we get the correct case */
519 519
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 3a529fcdae97..2f00e3daafb0 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -243,10 +243,9 @@ static void __init l2cr_init(void)
243{ 243{
244 /* Checks "l2cr-value" property in the registry */ 244 /* Checks "l2cr-value" property in the registry */
245 if (cpu_has_feature(CPU_FTR_L2CR)) { 245 if (cpu_has_feature(CPU_FTR_L2CR)) {
246 struct device_node *np = of_find_node_by_name(NULL, "cpus"); 246 struct device_node *np;
247 if (!np) 247
248 np = of_find_node_by_type(NULL, "cpu"); 248 for_each_of_cpu_node(np) {
249 if (np) {
250 const unsigned int *l2cr = 249 const unsigned int *l2cr =
251 of_get_property(np, "l2cr-value", NULL); 250 of_get_property(np, "l2cr-value", NULL);
252 if (l2cr) { 251 if (l2cr) {
@@ -256,6 +255,7 @@ static void __init l2cr_init(void)
256 _set_L2CR(ppc_override_l2cr_value); 255 _set_L2CR(ppc_override_l2cr_value);
257 } 256 }
258 of_node_put(np); 257 of_node_put(np);
258 break;
259 } 259 }
260 } 260 }
261 261
@@ -279,8 +279,8 @@ static void __init pmac_setup_arch(void)
279 /* Set loops_per_jiffy to a half-way reasonable value, 279 /* Set loops_per_jiffy to a half-way reasonable value,
280 for use until calibrate_delay gets called. */ 280 for use until calibrate_delay gets called. */
281 loops_per_jiffy = 50000000 / HZ; 281 loops_per_jiffy = 50000000 / HZ;
282 cpu = of_find_node_by_type(NULL, "cpu"); 282
283 if (cpu != NULL) { 283 for_each_of_cpu_node(cpu) {
284 fp = of_get_property(cpu, "clock-frequency", NULL); 284 fp = of_get_property(cpu, "clock-frequency", NULL);
285 if (fp != NULL) { 285 if (fp != NULL) {
286 if (pvr >= 0x30 && pvr < 0x80) 286 if (pvr >= 0x30 && pvr < 0x80)
@@ -292,8 +292,9 @@ static void __init pmac_setup_arch(void)
292 else 292 else
293 /* 601, 603, etc. */ 293 /* 601, 603, etc. */
294 loops_per_jiffy = *fp / (2 * HZ); 294 loops_per_jiffy = *fp / (2 * HZ);
295 of_node_put(cpu);
296 break;
295 } 297 }
296 of_node_put(cpu);
297 } 298 }
298 299
299 /* See if newworld or oldworld */ 300 /* See if newworld or oldworld */
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index f92c1918fb56..f157e3d071f2 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -45,13 +45,6 @@
45#endif 45#endif
46 46
47/* 47/*
48 * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU
49 * times wrap in 2040. If we need to handle later times, the read_time functions
50 * need to be changed to interpret wrapped times as post-2040.
51 */
52#define RTC_OFFSET 2082844800
53
54/*
55 * Calibrate the decrementer frequency with the VIA timer 1. 48 * Calibrate the decrementer frequency with the VIA timer 1.
56 */ 49 */
57#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ 50#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */
@@ -90,98 +83,6 @@ long __init pmac_time_init(void)
90 return delta; 83 return delta;
91} 84}
92 85
93#ifdef CONFIG_ADB_CUDA
94static time64_t cuda_get_time(void)
95{
96 struct adb_request req;
97 time64_t now;
98
99 if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
100 return 0;
101 while (!req.complete)
102 cuda_poll();
103 if (req.reply_len != 7)
104 printk(KERN_ERR "cuda_get_time: got %d byte reply\n",
105 req.reply_len);
106 now = (u32)((req.reply[3] << 24) + (req.reply[4] << 16) +
107 (req.reply[5] << 8) + req.reply[6]);
108 /* it's either after year 2040, or the RTC has gone backwards */
109 WARN_ON(now < RTC_OFFSET);
110
111 return now - RTC_OFFSET;
112}
113
114#define cuda_get_rtc_time(tm) rtc_time64_to_tm(cuda_get_time(), (tm))
115
116static int cuda_set_rtc_time(struct rtc_time *tm)
117{
118 u32 nowtime;
119 struct adb_request req;
120
121 nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
122 if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
123 nowtime >> 24, nowtime >> 16, nowtime >> 8,
124 nowtime) < 0)
125 return -ENXIO;
126 while (!req.complete)
127 cuda_poll();
128 if ((req.reply_len != 3) && (req.reply_len != 7))
129 printk(KERN_ERR "cuda_set_rtc_time: got %d byte reply\n",
130 req.reply_len);
131 return 0;
132}
133
134#else
135#define cuda_get_time() 0
136#define cuda_get_rtc_time(tm)
137#define cuda_set_rtc_time(tm) 0
138#endif
139
140#ifdef CONFIG_ADB_PMU
141static time64_t pmu_get_time(void)
142{
143 struct adb_request req;
144 time64_t now;
145
146 if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
147 return 0;
148 pmu_wait_complete(&req);
149 if (req.reply_len != 4)
150 printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n",
151 req.reply_len);
152 now = (u32)((req.reply[0] << 24) + (req.reply[1] << 16) +
153 (req.reply[2] << 8) + req.reply[3]);
154
155 /* it's either after year 2040, or the RTC has gone backwards */
156 WARN_ON(now < RTC_OFFSET);
157
158 return now - RTC_OFFSET;
159}
160
161#define pmu_get_rtc_time(tm) rtc_time64_to_tm(pmu_get_time(), (tm))
162
163static int pmu_set_rtc_time(struct rtc_time *tm)
164{
165 u32 nowtime;
166 struct adb_request req;
167
168 nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
169 if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24,
170 nowtime >> 16, nowtime >> 8, nowtime) < 0)
171 return -ENXIO;
172 pmu_wait_complete(&req);
173 if (req.reply_len != 0)
174 printk(KERN_ERR "pmu_set_rtc_time: %d byte reply from PMU\n",
175 req.reply_len);
176 return 0;
177}
178
179#else
180#define pmu_get_time() 0
181#define pmu_get_rtc_time(tm)
182#define pmu_set_rtc_time(tm) 0
183#endif
184
185#ifdef CONFIG_PMAC_SMU 86#ifdef CONFIG_PMAC_SMU
186static time64_t smu_get_time(void) 87static time64_t smu_get_time(void)
187{ 88{
@@ -191,11 +92,6 @@ static time64_t smu_get_time(void)
191 return 0; 92 return 0;
192 return rtc_tm_to_time64(&tm); 93 return rtc_tm_to_time64(&tm);
193} 94}
194
195#else
196#define smu_get_time() 0
197#define smu_get_rtc_time(tm, spin)
198#define smu_set_rtc_time(tm, spin) 0
199#endif 95#endif
200 96
201/* Can't be __init, it's called when suspending and resuming */ 97/* Can't be __init, it's called when suspending and resuming */
@@ -203,12 +99,18 @@ time64_t pmac_get_boot_time(void)
203{ 99{
204 /* Get the time from the RTC, used only at boot time */ 100 /* Get the time from the RTC, used only at boot time */
205 switch (sys_ctrler) { 101 switch (sys_ctrler) {
102#ifdef CONFIG_ADB_CUDA
206 case SYS_CTRLER_CUDA: 103 case SYS_CTRLER_CUDA:
207 return cuda_get_time(); 104 return cuda_get_time();
105#endif
106#ifdef CONFIG_ADB_PMU
208 case SYS_CTRLER_PMU: 107 case SYS_CTRLER_PMU:
209 return pmu_get_time(); 108 return pmu_get_time();
109#endif
110#ifdef CONFIG_PMAC_SMU
210 case SYS_CTRLER_SMU: 111 case SYS_CTRLER_SMU:
211 return smu_get_time(); 112 return smu_get_time();
113#endif
212 default: 114 default:
213 return 0; 115 return 0;
214 } 116 }
@@ -218,15 +120,21 @@ void pmac_get_rtc_time(struct rtc_time *tm)
218{ 120{
219 /* Get the time from the RTC, used only at boot time */ 121 /* Get the time from the RTC, used only at boot time */
220 switch (sys_ctrler) { 122 switch (sys_ctrler) {
123#ifdef CONFIG_ADB_CUDA
221 case SYS_CTRLER_CUDA: 124 case SYS_CTRLER_CUDA:
222 cuda_get_rtc_time(tm); 125 rtc_time64_to_tm(cuda_get_time(), tm);
223 break; 126 break;
127#endif
128#ifdef CONFIG_ADB_PMU
224 case SYS_CTRLER_PMU: 129 case SYS_CTRLER_PMU:
225 pmu_get_rtc_time(tm); 130 rtc_time64_to_tm(pmu_get_time(), tm);
226 break; 131 break;
132#endif
133#ifdef CONFIG_PMAC_SMU
227 case SYS_CTRLER_SMU: 134 case SYS_CTRLER_SMU:
228 smu_get_rtc_time(tm, 1); 135 smu_get_rtc_time(tm, 1);
229 break; 136 break;
137#endif
230 default: 138 default:
231 ; 139 ;
232 } 140 }
@@ -235,12 +143,18 @@ void pmac_get_rtc_time(struct rtc_time *tm)
235int pmac_set_rtc_time(struct rtc_time *tm) 143int pmac_set_rtc_time(struct rtc_time *tm)
236{ 144{
237 switch (sys_ctrler) { 145 switch (sys_ctrler) {
146#ifdef CONFIG_ADB_CUDA
238 case SYS_CTRLER_CUDA: 147 case SYS_CTRLER_CUDA:
239 return cuda_set_rtc_time(tm); 148 return cuda_set_rtc_time(tm);
149#endif
150#ifdef CONFIG_ADB_PMU
240 case SYS_CTRLER_PMU: 151 case SYS_CTRLER_PMU:
241 return pmu_set_rtc_time(tm); 152 return pmu_set_rtc_time(tm);
153#endif
154#ifdef CONFIG_PMAC_SMU
242 case SYS_CTRLER_SMU: 155 case SYS_CTRLER_SMU:
243 return smu_set_rtc_time(tm, 1); 156 return smu_set_rtc_time(tm, 1);
157#endif
244 default: 158 default:
245 return -ENODEV; 159 return -ENODEV;
246 } 160 }
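
Note: the Cuda and PMU RTC accessors, together with the 1904-based RTC_OFFSET of 2082844800 seconds, move out of this file into the respective ADB drivers; only the #ifdef-guarded dispatch in the pmac_* wrappers remains. For reference, a sketch of the offset conversion the removed helpers performed, with the constant as in the removed code:

	#include <linux/time64.h>

	#define RTC_OFFSET 2082844800UL	/* seconds from 1904-01-01 to 1970-01-01 */

	/* Mac RTCs count seconds since 1904; Unix time counts since 1970. */
	static inline time64_t mac_rtc_to_unix(u32 mac_seconds)
	{
		return (time64_t)mac_seconds - RTC_OFFSET;
	}
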
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index f8dc98d3dc01..99083fe992d5 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -15,11 +15,6 @@ config PPC_POWERNV
15 select PPC_SCOM 15 select PPC_SCOM
16 select ARCH_RANDOM 16 select ARCH_RANDOM
17 select CPU_FREQ 17 select CPU_FREQ
18 select CPU_FREQ_GOV_PERFORMANCE
19 select CPU_FREQ_GOV_POWERSAVE
20 select CPU_FREQ_GOV_USERSPACE
21 select CPU_FREQ_GOV_ONDEMAND
22 select CPU_FREQ_GOV_CONSERVATIVE
23 select PPC_DOORBELL 18 select PPC_DOORBELL
24 select MMU_NOTIFIER 19 select MMU_NOTIFIER
25 select FORCE_SMP 20 select FORCE_SMP
@@ -35,7 +30,6 @@ config OPAL_PRD
35config PPC_MEMTRACE 30config PPC_MEMTRACE
36 bool "Enable removal of RAM from kernel mappings for tracing" 31 bool "Enable removal of RAM from kernel mappings for tracing"
37 depends on PPC_POWERNV && MEMORY_HOTREMOVE 32 depends on PPC_POWERNV && MEMORY_HOTREMOVE
38 default n
39 help 33 help
40 Enabling this option allows for the removal of memory (RAM) 34 Enabling this option allows for the removal of memory (RAM)
41 from the kernel mappings to be used for hardware tracing. 35 from the kernel mappings to be used for hardware tracing.
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3c1beae29f2d..abc0be7507c8 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -223,14 +223,6 @@ int pnv_eeh_post_init(void)
223 eeh_probe_devices(); 223 eeh_probe_devices();
224 eeh_addr_cache_build(); 224 eeh_addr_cache_build();
225 225
226 if (eeh_has_flag(EEH_POSTPONED_PROBE)) {
227 eeh_clear_flag(EEH_POSTPONED_PROBE);
228 if (eeh_enabled())
229 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
230 else
231 pr_info("EEH: No capable adapters found\n");
232 }
233
234 /* Register OPAL event notifier */ 226 /* Register OPAL event notifier */
235 eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); 227 eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
236 if (eeh_event_irq < 0) { 228 if (eeh_event_irq < 0) {
@@ -391,12 +383,6 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
391 if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) 383 if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
392 return NULL; 384 return NULL;
393 385
394 /* Skip if we haven't probed yet */
395 if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) {
396 eeh_add_flag(EEH_POSTPONED_PROBE);
397 return NULL;
398 }
399
400 /* Initialize eeh device */ 386 /* Initialize eeh device */
401 edev->class_code = pdn->class_code; 387 edev->class_code = pdn->class_code;
402 edev->mode &= 0xFFFFFF00; 388 edev->mode &= 0xFFFFFF00;
@@ -604,7 +590,7 @@ static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
604 EEH_STATE_MMIO_ENABLED | 590 EEH_STATE_MMIO_ENABLED |
605 EEH_STATE_DMA_ENABLED); 591 EEH_STATE_DMA_ENABLED);
606 } else if (!(pe->state & EEH_PE_ISOLATED)) { 592 } else if (!(pe->state & EEH_PE_ISOLATED)) {
607 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 593 eeh_pe_mark_isolated(pe);
608 pnv_eeh_get_phb_diag(pe); 594 pnv_eeh_get_phb_diag(pe);
609 595
610 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 596 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
@@ -706,7 +692,7 @@ static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
706 if (phb->freeze_pe) 692 if (phb->freeze_pe)
707 phb->freeze_pe(phb, pe->addr); 693 phb->freeze_pe(phb, pe->addr);
708 694
709 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 695 eeh_pe_mark_isolated(pe);
710 pnv_eeh_get_phb_diag(pe); 696 pnv_eeh_get_phb_diag(pe);
711 697
712 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 698 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
@@ -1054,7 +1040,7 @@ static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
1054 int ret; 1040 int ret;
1055 1041
1056 /* The VF PE should have only one child device */ 1042 /* The VF PE should have only one child device */
1057 edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); 1043 edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
1058 pdn = eeh_dev_to_pdn(edev); 1044 pdn = eeh_dev_to_pdn(edev);
1059 if (!pdn) 1045 if (!pdn)
1060 return -ENXIO; 1046 return -ENXIO;
@@ -1148,43 +1134,6 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
1148} 1134}
1149 1135
1150/** 1136/**
1151 * pnv_eeh_wait_state - Wait for PE state
1152 * @pe: EEH PE
1153 * @max_wait: maximal period in millisecond
1154 *
1155 * Wait for the state of associated PE. It might take some time
1156 * to retrieve the PE's state.
1157 */
1158static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
1159{
1160 int ret;
1161 int mwait;
1162
1163 while (1) {
1164 ret = pnv_eeh_get_state(pe, &mwait);
1165
1166 /*
1167 * If the PE's state is temporarily unavailable,
1168 * we have to wait for the specified time. Otherwise,
1169 * the PE's state will be returned immediately.
1170 */
1171 if (ret != EEH_STATE_UNAVAILABLE)
1172 return ret;
1173
1174 if (max_wait <= 0) {
1175 pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
1176 __func__, pe->addr, max_wait);
1177 return EEH_STATE_NOT_SUPPORT;
1178 }
1179
1180 max_wait -= mwait;
1181 msleep(mwait);
1182 }
1183
1184 return EEH_STATE_NOT_SUPPORT;
1185}
1186
1187/**
1188 * pnv_eeh_get_log - Retrieve error log 1137 * pnv_eeh_get_log - Retrieve error log
1189 * @pe: EEH PE 1138 * @pe: EEH PE
1190 * @severity: temporary or permanent error log 1139 * @severity: temporary or permanent error log
@@ -1611,7 +1560,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
1611 if ((ret == EEH_NEXT_ERR_FROZEN_PE || 1560 if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
1612 ret == EEH_NEXT_ERR_FENCED_PHB) && 1561 ret == EEH_NEXT_ERR_FENCED_PHB) &&
1613 !((*pe)->state & EEH_PE_ISOLATED)) { 1562 !((*pe)->state & EEH_PE_ISOLATED)) {
1614 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); 1563 eeh_pe_mark_isolated(*pe);
1615 pnv_eeh_get_phb_diag(*pe); 1564 pnv_eeh_get_phb_diag(*pe);
1616 1565
1617 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 1566 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
@@ -1640,7 +1589,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
1640 } 1589 }
1641 1590
1642 /* We possibly migrate to another PE */ 1591 /* We possibly migrate to another PE */
1643 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); 1592 eeh_pe_mark_isolated(*pe);
1644 } 1593 }
1645 1594
1646 /* 1595 /*
@@ -1702,7 +1651,6 @@ static struct eeh_ops pnv_eeh_ops = {
1702 .get_pe_addr = pnv_eeh_get_pe_addr, 1651 .get_pe_addr = pnv_eeh_get_pe_addr,
1703 .get_state = pnv_eeh_get_state, 1652 .get_state = pnv_eeh_get_state,
1704 .reset = pnv_eeh_reset, 1653 .reset = pnv_eeh_reset,
1705 .wait_state = pnv_eeh_wait_state,
1706 .get_log = pnv_eeh_get_log, 1654 .get_log = pnv_eeh_get_log,
1707 .configure_bridge = pnv_eeh_configure_bridge, 1655 .configure_bridge = pnv_eeh_configure_bridge,
1708 .err_inject = pnv_eeh_err_inject, 1656 .err_inject = pnv_eeh_err_inject,
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 51dc398ae3f7..84d038ed3882 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -70,6 +70,7 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
70 return 0; 70 return 0;
71} 71}
72 72
73/* called with device_hotplug_lock held */
73static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) 74static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
74{ 75{
75 u64 end_pfn = start_pfn + nr_pages - 1; 76 u64 end_pfn = start_pfn + nr_pages - 1;
@@ -90,17 +91,15 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
90 walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, 91 walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
91 change_memblock_state); 92 change_memblock_state);
92 93
93 lock_device_hotplug();
94 remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
95 unlock_device_hotplug();
96 94
97 return true; 95 return true;
98} 96}
99 97
100static u64 memtrace_alloc_node(u32 nid, u64 size) 98static u64 memtrace_alloc_node(u32 nid, u64 size)
101{ 99{
102 u64 start_pfn, end_pfn, nr_pages; 100 u64 start_pfn, end_pfn, nr_pages, pfn;
103 u64 base_pfn; 101 u64 base_pfn;
102 u64 bytes = memory_block_size_bytes();
104 103
105 if (!node_spanned_pages(nid)) 104 if (!node_spanned_pages(nid))
106 return 0; 105 return 0;
@@ -112,10 +111,24 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
112 /* Trace memory needs to be aligned to the size */ 111 /* Trace memory needs to be aligned to the size */
113 end_pfn = round_down(end_pfn - nr_pages, nr_pages); 112 end_pfn = round_down(end_pfn - nr_pages, nr_pages);
114 113
114 lock_device_hotplug();
115 for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { 115 for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
116 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) 116 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
117 /*
118 * Remove memory in memory block size chunks so that
119 * iomem resources are always split to the same size and
120 * we never try to remove memory that spans two iomem
121 * resources.
122 */
123 end_pfn = base_pfn + nr_pages;
124 for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
125 __remove_memory(nid, pfn << PAGE_SHIFT, bytes);
126 }
127 unlock_device_hotplug();
117 return base_pfn << PAGE_SHIFT; 128 return base_pfn << PAGE_SHIFT;
129 }
118 } 130 }
131 unlock_device_hotplug();
119 132
120 return 0; 133 return 0;
121} 134}
@@ -231,9 +244,11 @@ static int memtrace_online(void)
231 * we need to online the memory ourselves. 244 * we need to online the memory ourselves.
232 */ 245 */
233 if (!memhp_auto_online) { 246 if (!memhp_auto_online) {
247 lock_device_hotplug();
234 walk_memory_range(PFN_DOWN(ent->start), 248 walk_memory_range(PFN_DOWN(ent->start),
235 PFN_UP(ent->start + ent->size - 1), 249 PFN_UP(ent->start + ent->size - 1),
236 NULL, online_mem_block); 250 NULL, online_mem_block);
251 unlock_device_hotplug();
237 } 252 }
238 253
239 /* 254 /*
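
Note: the memtrace change takes device_hotplug_lock around the whole offline-and-remove sequence and removes the offlined range in memory_block_size_bytes()-sized chunks, so iomem resources are always split on the same boundaries. A condensed sketch of that inner loop, following the hunk above (caller holds device_hotplug_lock; error handling omitted):

	#include <linux/memory.h>
	#include <linux/memory_hotplug.h>

	/* Remove [base_pfn, base_pfn + nr_pages) one memory block at a time. */
	static void remove_range_in_blocks(u32 nid, u64 base_pfn, u64 nr_pages)
	{
		u64 bytes = memory_block_size_bytes();
		u64 end_pfn = base_pfn + nr_pages;
		u64 pfn;

		for (pfn = base_pfn; pfn < end_pfn; pfn += bytes >> PAGE_SHIFT)
			__remove_memory(nid, pfn << PAGE_SHIFT, bytes);
	}
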
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 8006c54a91e3..6f60e0931922 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -17,7 +17,7 @@
17#include <linux/pci.h> 17#include <linux/pci.h>
18#include <linux/memblock.h> 18#include <linux/memblock.h>
19#include <linux/iommu.h> 19#include <linux/iommu.h>
20#include <linux/debugfs.h> 20#include <linux/sizes.h>
21 21
22#include <asm/debugfs.h> 22#include <asm/debugfs.h>
23#include <asm/tlb.h> 23#include <asm/tlb.h>
@@ -42,14 +42,6 @@
42static DEFINE_SPINLOCK(npu_context_lock); 42static DEFINE_SPINLOCK(npu_context_lock);
43 43
44/* 44/*
45 * When an address shootdown range exceeds this threshold we invalidate the
46 * entire TLB on the GPU for the given PID rather than each specific address in
47 * the range.
48 */
49static uint64_t atsd_threshold = 2 * 1024 * 1024;
50static struct dentry *atsd_threshold_dentry;
51
52/*
53 * Other types of TCE cache invalidation are not functional in the 45 * Other types of TCE cache invalidation are not functional in the
54 * hardware. 46 * hardware.
55 */ 47 */
@@ -454,79 +446,73 @@ static void put_mmio_atsd_reg(struct npu *npu, int reg)
454} 446}
455 447
456/* MMIO ATSD register offsets */ 448/* MMIO ATSD register offsets */
457#define XTS_ATSD_AVA 1 449#define XTS_ATSD_LAUNCH 0
458#define XTS_ATSD_STAT 2 450#define XTS_ATSD_AVA 1
459 451#define XTS_ATSD_STAT 2
460static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
461 unsigned long launch, unsigned long va)
462{
463 struct npu *npu = mmio_atsd_reg->npu;
464 int reg = mmio_atsd_reg->reg;
465
466 __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
467 eieio();
468 __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
469}
470 452
471static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], 453static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
472 unsigned long pid, bool flush)
473{ 454{
474 int i; 455 unsigned long launch = 0;
475 unsigned long launch;
476
477 for (i = 0; i <= max_npu2_index; i++) {
478 if (mmio_atsd_reg[i].reg < 0)
479 continue;
480 456
481 /* IS set to invalidate matching PID */ 457 if (psize == MMU_PAGE_COUNT) {
482 launch = PPC_BIT(12); 458 /* IS set to invalidate entire matching PID */
483 459 launch |= PPC_BIT(12);
484 /* PRS set to process-scoped */ 460 } else {
485 launch |= PPC_BIT(13); 461 /* AP set to invalidate region of psize */
462 launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
463 }
486 464
487 /* AP */ 465 /* PRS set to process-scoped */
488 launch |= (u64) 466 launch |= PPC_BIT(13);
489 mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
490 467
491 /* PID */ 468 /* PID */
492 launch |= pid << PPC_BITLSHIFT(38); 469 launch |= pid << PPC_BITLSHIFT(38);
493 470
494 /* No flush */ 471 /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
495 launch |= !flush << PPC_BITLSHIFT(39);
496 472
497 /* Invalidating the entire process doesn't use a va */ 473 return launch;
498 mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
499 }
500} 474}
501 475
502static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], 476static void mmio_atsd_regs_write(struct mmio_atsd_reg
503 unsigned long va, unsigned long pid, bool flush) 477 mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
478 unsigned long val)
504{ 479{
505 int i; 480 struct npu *npu;
506 unsigned long launch; 481 int i, reg;
507 482
508 for (i = 0; i <= max_npu2_index; i++) { 483 for (i = 0; i <= max_npu2_index; i++) {
509 if (mmio_atsd_reg[i].reg < 0) 484 reg = mmio_atsd_reg[i].reg;
485 if (reg < 0)
510 continue; 486 continue;
511 487
512 /* IS set to invalidate target VA */ 488 npu = mmio_atsd_reg[i].npu;
513 launch = 0; 489 __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
490 }
491}
492
493static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
494 unsigned long pid)
495{
496 unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
514 497
515 /* PRS set to process scoped */ 498 /* Invalidating the entire process doesn't use a va */
516 launch |= PPC_BIT(13); 499 mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
500}
517 501
518 /* AP */ 502static void mmio_invalidate_range(struct mmio_atsd_reg
519 launch |= (u64) 503 mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
520 mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); 504 unsigned long start, unsigned long psize)
505{
506 unsigned long launch = get_atsd_launch_val(pid, psize);
521 507
522 /* PID */ 508 /* Write all VAs first */
523 launch |= pid << PPC_BITLSHIFT(38); 509 mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
524 510
525 /* No flush */ 511 /* Issue one barrier for all address writes */
526 launch |= !flush << PPC_BITLSHIFT(39); 512 eieio();
527 513
528 mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); 514 /* Launch */
529 } 515 mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
530} 516}
531 517
532#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) 518#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
@@ -612,14 +598,36 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
612} 598}
613 599
614/* 600/*
615 * Invalidate either a single address or an entire PID depending on 601 * Invalidate a virtual address range
616 * the value of va.
617 */ 602 */
618static void mmio_invalidate(struct npu_context *npu_context, int va, 603static void mmio_invalidate(struct npu_context *npu_context,
619 unsigned long address, bool flush) 604 unsigned long start, unsigned long size)
620{ 605{
621 struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; 606 struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
622 unsigned long pid = npu_context->mm->context.id; 607 unsigned long pid = npu_context->mm->context.id;
608 unsigned long atsd_start = 0;
609 unsigned long end = start + size - 1;
610 int atsd_psize = MMU_PAGE_COUNT;
611
612 /*
613 * Convert the input range into one of the supported sizes. If the range
614 * doesn't fit, use the next larger supported size. Invalidation latency
615 * is high, so over-invalidation is preferred to issuing multiple
616 * invalidates.
617 *
618 * A 4K page size isn't supported by NPU/GPU ATS, so that case is
619 * ignored.
620 */
621 if (size == SZ_64K) {
622 atsd_start = start;
623 atsd_psize = MMU_PAGE_64K;
624 } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
625 atsd_start = ALIGN_DOWN(start, SZ_2M);
626 atsd_psize = MMU_PAGE_2M;
627 } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
628 atsd_start = ALIGN_DOWN(start, SZ_1G);
629 atsd_psize = MMU_PAGE_1G;
630 }
623 631
624 if (npu_context->nmmu_flush) 632 if (npu_context->nmmu_flush)
625 /* 633 /*
@@ -634,23 +642,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
634 * an invalidate. 642 * an invalidate.
635 */ 643 */
636 acquire_atsd_reg(npu_context, mmio_atsd_reg); 644 acquire_atsd_reg(npu_context, mmio_atsd_reg);
637 if (va) 645
638 mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); 646 if (atsd_psize == MMU_PAGE_COUNT)
647 mmio_invalidate_pid(mmio_atsd_reg, pid);
639 else 648 else
640 mmio_invalidate_pid(mmio_atsd_reg, pid, flush); 649 mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
650 atsd_psize);
641 651
642 mmio_invalidate_wait(mmio_atsd_reg); 652 mmio_invalidate_wait(mmio_atsd_reg);
643 if (flush) { 653
644 /* 654 /*
645 * The GPU requires two flush ATSDs to ensure all entries have 655 * The GPU requires two flush ATSDs to ensure all entries have been
646 * been flushed. We use PID 0 as it will never be used for a 656 * flushed. We use PID 0 as it will never be used for a process on the
647 * process on the GPU. 657 * GPU.
648 */ 658 */
649 mmio_invalidate_pid(mmio_atsd_reg, 0, true); 659 mmio_invalidate_pid(mmio_atsd_reg, 0);
650 mmio_invalidate_wait(mmio_atsd_reg); 660 mmio_invalidate_wait(mmio_atsd_reg);
651 mmio_invalidate_pid(mmio_atsd_reg, 0, true); 661 mmio_invalidate_pid(mmio_atsd_reg, 0);
652 mmio_invalidate_wait(mmio_atsd_reg); 662 mmio_invalidate_wait(mmio_atsd_reg);
653 } 663
654 release_atsd_reg(mmio_atsd_reg); 664 release_atsd_reg(mmio_atsd_reg);
655} 665}
656 666
@@ -667,7 +677,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
667 * There should be no more translation requests for this PID, but we 677 * There should be no more translation requests for this PID, but we
668 * need to ensure any entries for it are removed from the TLB. 678 * need to ensure any entries for it are removed from the TLB.
669 */ 679 */
670 mmio_invalidate(npu_context, 0, 0, true); 680 mmio_invalidate(npu_context, 0, ~0UL);
671} 681}
672 682
673static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, 683static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -676,8 +686,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
676 pte_t pte) 686 pte_t pte)
677{ 687{
678 struct npu_context *npu_context = mn_to_npu_context(mn); 688 struct npu_context *npu_context = mn_to_npu_context(mn);
679 689 mmio_invalidate(npu_context, address, PAGE_SIZE);
680 mmio_invalidate(npu_context, 1, address, true);
681} 690}
682 691
683static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, 692static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -685,21 +694,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
685 unsigned long start, unsigned long end) 694 unsigned long start, unsigned long end)
686{ 695{
687 struct npu_context *npu_context = mn_to_npu_context(mn); 696 struct npu_context *npu_context = mn_to_npu_context(mn);
688 unsigned long address; 697 mmio_invalidate(npu_context, start, end - start);
689
690 if (end - start > atsd_threshold) {
691 /*
692 * Just invalidate the entire PID if the address range is too
693 * large.
694 */
695 mmio_invalidate(npu_context, 0, 0, true);
696 } else {
697 for (address = start; address < end; address += PAGE_SIZE)
698 mmio_invalidate(npu_context, 1, address, false);
699
700 /* Do the flush only on the final addess == end */
701 mmio_invalidate(npu_context, 1, address, true);
702 }
703} 698}
704 699
705static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { 700static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -962,11 +957,6 @@ int pnv_npu2_init(struct pnv_phb *phb)
962 static int npu_index; 957 static int npu_index;
963 uint64_t rc = 0; 958 uint64_t rc = 0;
964 959
965 if (!atsd_threshold_dentry) {
966 atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
967 0600, powerpc_debugfs_root, &atsd_threshold);
968 }
969
970 phb->npu.nmmu_flush = 960 phb->npu.nmmu_flush =
971 of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); 961 of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
972 for_each_child_of_node(phb->hose->dn, dn) { 962 for_each_child_of_node(phb->hose->dn, dn) {
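Editor's note: a minimal userspace sketch of the range-to-size selection introduced in mmio_invalidate() above, assuming the same SZ_64K/SZ_2M/SZ_1G windows; the enum names and the main() driver are illustrative only and not part of the kernel code.

#include <stdio.h>

#define SZ_64K (64UL * 1024)
#define SZ_2M  (2UL * 1024 * 1024)
#define SZ_1G  (1UL * 1024 * 1024 * 1024)
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

enum atsd_size { ATSD_64K, ATSD_2M, ATSD_1G, ATSD_FULL_PID };

/* Mirrors the selection in mmio_invalidate(): pick the smallest supported
 * invalidation size whose naturally aligned window covers [start, end];
 * otherwise over-invalidate by flushing the entire PID. */
static enum atsd_size pick_atsd_size(unsigned long start, unsigned long size)
{
	unsigned long end = start + size - 1;

	if (size == SZ_64K)
		return ATSD_64K;
	if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M))
		return ATSD_2M;
	if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G))
		return ATSD_1G;
	return ATSD_FULL_PID;	/* over-invalidation is cheaper than many ATSDs */
}

int main(void)
{
	printf("%d\n", pick_atsd_size(0x10000, SZ_64K));	/* -> ATSD_64K */
	printf("%d\n", pick_atsd_size(0x200000, 3 * SZ_64K));	/* fits one 2M window */
	printf("%d\n", pick_atsd_size(0x1fffff, 2));		/* straddles 2M -> ATSD_1G */
	return 0;
}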
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index badb29bde93f..d90ee4fc2c6a 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -199,7 +199,7 @@ void __init opal_powercap_init(void)
199 } 199 }
200 200
201 j = 0; 201 j = 0;
202 pcaps[i].pg.name = node->name; 202 pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
203 if (has_min) { 203 if (has_min) {
204 powercap_add_attr(min, "powercap-min", 204 powercap_add_attr(min, "powercap-min",
205 &pcaps[i].pattrs[j]); 205 &pcaps[i].pattrs[j]);
@@ -237,6 +237,7 @@ out_pcaps_pattrs:
237 while (--i >= 0) { 237 while (--i >= 0) {
238 kfree(pcaps[i].pattrs); 238 kfree(pcaps[i].pattrs);
239 kfree(pcaps[i].pg.attrs); 239 kfree(pcaps[i].pg.attrs);
240 kfree(pcaps[i].pg.name);
240 } 241 }
241 kobject_put(powercap_kobj); 242 kobject_put(powercap_kobj);
242out_pcaps: 243out_pcaps:
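Editor's note: the hunk above switches the group name from borrowing node->name to an allocated kasprintf("%pOFn", ...) string, which is why the unwind path now also frees it. A small ownership sketch, using strdup()/free() as stand-ins for kasprintf()/kfree():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct group { char *name; };

/* The name is now owned by the group (allocated), so every unwind path
 * must free it as well. */
static int init_group(struct group *g, const char *node_name)
{
	g->name = strdup(node_name);
	return g->name ? 0 : -12;	/* -ENOMEM */
}

int main(void)
{
	struct group g;

	if (init_group(&g, "occ-sensor") == 0) {
		printf("group %s\n", g.name);
		free(g.name);	/* mirrors the kfree(pcaps[i].pg.name) added above */
	}
	return 0;
}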
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index f7d04b6a2d7a..179609220e6f 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -214,9 +214,9 @@ void __init opal_sensor_groups_init(void)
214 } 214 }
215 215
216 if (!of_property_read_u32(node, "ibm,chip-id", &chipid)) 216 if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
217 sprintf(sgs[i].name, "%s%d", node->name, chipid); 217 sprintf(sgs[i].name, "%pOFn%d", node, chipid);
218 else 218 else
219 sprintf(sgs[i].name, "%s", node->name); 219 sprintf(sgs[i].name, "%pOFn", node);
220 220
221 sgs[i].sg.name = sgs[i].name; 221 sgs[i].sg.name = sgs[i].name;
222 if (add_attr_group(ops, len, &sgs[i], sgid)) { 222 if (add_attr_group(ops, len, &sgs[i], sgid)) {
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 9aa87df114fd..916a4b7b1bb5 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -194,7 +194,7 @@ void __init opal_sys_param_init(void)
194 count = of_property_count_strings(sysparam, "param-name"); 194 count = of_property_count_strings(sysparam, "param-name");
195 if (count < 0) { 195 if (count < 0) {
196 pr_err("SYSPARAM: No string found of property param-name in " 196 pr_err("SYSPARAM: No string found of property param-name in "
197 "the node %s\n", sysparam->name); 197 "the node %pOFn\n", sysparam);
198 goto out_param_buf; 198 goto out_param_buf;
199 } 199 }
200 200
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 38fe4087484a..beed86f4224b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -171,7 +171,7 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
171 /* 171 /*
172 * Allocate a buffer to hold the MC recoverable ranges. 172 * Allocate a buffer to hold the MC recoverable ranges.
173 */ 173 */
174 mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64))); 174 mc_recoverable_range =__va(memblock_phys_alloc(size, __alignof__(u64)));
175 memset(mc_recoverable_range, 0, size); 175 memset(mc_recoverable_range, 0, size);
176 176
177 for (i = 0; i < mc_recoverable_range_len; i++) { 177 for (i = 0; i < mc_recoverable_range_len; i++) {
@@ -535,7 +535,7 @@ static int opal_recover_mce(struct pt_regs *regs,
535 return recovered; 535 return recovered;
536} 536}
537 537
538void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) 538void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
539{ 539{
540 panic_flush_kmsg_start(); 540 panic_flush_kmsg_start();
541 541
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cde710297a4e..dd807446801e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -17,11 +17,10 @@
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/bootmem.h> 20#include <linux/memblock.h>
21#include <linux/irq.h> 21#include <linux/irq.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/msi.h> 23#include <linux/msi.h>
24#include <linux/memblock.h>
25#include <linux/iommu.h> 24#include <linux/iommu.h>
26#include <linux/rculist.h> 25#include <linux/rculist.h>
27#include <linux/sizes.h> 26#include <linux/sizes.h>
@@ -3770,7 +3769,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
3770 phb_id = be64_to_cpup(prop64); 3769 phb_id = be64_to_cpup(prop64);
3771 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 3770 pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
3772 3771
3773 phb = memblock_virt_alloc(sizeof(*phb), 0); 3772 phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES);
3774 3773
3775 /* Allocate PCI controller */ 3774 /* Allocate PCI controller */
3776 phb->hose = hose = pcibios_alloc_controller(np); 3775 phb->hose = hose = pcibios_alloc_controller(np);
@@ -3816,7 +3815,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
3816 else 3815 else
3817 phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; 3816 phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
3818 3817
3819 phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0); 3818 phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES);
3820 3819
3821 /* Parse 32-bit and IO ranges (if any) */ 3820 /* Parse 32-bit and IO ranges (if any) */
3822 pci_process_bridge_OF_ranges(hose, np, !hose->global_number); 3821 pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
@@ -3875,7 +3874,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
3875 } 3874 }
3876 pemap_off = size; 3875 pemap_off = size;
3877 size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); 3876 size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
3878 aux = memblock_virt_alloc(size, 0); 3877 aux = memblock_alloc(size, SMP_CACHE_BYTES);
3879 phb->ioda.pe_alloc = aux; 3878 phb->ioda.pe_alloc = aux;
3880 phb->ioda.m64_segmap = aux + m64map_off; 3879 phb->ioda.m64_segmap = aux + m64map_off;
3881 phb->ioda.m32_segmap = aux + m32map_off; 3880 phb->ioda.m32_segmap = aux + m32map_off;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index adddde023622..14befee4b3f1 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -219,17 +219,41 @@ static void pnv_prepare_going_down(void)
219 219
220static void __noreturn pnv_restart(char *cmd) 220static void __noreturn pnv_restart(char *cmd)
221{ 221{
222 long rc = OPAL_BUSY; 222 long rc;
223 223
224 pnv_prepare_going_down(); 224 pnv_prepare_going_down();
225 225
226 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 226 do {
227 rc = opal_cec_reboot(); 227 if (!cmd)
228 if (rc == OPAL_BUSY_EVENT) 228 rc = opal_cec_reboot();
229 opal_poll_events(NULL); 229 else if (strcmp(cmd, "full") == 0)
230 rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
230 else 231 else
232 rc = OPAL_UNSUPPORTED;
233
234 if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 235 /* Opal is busy; wait for some time and retry */
236 opal_poll_events(NULL);
231 mdelay(10); 237 mdelay(10);
232 } 238
239 } else if (cmd && rc) {
240 /* Unknown error while issuing reboot */
241 if (rc == OPAL_UNSUPPORTED)
242 pr_err("Unsupported '%s' reboot.\n", cmd);
243 else
244 pr_err("Unable to issue '%s' reboot. Err=%ld\n",
245 cmd, rc);
246 pr_info("Forcing a cec-reboot\n");
247 cmd = NULL;
248 rc = OPAL_BUSY;
249
250 } else if (rc != OPAL_SUCCESS) {
251 /* Unknown error while issuing cec-reboot */
252 pr_err("Unable to reboot. Err=%ld\n", rc);
253 }
254
255 } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
256
233 for (;;) 257 for (;;)
234 opal_poll_events(NULL); 258 opal_poll_events(NULL);
235} 259}
@@ -437,6 +461,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
437 return ret_freq; 461 return ret_freq;
438} 462}
439 463
464static long pnv_machine_check_early(struct pt_regs *regs)
465{
466 long handled = 0;
467
468 if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
469 handled = cur_cpu_spec->machine_check_early(regs);
470
471 return handled;
472}
473
440define_machine(powernv) { 474define_machine(powernv) {
441 .name = "PowerNV", 475 .name = "PowerNV",
442 .probe = pnv_probe, 476 .probe = pnv_probe,
@@ -448,6 +482,7 @@ define_machine(powernv) {
448 .machine_shutdown = pnv_shutdown, 482 .machine_shutdown = pnv_shutdown,
449 .power_save = NULL, 483 .power_save = NULL,
450 .calibrate_decr = generic_calibrate_decr, 484 .calibrate_decr = generic_calibrate_decr,
485 .machine_check_early = pnv_machine_check_early,
451#ifdef CONFIG_KEXEC_CORE 486#ifdef CONFIG_KEXEC_CORE
452 .kexec_cpu_down = pnv_kexec_cpu_down, 487 .kexec_cpu_down = pnv_kexec_cpu_down,
453#endif 488#endif
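Editor's note: a userspace sketch of the new pnv_restart() retry logic above, with stubbed return codes and firmware calls. Busy results are retried, a failing named reboot type ("full") falls back to a plain cec-reboot, and unlike the kernel's __noreturn version this sketch returns instead of spinning forever.

#include <stdio.h>
#include <string.h>

/* Illustrative return codes standing in for the OPAL ones. */
#define OPAL_SUCCESS      0
#define OPAL_BUSY        (-1)
#define OPAL_BUSY_EVENT  (-2)
#define OPAL_UNSUPPORTED (-3)

/* Stubs for the firmware calls: plain reboot is busy twice, then succeeds;
 * the "full IPL" variant is not supported on this pretend machine. */
static int calls;
static long cec_reboot(void)           { return ++calls < 3 ? OPAL_BUSY : OPAL_SUCCESS; }
static long cec_reboot2(const char *m) { (void)m; return OPAL_UNSUPPORTED; }

static void restart(const char *cmd)
{
	long rc;

	do {
		if (!cmd)
			rc = cec_reboot();
		else if (strcmp(cmd, "full") == 0)
			rc = cec_reboot2("full");
		else
			rc = OPAL_UNSUPPORTED;

		if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
			/* firmware busy: the kernel polls events and delays here */
		} else if (cmd && rc != OPAL_SUCCESS) {
			/* the named reboot type failed: fall back to a plain reboot */
			fprintf(stderr, "'%s' reboot failed (%ld), forcing cec-reboot\n",
				cmd, rc);
			cmd = NULL;
			rc = OPAL_BUSY;	/* force one more pass through the loop */
		} else if (rc != OPAL_SUCCESS) {
			fprintf(stderr, "unable to reboot, rc=%ld\n", rc);
		}
	} while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
}

int main(void) { restart("full"); return 0; }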
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index 6f7525555b19..24864b8aaf5d 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -49,7 +49,6 @@ config PS3_HTAB_SIZE
49config PS3_DYNAMIC_DMA 49config PS3_DYNAMIC_DMA
50 depends on PPC_PS3 50 depends on PPC_PS3
51 bool "PS3 Platform dynamic DMA page table management" 51 bool "PS3 Platform dynamic DMA page table management"
52 default n
53 help 52 help
54 This option will enable kernel support to take advantage of the 53 This option will enable kernel support to take advantage of the
55 per device dynamic DMA page table management provided by the Cell 54 per device dynamic DMA page table management provided by the Cell
@@ -89,7 +88,6 @@ config PS3_SYS_MANAGER
89config PS3_REPOSITORY_WRITE 88config PS3_REPOSITORY_WRITE
90 bool "PS3 Repository write support" if PS3_ADVANCED 89 bool "PS3 Repository write support" if PS3_ADVANCED
91 depends on PPC_PS3 90 depends on PPC_PS3
92 default n
93 help 91 help
94 Enables support for writing to the PS3 System Repository. 92 Enables support for writing to the PS3 System Repository.
95 93
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index cdbfc5cfd6f3..f5387ad82279 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -664,7 +664,7 @@ static int update_flash_db(void)
664 db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff); 664 db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff);
665 665
666 count = os_area_flash_write(db, sizeof(struct os_area_db), pos); 666 count = os_area_flash_write(db, sizeof(struct os_area_db), pos);
667 if (count < sizeof(struct os_area_db)) { 667 if (count < 0 || count < sizeof(struct os_area_db)) {
668 pr_debug("%s: os_area_flash_write failed %zd\n", __func__, 668 pr_debug("%s: os_area_flash_write failed %zd\n", __func__,
669 count); 669 count);
670 error = count < 0 ? count : -EIO; 670 error = count < 0 ? count : -EIO;
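Editor's note: a short sketch of the signed/unsigned pitfall the one-line fix above addresses. Comparing a possibly negative ssize_t against sizeof() promotes it to unsigned, so the explicit "count < 0" test is what keeps a write error from being silently missed; the helper below is illustrative only.

#include <stdio.h>

/* count mirrors the ssize_t return of os_area_flash_write(): negative on
 * error, otherwise the number of bytes written. */
static int check(long count, unsigned long want)
{
	/* Without the "count < 0" test, a negative count would be converted
	 * to a huge unsigned value by the comparison against want/sizeof(),
	 * making the error path unreachable. */
	if (count < 0 || (unsigned long)count < want)
		return count < 0 ? (int)count : -5;	/* -EIO */
	return 0;
}

int main(void)
{
	printf("%d\n", check(-5, 16));	/* write error is now caught */
	printf("%d\n", check(8, 16));	/* short write -> -EIO */
	printf("%d\n", check(16, 16));	/* success -> 0 */
	return 0;
}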
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 77a37520068d..658bfab3350b 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -24,7 +24,7 @@
24#include <linux/root_dev.h> 24#include <linux/root_dev.h>
25#include <linux/console.h> 25#include <linux/console.h>
26#include <linux/export.h> 26#include <linux/export.h>
27#include <linux/bootmem.h> 27#include <linux/memblock.h>
28 28
29#include <asm/machdep.h> 29#include <asm/machdep.h>
30#include <asm/firmware.h> 30#include <asm/firmware.h>
@@ -126,7 +126,7 @@ static void __init prealloc(struct ps3_prealloc *p)
126 if (!p->size) 126 if (!p->size)
127 return; 127 return;
128 128
129 p->address = memblock_virt_alloc(p->size, p->align); 129 p->address = memblock_alloc(p->size, p->align);
130 130
131 printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, 131 printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size,
132 p->address); 132 p->address);
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index b54850845466..7746c2a3c509 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -215,8 +215,7 @@ static int __init setup_areas(struct spu *spu)
215 goto fail_ioremap; 215 goto fail_ioremap;
216 } 216 }
217 217
218 spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys, 218 spu->local_store = (__force void *)ioremap_wc(spu->local_store_phys, LS_SIZE);
219 LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
220 219
221 if (!spu->local_store) { 220 if (!spu->local_store) {
222 pr_debug("%s:%d: ioremap local_store failed\n", 221 pr_debug("%s:%d: ioremap local_store failed\n",
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 0c698fd6d491..2e4bd32154b5 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -28,7 +28,6 @@ config PPC_PSERIES
28config PPC_SPLPAR 28config PPC_SPLPAR
29 depends on PPC_PSERIES 29 depends on PPC_PSERIES
30 bool "Support for shared-processor logical partitions" 30 bool "Support for shared-processor logical partitions"
31 default n
32 help 31 help
33 Enabling this option will make the kernel run more efficiently 32 Enabling this option will make the kernel run more efficiently
34 on logically-partitioned pSeries systems which use shared 33 on logically-partitioned pSeries systems which use shared
@@ -99,7 +98,6 @@ config PPC_SMLPAR
99 bool "Support for shared-memory logical partitions" 98 bool "Support for shared-memory logical partitions"
100 depends on PPC_PSERIES 99 depends on PPC_PSERIES
101 select LPARCFG 100 select LPARCFG
102 default n
103 help 101 help
104 Select this option to enable shared memory partition support. 102 Select this option to enable shared memory partition support.
105 With this option a system running in an LPAR can be given more 103 With this option a system running in an LPAR can be given more
@@ -140,3 +138,10 @@ config IBMEBUS
140 bool "Support for GX bus based adapters" 138 bool "Support for GX bus based adapters"
141 help 139 help
142 Bus device driver for GX bus based adapters. 140 Bus device driver for GX bus based adapters.
141
142config PAPR_SCM
143 depends on PPC_PSERIES && MEMORY_HOTPLUG
144 select LIBNVDIMM
145 tristate "Support for the PAPR Storage Class Memory interface"
146 help
147 Enable access to hypervisor provided storage class memory.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 7e89d5c47068..a43ec843c8e2 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_KEXEC_CORE) += kexec.o
13obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o 13obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
14 14
15obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o 15obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
16obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o 16obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o
17 17
18obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o 18obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
19obj-$(CONFIG_HVCS) += hvcserver.o 19obj-$(CONFIG_HVCS) += hvcserver.o
@@ -24,6 +24,7 @@ obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
24obj-$(CONFIG_LPARCFG) += lparcfg.o 24obj-$(CONFIG_LPARCFG) += lparcfg.o
25obj-$(CONFIG_IBMVIO) += vio.o 25obj-$(CONFIG_IBMVIO) += vio.o
26obj-$(CONFIG_IBMEBUS) += ibmebus.o 26obj-$(CONFIG_IBMEBUS) += ibmebus.o
27obj-$(CONFIG_PAPR_SCM) += papr_scm.o
27 28
28ifdef CONFIG_PPC_PSERIES 29ifdef CONFIG_PPC_PSERIES
29obj-$(CONFIG_SUSPEND) += suspend.o 30obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c03f078..7625546caefd 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -32,8 +32,6 @@ static struct workqueue_struct *pseries_hp_wq;
32struct pseries_hp_work { 32struct pseries_hp_work {
33 struct work_struct work; 33 struct work_struct work;
34 struct pseries_hp_errorlog *errlog; 34 struct pseries_hp_errorlog *errlog;
35 struct completion *hp_completion;
36 int *rc;
37}; 35};
38 36
39struct cc_workarea { 37struct cc_workarea {
@@ -329,7 +327,7 @@ int dlpar_release_drc(u32 drc_index)
329 return 0; 327 return 0;
330} 328}
331 329
332static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) 330int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
333{ 331{
334 int rc; 332 int rc;
335 333
@@ -357,6 +355,10 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
357 case PSERIES_HP_ELOG_RESOURCE_CPU: 355 case PSERIES_HP_ELOG_RESOURCE_CPU:
358 rc = dlpar_cpu(hp_elog); 356 rc = dlpar_cpu(hp_elog);
359 break; 357 break;
358 case PSERIES_HP_ELOG_RESOURCE_PMEM:
359 rc = dlpar_hp_pmem(hp_elog);
360 break;
361
360 default: 362 default:
361 pr_warn_ratelimited("Invalid resource (%d) specified\n", 363 pr_warn_ratelimited("Invalid resource (%d) specified\n",
362 hp_elog->resource); 364 hp_elog->resource);
@@ -371,20 +373,13 @@ static void pseries_hp_work_fn(struct work_struct *work)
371 struct pseries_hp_work *hp_work = 373 struct pseries_hp_work *hp_work =
372 container_of(work, struct pseries_hp_work, work); 374 container_of(work, struct pseries_hp_work, work);
373 375
374 if (hp_work->rc) 376 handle_dlpar_errorlog(hp_work->errlog);
375 *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog);
376 else
377 handle_dlpar_errorlog(hp_work->errlog);
378
379 if (hp_work->hp_completion)
380 complete(hp_work->hp_completion);
381 377
382 kfree(hp_work->errlog); 378 kfree(hp_work->errlog);
383 kfree((void *)work); 379 kfree((void *)work);
384} 380}
385 381
386void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, 382void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
387 struct completion *hotplug_done, int *rc)
388{ 383{
389 struct pseries_hp_work *work; 384 struct pseries_hp_work *work;
390 struct pseries_hp_errorlog *hp_errlog_copy; 385 struct pseries_hp_errorlog *hp_errlog_copy;
@@ -397,13 +392,9 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
397 if (work) { 392 if (work) {
398 INIT_WORK((struct work_struct *)work, pseries_hp_work_fn); 393 INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
399 work->errlog = hp_errlog_copy; 394 work->errlog = hp_errlog_copy;
400 work->hp_completion = hotplug_done;
401 work->rc = rc;
402 queue_work(pseries_hp_wq, (struct work_struct *)work); 395 queue_work(pseries_hp_wq, (struct work_struct *)work);
403 } else { 396 } else {
404 *rc = -ENOMEM;
405 kfree(hp_errlog_copy); 397 kfree(hp_errlog_copy);
406 complete(hotplug_done);
407 } 398 }
408} 399}
409 400
@@ -521,18 +512,15 @@ static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog)
521static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, 512static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
522 const char *buf, size_t count) 513 const char *buf, size_t count)
523{ 514{
524 struct pseries_hp_errorlog *hp_elog; 515 struct pseries_hp_errorlog hp_elog;
525 struct completion hotplug_done;
526 char *argbuf; 516 char *argbuf;
527 char *args; 517 char *args;
528 int rc; 518 int rc;
529 519
530 args = argbuf = kstrdup(buf, GFP_KERNEL); 520 args = argbuf = kstrdup(buf, GFP_KERNEL);
531 hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); 521 if (!argbuf) {
532 if (!hp_elog || !argbuf) {
533 pr_info("Could not allocate resources for DLPAR operation\n"); 522 pr_info("Could not allocate resources for DLPAR operation\n");
534 kfree(argbuf); 523 kfree(argbuf);
535 kfree(hp_elog);
536 return -ENOMEM; 524 return -ENOMEM;
537 } 525 }
538 526
@@ -540,25 +528,22 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
540 * Parse out the request from the user, this will be in the form: 528 * Parse out the request from the user, this will be in the form:
541 * <resource> <action> <id_type> <id> 529 * <resource> <action> <id_type> <id>
542 */ 530 */
543 rc = dlpar_parse_resource(&args, hp_elog); 531 rc = dlpar_parse_resource(&args, &hp_elog);
544 if (rc) 532 if (rc)
545 goto dlpar_store_out; 533 goto dlpar_store_out;
546 534
547 rc = dlpar_parse_action(&args, hp_elog); 535 rc = dlpar_parse_action(&args, &hp_elog);
548 if (rc) 536 if (rc)
549 goto dlpar_store_out; 537 goto dlpar_store_out;
550 538
551 rc = dlpar_parse_id_type(&args, hp_elog); 539 rc = dlpar_parse_id_type(&args, &hp_elog);
552 if (rc) 540 if (rc)
553 goto dlpar_store_out; 541 goto dlpar_store_out;
554 542
555 init_completion(&hotplug_done); 543 rc = handle_dlpar_errorlog(&hp_elog);
556 queue_hotplug_event(hp_elog, &hotplug_done, &rc);
557 wait_for_completion(&hotplug_done);
558 544
559dlpar_store_out: 545dlpar_store_out:
560 kfree(argbuf); 546 kfree(argbuf);
561 kfree(hp_elog);
562 547
563 if (rc) 548 if (rc)
564 pr_err("Could not handle DLPAR request \"%s\"\n", buf); 549 pr_err("Could not handle DLPAR request \"%s\"\n", buf);
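Editor's note: the dlpar_store() change above drops the kzalloc/workqueue/completion round trip and handles the sysfs request synchronously on a stack-allocated error log. A minimal sketch of the resulting shape, with stand-in parsing and a stubbed handler:

#include <stdio.h>

struct hp_errorlog { int resource; int action; };

/* Stand-in for handle_dlpar_errorlog(), which is now callable directly. */
static int handle_errorlog(struct hp_errorlog *e)
{
	printf("handling resource=%d action=%d\n", e->resource, e->action);
	return 0;
}

/* The store path after the patch: parse into a stack-allocated log and handle
 * it synchronously, instead of queueing work and blocking on a completion
 * just to retrieve the return code. */
static int dlpar_store(const char *buf)
{
	struct hp_errorlog elog = { 0 };

	if (sscanf(buf, "%d %d", &elog.resource, &elog.action) != 2)
		return -22;	/* -EINVAL */
	return handle_errorlog(&elog);
}

int main(void) { return dlpar_store("2 1"); }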
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 18014cdeb590..ef6595153642 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -149,7 +149,7 @@ static int dtl_start(struct dtl *dtl)
149 149
150 /* Register our dtl buffer with the hypervisor. The HV expects the 150 /* Register our dtl buffer with the hypervisor. The HV expects the
151 * buffer size to be passed in the second word of the buffer */ 151 * buffer size to be passed in the second word of the buffer */
152 ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; 152 ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
153 153
154 hwcpu = get_hard_smp_processor_id(dtl->cpu); 154 hwcpu = get_hard_smp_processor_id(dtl->cpu);
155 addr = __pa(dtl->buf); 155 addr = __pa(dtl->buf);
@@ -184,7 +184,7 @@ static void dtl_stop(struct dtl *dtl)
184 184
185static u64 dtl_current_index(struct dtl *dtl) 185static u64 dtl_current_index(struct dtl *dtl)
186{ 186{
187 return lppaca_of(dtl->cpu).dtl_idx; 187 return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx);
188} 188}
189#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 189#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
190 190
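Editor's note: a small endianness sketch of why the DTL registration above now uses cpu_to_be32()/be64_to_cpu(). The buffer is shared with the hypervisor, which treats it as big-endian regardless of kernel endianness; htobe32()/htobe64()/be64toh() from glibc's <endian.h> are assumed here as the userspace analogues.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t buf[2] = { 0 };
	uint64_t hv_index = htobe64(42);	/* as the hypervisor would store it */

	buf[1] = htobe32(4096);			/* cpu_to_be32(DISPATCH_LOG_BYTES) */
	printf("stored (big-endian on the wire): 0x%08x\n", (unsigned)buf[1]);
	printf("index read back: %llu\n",
	       (unsigned long long)be64toh(hv_index));	/* be64_to_cpu() */
	return 0;
}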
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 823cb27efa8b..c9e5ca4afb26 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -438,7 +438,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
438/** 438/**
439 * pseries_eeh_get_state - Retrieve PE state 439 * pseries_eeh_get_state - Retrieve PE state
440 * @pe: EEH PE 440 * @pe: EEH PE
441 * @state: return value 441 * @delay: suggested time to wait if state is unavailable
442 * 442 *
443 * Retrieve the state of the specified PE. On RTAS compliant 443 * Retrieve the state of the specified PE. On RTAS compliant
444 * pseries platform, there already has one dedicated RTAS function 444 * pseries platform, there already has one dedicated RTAS function
@@ -448,7 +448,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
448 * RTAS calls for the purpose, we need to try the new one and back 448 * RTAS calls for the purpose, we need to try the new one and back
449 * to the old one if the new one couldn't work properly. 449 * to the old one if the new one couldn't work properly.
450 */ 450 */
451static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) 451static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
452{ 452{
453 int config_addr; 453 int config_addr;
454 int ret; 454 int ret;
@@ -499,7 +499,8 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
499 break; 499 break;
500 case 5: 500 case 5:
501 if (rets[2]) { 501 if (rets[2]) {
502 if (state) *state = rets[2]; 502 if (delay)
503 *delay = rets[2];
503 result = EEH_STATE_UNAVAILABLE; 504 result = EEH_STATE_UNAVAILABLE;
504 } else { 505 } else {
505 result = EEH_STATE_NOT_SUPPORT; 506 result = EEH_STATE_NOT_SUPPORT;
@@ -554,64 +555,6 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
554} 555}
555 556
556/** 557/**
557 * pseries_eeh_wait_state - Wait for PE state
558 * @pe: EEH PE
559 * @max_wait: maximal period in millisecond
560 *
561 * Wait for the state of associated PE. It might take some time
562 * to retrieve the PE's state.
563 */
564static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
565{
566 int ret;
567 int mwait;
568
569 /*
570 * According to PAPR, the state of PE might be temporarily
571 * unavailable. Under the circumstance, we have to wait
572 * for indicated time determined by firmware. The maximal
573 * wait time is 5 minutes, which is acquired from the original
574 * EEH implementation. Also, the original implementation
575 * also defined the minimal wait time as 1 second.
576 */
577#define EEH_STATE_MIN_WAIT_TIME (1000)
578#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
579
580 while (1) {
581 ret = pseries_eeh_get_state(pe, &mwait);
582
583 /*
584 * If the PE's state is temporarily unavailable,
585 * we have to wait for the specified time. Otherwise,
586 * the PE's state will be returned immediately.
587 */
588 if (ret != EEH_STATE_UNAVAILABLE)
589 return ret;
590
591 if (max_wait <= 0) {
592 pr_warn("%s: Timeout when getting PE's state (%d)\n",
593 __func__, max_wait);
594 return EEH_STATE_NOT_SUPPORT;
595 }
596
597 if (mwait <= 0) {
598 pr_warn("%s: Firmware returned bad wait value %d\n",
599 __func__, mwait);
600 mwait = EEH_STATE_MIN_WAIT_TIME;
601 } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
602 pr_warn("%s: Firmware returned too long wait value %d\n",
603 __func__, mwait);
604 mwait = EEH_STATE_MAX_WAIT_TIME;
605 }
606
607 max_wait -= mwait;
608 msleep(mwait);
609 }
610
611 return EEH_STATE_NOT_SUPPORT;
612}
613
614/**
615 * pseries_eeh_get_log - Retrieve error log 558 * pseries_eeh_get_log - Retrieve error log
616 * @pe: EEH PE 559 * @pe: EEH PE
617 * @severity: temporary or permanent error log 560 * @severity: temporary or permanent error log
@@ -849,7 +792,6 @@ static struct eeh_ops pseries_eeh_ops = {
849 .get_pe_addr = pseries_eeh_get_pe_addr, 792 .get_pe_addr = pseries_eeh_get_pe_addr,
850 .get_state = pseries_eeh_get_state, 793 .get_state = pseries_eeh_get_state,
851 .reset = pseries_eeh_reset, 794 .reset = pseries_eeh_reset,
852 .wait_state = pseries_eeh_wait_state,
853 .get_log = pseries_eeh_get_log, 795 .get_log = pseries_eeh_get_log,
854 .configure_bridge = pseries_eeh_configure_bridge, 796 .configure_bridge = pseries_eeh_configure_bridge,
855 .err_inject = NULL, 797 .err_inject = NULL,
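Editor's note: for reference, a standalone sketch of the wait loop that the hunk above removes from the pseries backend, now that get_state() reports the firmware-suggested delay through its pointer argument. get_state() below is a stub, and the constants mirror the removed EEH_STATE_MIN/MAX_WAIT_TIME defines.

#include <stdio.h>

#define EEH_STATE_UNAVAILABLE (-1)
#define EEH_STATE_NOT_SUPPORT (-2)

#define MIN_WAIT_MS 1000		/* 1 second  */
#define MAX_WAIT_MS (300 * 1000)	/* 5 minutes */

/* Stub for pseries_eeh_get_state(): unavailable twice, then a ready state. */
static int calls;
static int get_state(int *delay)
{
	if (++calls < 3) { *delay = 200; return EEH_STATE_UNAVAILABLE; }
	return 7;	/* some "ready" state */
}

/* Poll the PE state, sleeping for the firmware-suggested delay (clamped to
 * [1s, 5min]) while it reports "temporarily unavailable". */
static int wait_state(int max_wait)
{
	int mwait, ret;

	while (1) {
		ret = get_state(&mwait);
		if (ret != EEH_STATE_UNAVAILABLE)
			return ret;
		if (max_wait <= 0)
			return EEH_STATE_NOT_SUPPORT;	/* timed out */
		if (mwait <= 0)
			mwait = MIN_WAIT_MS;
		else if (mwait > MAX_WAIT_MS)
			mwait = MAX_WAIT_MS;
		max_wait -= mwait;
		/* msleep(mwait) in the kernel; omitted here */
	}
}

int main(void) { printf("state=%d\n", wait_state(10000)); return 0; }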
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index 6eeb0d4bab61..446ef104fb3a 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -16,7 +16,8 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19#include <asm/prom.h> 19#include <linux/interrupt.h>
20#include <linux/of_irq.h>
20 21
21#include "pseries.h" 22#include "pseries.h"
22 23
@@ -24,34 +25,19 @@ void request_event_sources_irqs(struct device_node *np,
24 irq_handler_t handler, 25 irq_handler_t handler,
25 const char *name) 26 const char *name)
26{ 27{
27 int i, index, count = 0; 28 int i, virq, rc;
28 struct of_phandle_args oirq;
29 unsigned int virqs[16];
30 29
31 /* First try to do a proper OF tree parsing */ 30 for (i = 0; i < 16; i++) {
32 for (index = 0; of_irq_parse_one(np, index, &oirq) == 0; 31 virq = of_irq_get(np, i);
33 index++) { 32 if (virq < 0)
34 if (count > 15) 33 return;
35 break; 34 if (WARN(!virq, "event-sources: Unable to allocate "
36 virqs[count] = irq_create_of_mapping(&oirq); 35 "interrupt number for %pOF\n", np))
37 if (!virqs[count]) { 36 continue;
38 pr_err("event-sources: Unable to allocate "
39 "interrupt number for %pOF\n",
40 np);
41 WARN_ON(1);
42 } else {
43 count++;
44 }
45 }
46 37
47 /* Now request them */ 38 rc = request_irq(virq, handler, 0, name, NULL);
48 for (i = 0; i < count; i++) { 39 if (WARN(rc, "event-sources: Unable to request interrupt %d for %pOF\n",
49 if (request_irq(virqs[i], handler, 0, name, NULL)) { 40 virq, np))
50 pr_err("event-sources: Unable to request interrupt "
51 "%d for %pOF\n", virqs[i], np);
52 WARN_ON(1);
53 return; 41 return;
54 }
55 } 42 }
56} 43}
57
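Editor's note: a sketch of the new loop shape in request_event_sources_irqs(), with a stubbed get_irq() in place of of_irq_get(). A negative return ends the scan (end of list or a real error), zero means the mapping failed and is skipped with a warning, and anything else is requested; at most 16 sources are scanned.

#include <stdio.h>

/* Stub for of_irq_get(): a table of mapped interrupt numbers, 0 meaning
 * "mapping failed", and a negative value once the list is exhausted. */
static int irqs[] = { 17, 0, 23 };
static int get_irq(int index)
{
	if (index >= (int)(sizeof(irqs) / sizeof(irqs[0])))
		return -22;	/* -EINVAL: no more entries */
	return irqs[index];
}

int main(void)
{
	for (int i = 0; i < 16; i++) {
		int virq = get_irq(i);

		if (virq < 0)
			break;
		if (!virq) {
			fprintf(stderr, "no mapping for entry %d, skipping\n", i);
			continue;
		}
		printf("request_irq(%d)\n", virq);	/* request the interrupt */
	}
	return 0;
}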
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index a3bbeb43689e..608ecad0178f 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -65,6 +65,8 @@ hypertas_fw_features_table[] = {
65 {FW_FEATURE_SET_MODE, "hcall-set-mode"}, 65 {FW_FEATURE_SET_MODE, "hcall-set-mode"},
66 {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"}, 66 {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"},
67 {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"}, 67 {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"},
68 {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"},
69 {FW_FEATURE_PAPR_SCM, "hcall-scm"},
68}; 70};
69 71
70/* Build up the firmware features bitmask using the contents of 72/* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6ef77caf7bcf..2f8e62163602 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -287,7 +287,7 @@ static int pseries_add_processor(struct device_node *np)
287 287
288 if (cpumask_empty(tmp)) { 288 if (cpumask_empty(tmp)) {
289 printk(KERN_ERR "Unable to find space in cpu_present_mask for" 289 printk(KERN_ERR "Unable to find space in cpu_present_mask for"
290 " processor %s with %d thread(s)\n", np->name, 290 " processor %pOFn with %d thread(s)\n", np,
291 nthreads); 291 nthreads);
292 goto out_unlock; 292 goto out_unlock;
293 } 293 }
@@ -481,8 +481,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
481 481
482 if (rc) { 482 if (rc) {
483 saved_rc = rc; 483 saved_rc = rc;
484 pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n", 484 pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
485 dn->name, rc, drc_index); 485 dn, rc, drc_index);
486 486
487 rc = dlpar_release_drc(drc_index); 487 rc = dlpar_release_drc(drc_index);
488 if (!rc) 488 if (!rc)
@@ -494,8 +494,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
494 rc = dlpar_online_cpu(dn); 494 rc = dlpar_online_cpu(dn);
495 if (rc) { 495 if (rc) {
496 saved_rc = rc; 496 saved_rc = rc;
497 pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n", 497 pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
498 dn->name, rc, drc_index); 498 dn, rc, drc_index);
499 499
500 rc = dlpar_detach_node(dn); 500 rc = dlpar_detach_node(dn);
501 if (!rc) 501 if (!rc)
@@ -504,7 +504,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
504 return saved_rc; 504 return saved_rc;
505 } 505 }
506 506
507 pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name, 507 pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
508 drc_index); 508 drc_index);
509 return rc; 509 return rc;
510} 510}
@@ -570,19 +570,19 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
570{ 570{
571 int rc; 571 int rc;
572 572
573 pr_debug("Attempting to remove CPU %s, drc index: %x\n", 573 pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
574 dn->name, drc_index); 574 dn, drc_index);
575 575
576 rc = dlpar_offline_cpu(dn); 576 rc = dlpar_offline_cpu(dn);
577 if (rc) { 577 if (rc) {
578 pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc); 578 pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
579 return -EINVAL; 579 return -EINVAL;
580 } 580 }
581 581
582 rc = dlpar_release_drc(drc_index); 582 rc = dlpar_release_drc(drc_index);
583 if (rc) { 583 if (rc) {
584 pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n", 584 pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
585 drc_index, dn->name, rc); 585 drc_index, dn, rc);
586 dlpar_online_cpu(dn); 586 dlpar_online_cpu(dn);
587 return rc; 587 return rc;
588 } 588 }
@@ -591,7 +591,7 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
591 if (rc) { 591 if (rc) {
592 int saved_rc = rc; 592 int saved_rc = rc;
593 593
594 pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc); 594 pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc);
595 595
596 rc = dlpar_acquire_drc(drc_index); 596 rc = dlpar_acquire_drc(drc_index);
597 if (!rc) 597 if (!rc)
@@ -662,8 +662,8 @@ static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
662 rc = of_property_read_u32(dn, "ibm,my-drc-index", 662 rc = of_property_read_u32(dn, "ibm,my-drc-index",
663 &cpu_drcs[cpus_found - 1]); 663 &cpu_drcs[cpus_found - 1]);
664 if (rc) { 664 if (rc) {
665 pr_warn("Error occurred getting drc-index for %s\n", 665 pr_warn("Error occurred getting drc-index for %pOFn\n",
666 dn->name); 666 dn);
667 of_node_put(dn); 667 of_node_put(dn);
668 return -1; 668 return -1;
669 } 669 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index c1578f54c626..2a983b5a52e1 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -101,11 +101,12 @@ static struct property *dlpar_clone_property(struct property *prop,
101 return new_prop; 101 return new_prop;
102} 102}
103 103
104static u32 find_aa_index(struct device_node *dr_node, 104static bool find_aa_index(struct device_node *dr_node,
105 struct property *ala_prop, const u32 *lmb_assoc) 105 struct property *ala_prop,
106 const u32 *lmb_assoc, u32 *aa_index)
106{ 107{
107 u32 *assoc_arrays; 108 u32 *assoc_arrays, new_prop_size;
108 u32 aa_index; 109 struct property *new_prop;
109 int aa_arrays, aa_array_entries, aa_array_sz; 110 int aa_arrays, aa_array_entries, aa_array_sz;
110 int i, index; 111 int i, index;
111 112
@@ -121,54 +122,48 @@ static u32 find_aa_index(struct device_node *dr_node,
121 aa_array_entries = be32_to_cpu(assoc_arrays[1]); 122 aa_array_entries = be32_to_cpu(assoc_arrays[1]);
122 aa_array_sz = aa_array_entries * sizeof(u32); 123 aa_array_sz = aa_array_entries * sizeof(u32);
123 124
124 aa_index = -1;
125 for (i = 0; i < aa_arrays; i++) { 125 for (i = 0; i < aa_arrays; i++) {
126 index = (i * aa_array_entries) + 2; 126 index = (i * aa_array_entries) + 2;
127 127
128 if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz)) 128 if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz))
129 continue; 129 continue;
130 130
131 aa_index = i; 131 *aa_index = i;
132 break; 132 return true;
133 } 133 }
134 134
135 if (aa_index == -1) { 135 new_prop_size = ala_prop->length + aa_array_sz;
136 struct property *new_prop; 136 new_prop = dlpar_clone_property(ala_prop, new_prop_size);
137 u32 new_prop_size; 137 if (!new_prop)
138 138 return false;
139 new_prop_size = ala_prop->length + aa_array_sz;
140 new_prop = dlpar_clone_property(ala_prop, new_prop_size);
141 if (!new_prop)
142 return -1;
143
144 assoc_arrays = new_prop->value;
145 139
146 /* increment the number of entries in the lookup array */ 140 assoc_arrays = new_prop->value;
147 assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
148 141
149 /* copy the new associativity into the lookup array */ 142 /* increment the number of entries in the lookup array */
150 index = aa_arrays * aa_array_entries + 2; 143 assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
151 memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
152 144
153 of_update_property(dr_node, new_prop); 145 /* copy the new associativity into the lookup array */
146 index = aa_arrays * aa_array_entries + 2;
147 memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
154 148
155 /* 149 of_update_property(dr_node, new_prop);
156 * The associativity lookup array index for this lmb is
157 * number of entries - 1 since we added its associativity
158 * to the end of the lookup array.
159 */
160 aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
161 }
162 150
163 return aa_index; 151 /*
152 * The associativity lookup array index for this lmb is
153 * number of entries - 1 since we added its associativity
154 * to the end of the lookup array.
155 */
156 *aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
157 return true;
164} 158}
165 159
166static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb) 160static int update_lmb_associativity_index(struct drmem_lmb *lmb)
167{ 161{
168 struct device_node *parent, *lmb_node, *dr_node; 162 struct device_node *parent, *lmb_node, *dr_node;
169 struct property *ala_prop; 163 struct property *ala_prop;
170 const u32 *lmb_assoc; 164 const u32 *lmb_assoc;
171 u32 aa_index; 165 u32 aa_index;
166 bool found;
172 167
173 parent = of_find_node_by_path("/"); 168 parent = of_find_node_by_path("/");
174 if (!parent) 169 if (!parent)
@@ -200,46 +195,17 @@ static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
200 return -ENODEV; 195 return -ENODEV;
201 } 196 }
202 197
203 aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc); 198 found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index);
204 199
205 dlpar_free_cc_nodes(lmb_node); 200 dlpar_free_cc_nodes(lmb_node);
206 return aa_index;
207}
208 201
209static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb) 202 if (!found) {
210{ 203 pr_err("Could not find LMB associativity\n");
211 int rc, aa_index; 204 return -1;
212
213 lmb->flags |= DRCONF_MEM_ASSIGNED;
214
215 aa_index = lookup_lmb_associativity_index(lmb);
216 if (aa_index < 0) {
217 pr_err("Couldn't find associativity index for drc index %x\n",
218 lmb->drc_index);
219 return aa_index;
220 } 205 }
221 206
222 lmb->aa_index = aa_index; 207 lmb->aa_index = aa_index;
223 208 return 0;
224 rtas_hp_event = true;
225 rc = drmem_update_dt();
226 rtas_hp_event = false;
227
228 return rc;
229}
230
231static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
232{
233 int rc;
234
235 lmb->flags &= ~DRCONF_MEM_ASSIGNED;
236 lmb->aa_index = 0xffffffff;
237
238 rtas_hp_event = true;
239 rc = drmem_update_dt();
240 rtas_hp_event = false;
241
242 return rc;
243} 209}
244 210
245static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) 211static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
@@ -334,7 +300,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
334 nid = memory_add_physaddr_to_nid(base); 300 nid = memory_add_physaddr_to_nid(base);
335 301
336 for (i = 0; i < sections_per_block; i++) { 302 for (i = 0; i < sections_per_block; i++) {
337 remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); 303 __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
338 base += MIN_MEMORY_BLOCK_SIZE; 304 base += MIN_MEMORY_BLOCK_SIZE;
339 } 305 }
340 306
@@ -423,12 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
423 block_sz = pseries_memory_block_size(); 389 block_sz = pseries_memory_block_size();
424 nid = memory_add_physaddr_to_nid(lmb->base_addr); 390 nid = memory_add_physaddr_to_nid(lmb->base_addr);
425 391
426 remove_memory(nid, lmb->base_addr, block_sz); 392 __remove_memory(nid, lmb->base_addr, block_sz);
427 393
428 /* Update memory regions for memory remove */ 394 /* Update memory regions for memory remove */
429 memblock_remove(lmb->base_addr, block_sz); 395 memblock_remove(lmb->base_addr, block_sz);
430 396
431 dlpar_remove_device_tree_lmb(lmb); 397 invalidate_lmb_associativity_index(lmb);
398 lmb->flags &= ~DRCONF_MEM_ASSIGNED;
399
432 return 0; 400 return 0;
433} 401}
434 402
@@ -688,10 +656,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
688 if (lmb->flags & DRCONF_MEM_ASSIGNED) 656 if (lmb->flags & DRCONF_MEM_ASSIGNED)
689 return -EINVAL; 657 return -EINVAL;
690 658
691 rc = dlpar_add_device_tree_lmb(lmb); 659 rc = update_lmb_associativity_index(lmb);
692 if (rc) { 660 if (rc) {
693 pr_err("Couldn't update device tree for drc index %x\n",
694 lmb->drc_index);
695 dlpar_release_drc(lmb->drc_index); 661 dlpar_release_drc(lmb->drc_index);
696 return rc; 662 return rc;
697 } 663 }
@@ -702,16 +668,16 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
702 nid = memory_add_physaddr_to_nid(lmb->base_addr); 668 nid = memory_add_physaddr_to_nid(lmb->base_addr);
703 669
704 /* Add the memory */ 670 /* Add the memory */
705 rc = add_memory(nid, lmb->base_addr, block_sz); 671 rc = __add_memory(nid, lmb->base_addr, block_sz);
706 if (rc) { 672 if (rc) {
707 dlpar_remove_device_tree_lmb(lmb); 673 invalidate_lmb_associativity_index(lmb);
708 return rc; 674 return rc;
709 } 675 }
710 676
711 rc = dlpar_online_lmb(lmb); 677 rc = dlpar_online_lmb(lmb);
712 if (rc) { 678 if (rc) {
713 remove_memory(nid, lmb->base_addr, block_sz); 679 __remove_memory(nid, lmb->base_addr, block_sz);
714 dlpar_remove_device_tree_lmb(lmb); 680 invalidate_lmb_associativity_index(lmb);
715 } else { 681 } else {
716 lmb->flags |= DRCONF_MEM_ASSIGNED; 682 lmb->flags |= DRCONF_MEM_ASSIGNED;
717 } 683 }
@@ -958,6 +924,12 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
958 break; 924 break;
959 } 925 }
960 926
927 if (!rc) {
928 rtas_hp_event = true;
929 rc = drmem_update_dt();
930 rtas_hp_event = false;
931 }
932
961 unlock_device_hotplug(); 933 unlock_device_hotplug();
962 return rc; 934 return rc;
963} 935}
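Editor's note: a small sketch of the out-parameter pattern that find_aa_index() adopts above, returning bool instead of overloading (u32)-1 as a "not found" sentinel; the array and names here are illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Returning a u32 index and reserving (u32)-1 for "not found" mixes error and
 * data in one value and relies on callers remembering the sentinel; a bool
 * return plus an out-parameter keeps the two concerns separate. */
static bool find_index(const uint32_t *arr, int len, uint32_t want,
		       uint32_t *index)
{
	for (int i = 0; i < len; i++) {
		if (arr[i] == want) {
			*index = (uint32_t)i;
			return true;
		}
	}
	return false;
}

int main(void)
{
	uint32_t arr[] = { 4, 8, 15, 16 };
	uint32_t idx;

	if (find_index(arr, 4, 15, &idx))
		printf("found at %u\n", idx);
	if (!find_index(arr, 4, 99, &idx))
		printf("not found\n");
	return 0;
}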
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index c7c1140c13b6..5b4a56131904 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -404,7 +404,7 @@ static ssize_t name_show(struct device *dev,
404 struct platform_device *ofdev; 404 struct platform_device *ofdev;
405 405
406 ofdev = to_platform_device(dev); 406 ofdev = to_platform_device(dev);
407 return sprintf(buf, "%s\n", ofdev->dev.of_node->name); 407 return sprintf(buf, "%pOFn\n", ofdev->dev.of_node);
408} 408}
409static DEVICE_ATTR_RO(name); 409static DEVICE_ATTR_RO(name);
410 410
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index d3992ced0782..32d4452973e7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -48,6 +48,7 @@
48#include <asm/kexec.h> 48#include <asm/kexec.h>
49#include <asm/fadump.h> 49#include <asm/fadump.h>
50#include <asm/asm-prototypes.h> 50#include <asm/asm-prototypes.h>
51#include <asm/debugfs.h>
51 52
52#include "pseries.h" 53#include "pseries.h"
53 54
@@ -417,6 +418,79 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
417 BUG_ON(lpar_rc != H_SUCCESS); 418 BUG_ON(lpar_rc != H_SUCCESS);
418} 419}
419 420
421
422/*
423 * As defined in the PAPR's section 14.5.4.1.8
424 * The control mask doesn't include the returned reference and change bit from
425 * the processed PTE.
426 */
427#define HBLKR_AVPN 0x0100000000000000UL
428#define HBLKR_CTRL_MASK 0xf800000000000000UL
429#define HBLKR_CTRL_SUCCESS 0x8000000000000000UL
430#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
431#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
432
433/**
434 * H_BLOCK_REMOVE caller.
435 * @idx should point to the latest @param entry set with a PTEX.
 436 * If a PTE cannot be processed because another CPU has already locked that
 437 * group, those entries are put back in @param starting at index 1.
 438 * If entries have to be retried and @retry_busy is set to true, these entries
 439 * are retried until success. If @retry_busy is set to false, the returned
 440 * value is the number of entries yet to process.
441 */
442static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
443 bool retry_busy)
444{
445 unsigned long i, rc, new_idx;
446 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
447
448 if (idx < 2) {
449 pr_warn("Unexpected empty call to H_BLOCK_REMOVE");
450 return 0;
451 }
452again:
453 new_idx = 0;
454 if (idx > PLPAR_HCALL9_BUFSIZE) {
455 pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx);
456 idx = PLPAR_HCALL9_BUFSIZE;
457 } else if (idx < PLPAR_HCALL9_BUFSIZE)
458 param[idx] = HBR_END;
459
460 rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
461 param[0], /* AVA */
462 param[1], param[2], param[3], param[4], /* TS0-7 */
463 param[5], param[6], param[7], param[8]);
464 if (rc == H_SUCCESS)
465 return 0;
466
467 BUG_ON(rc != H_PARTIAL);
468
469 /* Check that the unprocessed entries were 'not found' or 'busy' */
470 for (i = 0; i < idx-1; i++) {
471 unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
472
473 if (ctrl == HBLKR_CTRL_ERRBUSY) {
474 param[++new_idx] = param[i+1];
475 continue;
476 }
477
478 BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
479 && ctrl != HBLKR_CTRL_ERRNOTFOUND);
480 }
481
482 /*
483 * If there were entries found busy, retry these entries if requested,
 484 * or if all the entries have to be retried.
485 */
486 if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
487 idx = new_idx + 1;
488 goto again;
489 }
490
491 return new_idx;
492}
493
420#ifdef CONFIG_TRANSPARENT_HUGEPAGE 494#ifdef CONFIG_TRANSPARENT_HUGEPAGE
421/* 495/*
422 * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need 496 * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
@@ -424,17 +498,57 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
424 */ 498 */
425#define PPC64_HUGE_HPTE_BATCH 12 499#define PPC64_HUGE_HPTE_BATCH 12
426 500
427static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, 501static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
428 unsigned long *vpn, int count, 502 int count, int psize, int ssize)
429 int psize, int ssize)
430{ 503{
431 unsigned long param[PLPAR_HCALL9_BUFSIZE]; 504 unsigned long param[PLPAR_HCALL9_BUFSIZE];
432 int i = 0, pix = 0, rc; 505 unsigned long shift, current_vpgb, vpgb;
433 unsigned long flags = 0; 506 int i, pix = 0;
434 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
435 507
436 if (lock_tlbie) 508 shift = mmu_psize_defs[psize].shift;
437 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 509
510 for (i = 0; i < count; i++) {
511 /*
512 * Shifting 3 bits more on the right to get a
513 * 8 pages aligned virtual addresse.
514 */
515 vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
516 if (!pix || vpgb != current_vpgb) {
517 /*
518 * Need to start a new 8 pages block, flush
519 * the current one if needed.
520 */
521 if (pix)
522 (void)call_block_remove(pix, param, true);
523 current_vpgb = vpgb;
524 param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
525 pix = 1;
526 }
527
528 param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
529 if (pix == PLPAR_HCALL9_BUFSIZE) {
530 pix = call_block_remove(pix, param, false);
531 /*
532 * pix = 0 means that all the entries were
533 * removed, we can start a new block.
534 * Otherwise, this means that there are entries
535 * to retry, and pix points to latest one, so
536 * we should increment it and try to continue
537 * the same block.
538 */
539 if (pix)
540 pix++;
541 }
542 }
543 if (pix)
544 (void)call_block_remove(pix, param, true);
545}
546
547static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
548 int count, int psize, int ssize)
549{
550 unsigned long param[PLPAR_HCALL9_BUFSIZE];
551 int i = 0, pix = 0, rc;
438 552
439 for (i = 0; i < count; i++) { 553 for (i = 0; i < count; i++) {
440 554
@@ -462,6 +576,23 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
462 param[6], param[7]); 576 param[6], param[7]);
463 BUG_ON(rc != H_SUCCESS); 577 BUG_ON(rc != H_SUCCESS);
464 } 578 }
579}
580
581static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
582 unsigned long *vpn,
583 int count, int psize,
584 int ssize)
585{
586 unsigned long flags = 0;
587 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
588
589 if (lock_tlbie)
590 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
591
592 if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
593 hugepage_block_invalidate(slot, vpn, count, psize, ssize);
594 else
595 hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
465 596
466 if (lock_tlbie) 597 if (lock_tlbie)
467 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 598 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
@@ -546,6 +677,86 @@ static int pSeries_lpar_hpte_removebolted(unsigned long ea,
546 return 0; 677 return 0;
547} 678}
548 679
680
681static inline unsigned long compute_slot(real_pte_t pte,
682 unsigned long vpn,
683 unsigned long index,
684 unsigned long shift,
685 int ssize)
686{
687 unsigned long slot, hash, hidx;
688
689 hash = hpt_hash(vpn, shift, ssize);
690 hidx = __rpte_to_hidx(pte, index);
691 if (hidx & _PTEIDX_SECONDARY)
692 hash = ~hash;
693 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
694 slot += hidx & _PTEIDX_GROUP_IX;
695 return slot;
696}
697
698/**
 699 * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed are
700 * "all within the same naturally aligned 8 page virtual address block".
701 */
702static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
703 unsigned long *param)
704{
705 unsigned long vpn;
706 unsigned long i, pix = 0;
707 unsigned long index, shift, slot, current_vpgb, vpgb;
708 real_pte_t pte;
709 int psize, ssize;
710
711 psize = batch->psize;
712 ssize = batch->ssize;
713
714 for (i = 0; i < number; i++) {
715 vpn = batch->vpn[i];
716 pte = batch->pte[i];
717 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
718 /*
 719			 * Shift 3 more bits to the right to get an
 720			 * 8-page aligned virtual address.
721 */
722 vpgb = (vpn >> (shift - VPN_SHIFT + 3));
723 if (!pix || vpgb != current_vpgb) {
724 /*
 725				 * Need to start a new 8-page block; flush
726 * the current one if needed.
727 */
728 if (pix)
729 (void)call_block_remove(pix, param,
730 true);
731 current_vpgb = vpgb;
732 param[0] = hpte_encode_avpn(vpn, psize,
733 ssize);
734 pix = 1;
735 }
736
737 slot = compute_slot(pte, vpn, index, shift, ssize);
738 param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
739
740 if (pix == PLPAR_HCALL9_BUFSIZE) {
741 pix = call_block_remove(pix, param, false);
742 /*
743 * pix = 0 means that all the entries were
 744				 * removed, so we can start a new block.
 745				 * Otherwise, this means that there are entries
 746				 * to retry, and pix points to the latest one, so
747 * we should increment it and try to continue
748 * the same block.
749 */
750 if (pix)
751 pix++;
752 }
753 } pte_iterate_hashed_end();
754 }
755
756 if (pix)
757 (void)call_block_remove(pix, param, true);
758}
759
549/* 760/*
550 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 761 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
551 * lock. 762 * lock.
@@ -558,13 +769,18 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
558 struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); 769 struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
559 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 770 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
560 unsigned long param[PLPAR_HCALL9_BUFSIZE]; 771 unsigned long param[PLPAR_HCALL9_BUFSIZE];
561 unsigned long hash, index, shift, hidx, slot; 772 unsigned long index, shift, slot;
562 real_pte_t pte; 773 real_pte_t pte;
563 int psize, ssize; 774 int psize, ssize;
564 775
565 if (lock_tlbie) 776 if (lock_tlbie)
566 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 777 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
567 778
779 if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
780 do_block_remove(number, batch, param);
781 goto out;
782 }
783
568 psize = batch->psize; 784 psize = batch->psize;
569 ssize = batch->ssize; 785 ssize = batch->ssize;
570 pix = 0; 786 pix = 0;
@@ -572,12 +788,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
572 vpn = batch->vpn[i]; 788 vpn = batch->vpn[i];
573 pte = batch->pte[i]; 789 pte = batch->pte[i];
574 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { 790 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
575 hash = hpt_hash(vpn, shift, ssize); 791 slot = compute_slot(pte, vpn, index, shift, ssize);
576 hidx = __rpte_to_hidx(pte, index);
577 if (hidx & _PTEIDX_SECONDARY)
578 hash = ~hash;
579 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
580 slot += hidx & _PTEIDX_GROUP_IX;
581 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 792 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
582 /* 793 /*
583 * lpar doesn't use the passed actual page size 794 * lpar doesn't use the passed actual page size
@@ -608,6 +819,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
608 BUG_ON(rc != H_SUCCESS); 819 BUG_ON(rc != H_SUCCESS);
609 } 820 }
610 821
822out:
611 if (lock_tlbie) 823 if (lock_tlbie)
612 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 824 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
613} 825}
@@ -1028,3 +1240,56 @@ static int __init reserve_vrma_context_id(void)
1028 return 0; 1240 return 0;
1029} 1241}
1030machine_device_initcall(pseries, reserve_vrma_context_id); 1242machine_device_initcall(pseries, reserve_vrma_context_id);
1243
1244#ifdef CONFIG_DEBUG_FS
1245/* debugfs file interface for vpa data */
1246static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len,
1247 loff_t *pos)
1248{
1249 int cpu = (long)filp->private_data;
1250 struct lppaca *lppaca = &lppaca_of(cpu);
1251
1252 return simple_read_from_buffer(buf, len, pos, lppaca,
1253 sizeof(struct lppaca));
1254}
1255
1256static const struct file_operations vpa_fops = {
1257 .open = simple_open,
1258 .read = vpa_file_read,
1259 .llseek = default_llseek,
1260};
1261
1262static int __init vpa_debugfs_init(void)
1263{
1264 char name[16];
1265 long i;
1266 static struct dentry *vpa_dir;
1267
1268 if (!firmware_has_feature(FW_FEATURE_SPLPAR))
1269 return 0;
1270
1271 vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
1272 if (!vpa_dir) {
1273 pr_warn("%s: can't create vpa root dir\n", __func__);
1274 return -ENOMEM;
1275 }
1276
 1277	/* set up the per-cpu vpa file */
1278 for_each_possible_cpu(i) {
1279 struct dentry *d;
1280
1281 sprintf(name, "cpu-%ld", i);
1282
1283 d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
1284 &vpa_fops);
1285 if (!d) {
1286 pr_warn("%s: can't create per-cpu vpa file\n",
1287 __func__);
1288 return -ENOMEM;
1289 }
1290 }
1291
1292 return 0;
1293}
1294machine_arch_initcall(pseries, vpa_debugfs_init);
1295#endif /* CONFIG_DEBUG_FS */
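
The new do_block_remove()/hugepage_block_invalidate() paths above batch invalidations into naturally aligned 8-page groups before issuing H_BLOCK_REMOVE. The standalone C sketch below illustrates only that grouping pattern (group key = vpn >> 3, flush when the key changes or the 9-slot parameter buffer fills); BUFSIZE and flush_block() are invented stand-ins for PLPAR_HCALL9_BUFSIZE and call_block_remove(), not the real PAPR interface.

#include <stdio.h>

#define BUFSIZE 9                       /* stands in for PLPAR_HCALL9_BUFSIZE */

static unsigned long param[BUFSIZE];

/* Stand-in for call_block_remove(): just report what would be flushed. */
static unsigned long flush_block(unsigned long used)
{
        printf("flush block key=0x%lx with %lu entries\n", param[0], used - 1);
        return 0;                       /* 0: everything removed, nothing to retry */
}

static void block_remove(const unsigned long *vpn, unsigned long count)
{
        unsigned long i, pix = 0, vpgb, current_vpgb = 0;

        for (i = 0; i < count; i++) {
                vpgb = vpn[i] >> 3;                     /* naturally aligned 8-page group */
                if (!pix || vpgb != current_vpgb) {
                        if (pix)
                                flush_block(pix);       /* close the previous block */
                        current_vpgb = vpgb;
                        param[0] = vpgb;                /* slot 0 identifies the block */
                        pix = 1;
                }
                param[pix++] = vpn[i];                  /* one slot per page */
                if (pix == BUFSIZE)
                        pix = flush_block(pix);         /* buffer full: flush early */
        }
        if (pix)
                flush_block(pix);                       /* flush the trailing block */
}

int main(void)
{
        unsigned long vpn[] = { 0, 1, 2, 9, 10, 64, 65, 66, 67 };

        block_remove(vpn, sizeof(vpn) / sizeof(vpn[0]));
        return 0;
}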
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 7c872dc01bdb..8bd590af488a 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -585,8 +585,7 @@ static ssize_t update_mpp(u64 *entitlement, u8 *weight)
585static ssize_t lparcfg_write(struct file *file, const char __user * buf, 585static ssize_t lparcfg_write(struct file *file, const char __user * buf,
586 size_t count, loff_t * off) 586 size_t count, loff_t * off)
587{ 587{
588 int kbuf_sz = 64; 588 char kbuf[64];
589 char kbuf[kbuf_sz];
590 char *tmp; 589 char *tmp;
591 u64 new_entitled, *new_entitled_ptr = &new_entitled; 590 u64 new_entitled, *new_entitled_ptr = &new_entitled;
592 u8 new_weight, *new_weight_ptr = &new_weight; 591 u8 new_weight, *new_weight_ptr = &new_weight;
@@ -595,7 +594,7 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
595 if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 594 if (!firmware_has_feature(FW_FEATURE_SPLPAR))
596 return -EINVAL; 595 return -EINVAL;
597 596
598 if (count > kbuf_sz) 597 if (count > sizeof(kbuf))
599 return -EINVAL; 598 return -EINVAL;
600 599
601 if (copy_from_user(kbuf, buf, count)) 600 if (copy_from_user(kbuf, buf, count))
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index f0e30dc94988..88925f8ca8a0 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -242,7 +242,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
242 242
243static void prrn_update_node(__be32 phandle) 243static void prrn_update_node(__be32 phandle)
244{ 244{
245 struct pseries_hp_errorlog *hp_elog; 245 struct pseries_hp_errorlog hp_elog;
246 struct device_node *dn; 246 struct device_node *dn;
247 247
248 /* 248 /*
@@ -255,18 +255,12 @@ static void prrn_update_node(__be32 phandle)
255 return; 255 return;
256 } 256 }
257 257
258 hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); 258 hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM;
259 if(!hp_elog) 259 hp_elog.action = PSERIES_HP_ELOG_ACTION_READD;
260 return; 260 hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
261 261 hp_elog._drc_u.drc_index = phandle;
262 hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
263 hp_elog->action = PSERIES_HP_ELOG_ACTION_READD;
264 hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
265 hp_elog->_drc_u.drc_index = phandle;
266
267 queue_hotplug_event(hp_elog, NULL, NULL);
268 262
269 kfree(hp_elog); 263 handle_dlpar_errorlog(&hp_elog);
270} 264}
271 265
272int pseries_devicetree_update(s32 scope) 266int pseries_devicetree_update(s32 scope)
@@ -366,6 +360,8 @@ static ssize_t migration_store(struct class *class,
366 if (rc) 360 if (rc)
367 return rc; 361 return rc;
368 362
363 stop_topology_update();
364
369 do { 365 do {
370 rc = rtas_ibm_suspend_me(streamid); 366 rc = rtas_ibm_suspend_me(streamid);
371 if (rc == -EAGAIN) 367 if (rc == -EAGAIN)
@@ -376,6 +372,9 @@ static ssize_t migration_store(struct class *class,
376 return rc; 372 return rc;
377 373
378 post_mobility_fixup(); 374 post_mobility_fixup();
375
376 start_topology_update();
377
379 return count; 378 return count;
380} 379}
381 380
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index b7496948129e..8011b4129e3a 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -203,7 +203,8 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
203 /* Get the top level device in the PE */ 203 /* Get the top level device in the PE */
204 edev = pdn_to_eeh_dev(PCI_DN(dn)); 204 edev = pdn_to_eeh_dev(PCI_DN(dn));
205 if (edev->pe) 205 if (edev->pe)
206 edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); 206 edev = list_first_entry(&edev->pe->edevs, struct eeh_dev,
207 entry);
207 dn = pci_device_to_OF_node(edev->pdev); 208 dn = pci_device_to_OF_node(edev->pdev);
208 if (!dn) 209 if (!dn)
209 return NULL; 210 return NULL;
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
new file mode 100644
index 000000000000..ee9372b65ca5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -0,0 +1,345 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#define pr_fmt(fmt) "papr-scm: " fmt
4
5#include <linux/of.h>
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/ioport.h>
9#include <linux/slab.h>
10#include <linux/ndctl.h>
11#include <linux/sched.h>
12#include <linux/libnvdimm.h>
13#include <linux/platform_device.h>
14
15#include <asm/plpar_wrappers.h>
16
17#define BIND_ANY_ADDR (~0ul)
18
19#define PAPR_SCM_DIMM_CMD_MASK \
20 ((1ul << ND_CMD_GET_CONFIG_SIZE) | \
21 (1ul << ND_CMD_GET_CONFIG_DATA) | \
22 (1ul << ND_CMD_SET_CONFIG_DATA))
23
24struct papr_scm_priv {
25 struct platform_device *pdev;
26 struct device_node *dn;
27 uint32_t drc_index;
28 uint64_t blocks;
29 uint64_t block_size;
30 int metadata_size;
31
32 uint64_t bound_addr;
33
34 struct nvdimm_bus_descriptor bus_desc;
35 struct nvdimm_bus *bus;
36 struct nvdimm *nvdimm;
37 struct resource res;
38 struct nd_region *region;
39 struct nd_interleave_set nd_set;
40};
41
42static int drc_pmem_bind(struct papr_scm_priv *p)
43{
44 unsigned long ret[PLPAR_HCALL_BUFSIZE];
45 uint64_t rc, token;
46
47 /*
48 * When the hypervisor cannot map all the requested memory in a single
 49	 * hcall, it returns H_BUSY and we call again with the token until
 50	 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
 51	 * leaves the system in an undefined state, so we wait.
52 */
53 token = 0;
54
55 do {
56 rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
57 p->blocks, BIND_ANY_ADDR, token);
58 token = be64_to_cpu(ret[0]);
59 cond_resched();
60 } while (rc == H_BUSY);
61
62 if (rc) {
63 dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
64 return -ENXIO;
65 }
66
67 p->bound_addr = be64_to_cpu(ret[1]);
68
69 dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
70
71 return 0;
72}
73
74static int drc_pmem_unbind(struct papr_scm_priv *p)
75{
76 unsigned long ret[PLPAR_HCALL_BUFSIZE];
77 uint64_t rc, token;
78
79 token = 0;
80
81 /* NB: unbind has the same retry requirements mentioned above */
82 do {
83 rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
84 p->bound_addr, p->blocks, token);
 85		token = be64_to_cpu(ret[0]);
86 cond_resched();
87 } while (rc == H_BUSY);
88
89 if (rc)
90 dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
91
92 return !!rc;
93}
94
95static int papr_scm_meta_get(struct papr_scm_priv *p,
96 struct nd_cmd_get_config_data_hdr *hdr)
97{
98 unsigned long data[PLPAR_HCALL_BUFSIZE];
99 int64_t ret;
100
101 if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
102 return -EINVAL;
103
104 ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
105 hdr->in_offset, 1);
106
107 if (ret == H_PARAMETER) /* bad DRC index */
108 return -ENODEV;
109 if (ret)
110 return -EINVAL; /* other invalid parameter */
111
112 hdr->out_buf[0] = data[0] & 0xff;
113
114 return 0;
115}
116
117static int papr_scm_meta_set(struct papr_scm_priv *p,
118 struct nd_cmd_set_config_hdr *hdr)
119{
120 int64_t ret;
121
122 if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
123 return -EINVAL;
124
125 ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
126 p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
127
128 if (ret == H_PARAMETER) /* bad DRC index */
129 return -ENODEV;
130 if (ret)
131 return -EINVAL; /* other invalid parameter */
132
133 return 0;
134}
135
136int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
137 unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
138{
139 struct nd_cmd_get_config_size *get_size_hdr;
140 struct papr_scm_priv *p;
141
 142	/* Only dimm-specific calls are supported at the moment */
143 if (!nvdimm)
144 return -EINVAL;
145
146 p = nvdimm_provider_data(nvdimm);
147
148 switch (cmd) {
149 case ND_CMD_GET_CONFIG_SIZE:
150 get_size_hdr = buf;
151
152 get_size_hdr->status = 0;
153 get_size_hdr->max_xfer = 1;
154 get_size_hdr->config_size = p->metadata_size;
155 *cmd_rc = 0;
156 break;
157
158 case ND_CMD_GET_CONFIG_DATA:
159 *cmd_rc = papr_scm_meta_get(p, buf);
160 break;
161
162 case ND_CMD_SET_CONFIG_DATA:
163 *cmd_rc = papr_scm_meta_set(p, buf);
164 break;
165
166 default:
167 return -EINVAL;
168 }
169
170 dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
171
172 return 0;
173}
174
175static const struct attribute_group *region_attr_groups[] = {
176 &nd_region_attribute_group,
177 &nd_device_attribute_group,
178 &nd_mapping_attribute_group,
179 &nd_numa_attribute_group,
180 NULL,
181};
182
183static const struct attribute_group *bus_attr_groups[] = {
184 &nvdimm_bus_attribute_group,
185 NULL,
186};
187
188static const struct attribute_group *papr_scm_dimm_groups[] = {
189 &nvdimm_attribute_group,
190 &nd_device_attribute_group,
191 NULL,
192};
193
194static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
195{
196 struct device *dev = &p->pdev->dev;
197 struct nd_mapping_desc mapping;
198 struct nd_region_desc ndr_desc;
199 unsigned long dimm_flags;
200
201 p->bus_desc.ndctl = papr_scm_ndctl;
202 p->bus_desc.module = THIS_MODULE;
203 p->bus_desc.of_node = p->pdev->dev.of_node;
204 p->bus_desc.attr_groups = bus_attr_groups;
205 p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
206
207 if (!p->bus_desc.provider_name)
208 return -ENOMEM;
209
210 p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
211 if (!p->bus) {
212 dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
213 return -ENXIO;
214 }
215
216 dimm_flags = 0;
217 set_bit(NDD_ALIASING, &dimm_flags);
218
219 p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
220 dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
221 if (!p->nvdimm) {
222 dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
223 goto err;
224 }
225
226 /* now add the region */
227
228 memset(&mapping, 0, sizeof(mapping));
229 mapping.nvdimm = p->nvdimm;
230 mapping.start = 0;
231 mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
232
233 memset(&ndr_desc, 0, sizeof(ndr_desc));
234 ndr_desc.attr_groups = region_attr_groups;
235 ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
236 ndr_desc.res = &p->res;
237 ndr_desc.of_node = p->dn;
238 ndr_desc.provider_data = p;
239 ndr_desc.mapping = &mapping;
240 ndr_desc.num_mappings = 1;
241 ndr_desc.nd_set = &p->nd_set;
242 set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
243
244 p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
245 if (!p->region) {
246 dev_err(dev, "Error registering region %pR from %pOF\n",
247 ndr_desc.res, p->dn);
248 goto err;
249 }
250
251 return 0;
252
253err: nvdimm_bus_unregister(p->bus);
254 kfree(p->bus_desc.provider_name);
255 return -ENXIO;
256}
257
258static int papr_scm_probe(struct platform_device *pdev)
259{
260 uint32_t drc_index, metadata_size, unit_cap[2];
261 struct device_node *dn = pdev->dev.of_node;
262 struct papr_scm_priv *p;
263 int rc;
264
265 /* check we have all the required DT properties */
266 if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
267 dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
268 return -ENODEV;
269 }
270
271 if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) {
272 dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn);
273 return -ENODEV;
274 }
275
276 p = kzalloc(sizeof(*p), GFP_KERNEL);
277 if (!p)
278 return -ENOMEM;
279
280 /* optional DT properties */
281 of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
282
283 p->dn = dn;
284 p->drc_index = drc_index;
285 p->block_size = unit_cap[0];
286 p->blocks = unit_cap[1];
287
288 /* might be zero */
289 p->metadata_size = metadata_size;
290 p->pdev = pdev;
291
 292	/* request the hypervisor to bind this region somewhere in memory */
293 rc = drc_pmem_bind(p);
294 if (rc)
295 goto err;
296
297 /* setup the resource for the newly bound range */
298 p->res.start = p->bound_addr;
299 p->res.end = p->bound_addr + p->blocks * p->block_size;
300 p->res.name = pdev->name;
301 p->res.flags = IORESOURCE_MEM;
302
303 rc = papr_scm_nvdimm_init(p);
304 if (rc)
305 goto err2;
306
307 platform_set_drvdata(pdev, p);
308
309 return 0;
310
311err2: drc_pmem_unbind(p);
312err: kfree(p);
313 return rc;
314}
315
316static int papr_scm_remove(struct platform_device *pdev)
317{
318 struct papr_scm_priv *p = platform_get_drvdata(pdev);
319
320 nvdimm_bus_unregister(p->bus);
321 drc_pmem_unbind(p);
322 kfree(p);
323
324 return 0;
325}
326
327static const struct of_device_id papr_scm_match[] = {
328 { .compatible = "ibm,pmemory" },
329 { },
330};
331
332static struct platform_driver papr_scm_driver = {
333 .probe = papr_scm_probe,
334 .remove = papr_scm_remove,
335 .driver = {
336 .name = "papr_scm",
337 .owner = THIS_MODULE,
338 .of_match_table = papr_scm_match,
339 },
340};
341
342module_platform_driver(papr_scm_driver);
343MODULE_DEVICE_TABLE(of, papr_scm_match);
344MODULE_LICENSE("GPL");
345MODULE_AUTHOR("IBM Corporation");
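
drc_pmem_bind() and drc_pmem_unbind() above loop on H_BUSY, feeding the returned continuation token back into the next hcall until the hypervisor finishes the mapping. The sketch below models that pattern in plain userspace C; fake_h_scm_bind_mem() is an invented stand-in for plpar_hcall(H_SCM_BIND_MEM, ...) and the constants are simplified example values.

#include <stdio.h>

#define H_SUCCESS 0
#define H_BUSY    1

/* Pretend hypervisor: the bind only completes on the third call. */
static long fake_h_scm_bind_mem(unsigned long token, unsigned long *next_token,
                                unsigned long *bound_addr)
{
        if (token < 2) {
                *next_token = token + 1;        /* continuation token for the retry */
                return H_BUSY;
        }
        *bound_addr = 0x10000000UL;             /* where the region ended up */
        return H_SUCCESS;
}

int main(void)
{
        unsigned long token = 0, addr = 0;
        long rc;

        /* Keep calling with the returned token until the bind completes. */
        do {
                rc = fake_h_scm_bind_mem(token, &token, &addr);
        } while (rc == H_BUSY);

        if (rc != H_SUCCESS) {
                fprintf(stderr, "bind failed: %ld\n", rc);
                return 1;
        }
        printf("bound at 0x%lx after token %lu\n", addr, token);
        return 0;
}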
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index eab96637d6cf..41d8a4d1d02e 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -239,6 +239,7 @@ void __init pSeries_final_fixup(void)
239{ 239{
240 pSeries_request_regions(); 240 pSeries_request_regions();
241 241
242 eeh_probe_devices();
242 eeh_addr_cache_build(); 243 eeh_addr_cache_build();
243 244
244#ifdef CONFIG_PCI_IOV 245#ifdef CONFIG_PCI_IOV
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
new file mode 100644
index 000000000000..a27f40eb57b1
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -0,0 +1,164 @@
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Handles hot and cold plug of persistent memory regions on pseries.
5 */
6
7#define pr_fmt(fmt) "pseries-pmem: " fmt
8
9#include <linux/kernel.h>
10#include <linux/interrupt.h>
11#include <linux/delay.h>
12#include <linux/sched.h> /* for idle_task_exit */
13#include <linux/sched/hotplug.h>
14#include <linux/cpu.h>
15#include <linux/of.h>
16#include <linux/of_platform.h>
17#include <linux/slab.h>
18#include <asm/prom.h>
19#include <asm/rtas.h>
20#include <asm/firmware.h>
21#include <asm/machdep.h>
22#include <asm/vdso_datapage.h>
23#include <asm/plpar_wrappers.h>
24#include <asm/topology.h>
25
26#include "pseries.h"
27#include "offline_states.h"
28
29static struct device_node *pmem_node;
30
31static ssize_t pmem_drc_add_node(u32 drc_index)
32{
33 struct device_node *dn;
34 int rc;
35
36 pr_debug("Attempting to add pmem node, drc index: %x\n", drc_index);
37
38 rc = dlpar_acquire_drc(drc_index);
39 if (rc) {
40 pr_err("Failed to acquire DRC, rc: %d, drc index: %x\n",
41 rc, drc_index);
42 return -EINVAL;
43 }
44
45 dn = dlpar_configure_connector(cpu_to_be32(drc_index), pmem_node);
46 if (!dn) {
47 pr_err("configure-connector failed for drc %x\n", drc_index);
48 dlpar_release_drc(drc_index);
49 return -EINVAL;
50 }
51
 52	/* NB: The OF reconfig notifier creates a platform device from the node */
53 rc = dlpar_attach_node(dn, pmem_node);
54 if (rc) {
55 pr_err("Failed to attach node %s, rc: %d, drc index: %x\n",
56 dn->name, rc, drc_index);
57
58 if (dlpar_release_drc(drc_index))
59 dlpar_free_cc_nodes(dn);
60
61 return rc;
62 }
63
64 pr_info("Successfully added %pOF, drc index: %x\n", dn, drc_index);
65
66 return 0;
67}
68
69static ssize_t pmem_drc_remove_node(u32 drc_index)
70{
71 struct device_node *dn;
72 uint32_t index;
73 int rc;
74
75 for_each_child_of_node(pmem_node, dn) {
76 if (of_property_read_u32(dn, "ibm,my-drc-index", &index))
77 continue;
78 if (index == drc_index)
79 break;
80 }
81
82 if (!dn) {
83 pr_err("Attempting to remove unused DRC index %x\n", drc_index);
84 return -ENODEV;
85 }
86
87 pr_debug("Attempting to remove %pOF, drc index: %x\n", dn, drc_index);
88
 89	/* NB: tears down the ibm,pmemory device as a side-effect */
90 rc = dlpar_detach_node(dn);
91 if (rc)
92 return rc;
93
94 rc = dlpar_release_drc(drc_index);
95 if (rc) {
 96		pr_err("Failed to release drc (%x) for %s, rc: %d\n",
97 drc_index, dn->name, rc);
98 dlpar_attach_node(dn, pmem_node);
99 return rc;
100 }
101
102 pr_info("Successfully removed PMEM with drc index: %x\n", drc_index);
103
104 return 0;
105}
106
107int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
108{
109 u32 count, drc_index;
110 int rc;
111
112 /* slim chance, but we might get a hotplug event while booting */
113 if (!pmem_node)
114 pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
115 if (!pmem_node) {
116 pr_err("Hotplug event for a pmem device, but none exists\n");
117 return -ENODEV;
118 }
119
120 if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) {
121 pr_err("Unsupported hotplug event type %d\n",
122 hp_elog->id_type);
123 return -EINVAL;
124 }
125
126 count = hp_elog->_drc_u.drc_count;
127 drc_index = hp_elog->_drc_u.drc_index;
128
129 lock_device_hotplug();
130
131 if (hp_elog->action == PSERIES_HP_ELOG_ACTION_ADD) {
132 rc = pmem_drc_add_node(drc_index);
133 } else if (hp_elog->action == PSERIES_HP_ELOG_ACTION_REMOVE) {
134 rc = pmem_drc_remove_node(drc_index);
135 } else {
136 pr_err("Unsupported hotplug action (%d)\n", hp_elog->action);
137 rc = -EINVAL;
138 }
139
140 unlock_device_hotplug();
141 return rc;
142}
143
144const struct of_device_id drc_pmem_match[] = {
145 { .type = "ibm,persistent-memory", },
146 {}
147};
148
149static int pseries_pmem_init(void)
150{
151 pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
152 if (!pmem_node)
153 return 0;
154
155 /*
156 * The generic OF bus probe/populate handles creating platform devices
 157	 * from the child (ibm,pmemory) nodes. The generic code registers an OF
158 * reconfig notifier to handle the hot-add/remove cases too.
159 */
160 of_platform_bus_probe(pmem_node, drc_pmem_match, NULL);
161
162 return 0;
163}
164machine_arch_initcall(pseries, pseries_pmem_init);
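
pmem_drc_add_node() above follows the usual DLPAR sequence: acquire the DRC, configure the connector, then attach the node, undoing the earlier step when a later one fails. The sketch below shows only that acquire/configure/attach-with-unwind shape; every fake_* helper is an invented stand-in for the dlpar_* primitives.

#include <stdio.h>

/* Invented stand-ins for dlpar_acquire_drc()/configure_connector()/attach_node(). */
static int fake_acquire_drc(unsigned int idx)  { (void)idx; return 0; }
static int fake_configure(unsigned int idx)    { (void)idx; return 0; }
static int fake_attach(unsigned int idx)       { (void)idx; return -1; } /* force a failure */
static void fake_release_drc(unsigned int idx) { printf("released drc %x\n", idx); }

static int add_node(unsigned int drc_index)
{
        int rc;

        rc = fake_acquire_drc(drc_index);
        if (rc)
                return rc;

        rc = fake_configure(drc_index);
        if (rc) {
                fake_release_drc(drc_index);    /* undo the acquire */
                return rc;
        }

        rc = fake_attach(drc_index);
        if (rc) {
                fake_release_drc(drc_index);    /* undo the acquire on attach failure */
                return rc;
        }
        return 0;
}

int main(void)
{
        printf("add_node rc=%d\n", add_node(0x1234));
        return 0;
}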
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 60db2ee511fb..7dee8c5d3363 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -24,6 +24,7 @@ struct pt_regs;
24 24
25extern int pSeries_system_reset_exception(struct pt_regs *regs); 25extern int pSeries_system_reset_exception(struct pt_regs *regs);
26extern int pSeries_machine_check_exception(struct pt_regs *regs); 26extern int pSeries_machine_check_exception(struct pt_regs *regs);
27extern long pseries_machine_check_realmode(struct pt_regs *regs);
27 28
28#ifdef CONFIG_SMP 29#ifdef CONFIG_SMP
29extern void smp_init_pseries(void); 30extern void smp_init_pseries(void);
@@ -59,15 +60,21 @@ extern int dlpar_detach_node(struct device_node *);
59extern int dlpar_acquire_drc(u32 drc_index); 60extern int dlpar_acquire_drc(u32 drc_index);
60extern int dlpar_release_drc(u32 drc_index); 61extern int dlpar_release_drc(u32 drc_index);
61 62
62void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, 63void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
63 struct completion *hotplug_done, int *rc); 64int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
65
64#ifdef CONFIG_MEMORY_HOTPLUG 66#ifdef CONFIG_MEMORY_HOTPLUG
65int dlpar_memory(struct pseries_hp_errorlog *hp_elog); 67int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
68int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
66#else 69#else
67static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog) 70static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
68{ 71{
69 return -EOPNOTSUPP; 72 return -EOPNOTSUPP;
70} 73}
74static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
75{
76 return -EOPNOTSUPP;
77}
71#endif 78#endif
72 79
73#ifdef CONFIG_HOTPLUG_CPU 80#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 851ce326874a..d97d52772789 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -27,6 +27,7 @@
27#include <asm/machdep.h> 27#include <asm/machdep.h>
28#include <asm/rtas.h> 28#include <asm/rtas.h>
29#include <asm/firmware.h> 29#include <asm/firmware.h>
30#include <asm/mce.h>
30 31
31#include "pseries.h" 32#include "pseries.h"
32 33
@@ -50,6 +51,101 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
50static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); 51static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
51static irqreturn_t ras_error_interrupt(int irq, void *dev_id); 52static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
52 53
54/* RTAS pseries MCE errorlog section. */
55struct pseries_mc_errorlog {
56 __be32 fru_id;
57 __be32 proc_id;
58 u8 error_type;
59 /*
 60	 * sub_err_type (1 byte). Bit fields depend on error_type
61 *
62 * MSB0
63 * |
64 * V
65 * 01234567
66 * XXXXXXXX
67 *
68 * For error_type == MC_ERROR_TYPE_UE
69 * XXXXXXXX
70 * X 1: Permanent or Transient UE.
71 * X 1: Effective address provided.
72 * X 1: Logical address provided.
73 * XX 2: Reserved.
74 * XXX 3: Type of UE error.
75 *
76 * For error_type != MC_ERROR_TYPE_UE
77 * XXXXXXXX
78 * X 1: Effective address provided.
79 * XXXXX 5: Reserved.
80 * XX 2: Type of SLB/ERAT/TLB error.
81 */
82 u8 sub_err_type;
83 u8 reserved_1[6];
84 __be64 effective_address;
85 __be64 logical_address;
86} __packed;
87
88/* RTAS pseries MCE error types */
89#define MC_ERROR_TYPE_UE 0x00
90#define MC_ERROR_TYPE_SLB 0x01
91#define MC_ERROR_TYPE_ERAT 0x02
92#define MC_ERROR_TYPE_TLB 0x04
93#define MC_ERROR_TYPE_D_CACHE 0x05
94#define MC_ERROR_TYPE_I_CACHE 0x07
95
96/* RTAS pseries MCE error sub types */
97#define MC_ERROR_UE_INDETERMINATE 0
98#define MC_ERROR_UE_IFETCH 1
99#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2
100#define MC_ERROR_UE_LOAD_STORE 3
101#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
102
103#define MC_ERROR_SLB_PARITY 0
104#define MC_ERROR_SLB_MULTIHIT 1
105#define MC_ERROR_SLB_INDETERMINATE 2
106
107#define MC_ERROR_ERAT_PARITY 1
108#define MC_ERROR_ERAT_MULTIHIT 2
109#define MC_ERROR_ERAT_INDETERMINATE 3
110
111#define MC_ERROR_TLB_PARITY 1
112#define MC_ERROR_TLB_MULTIHIT 2
113#define MC_ERROR_TLB_INDETERMINATE 3
114
115static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
116{
117 switch (mlog->error_type) {
118 case MC_ERROR_TYPE_UE:
119 return (mlog->sub_err_type & 0x07);
120 case MC_ERROR_TYPE_SLB:
121 case MC_ERROR_TYPE_ERAT:
122 case MC_ERROR_TYPE_TLB:
123 return (mlog->sub_err_type & 0x03);
124 default:
125 return 0;
126 }
127}
128
129static
130inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
131{
132 __be64 addr = 0;
133
134 switch (mlog->error_type) {
135 case MC_ERROR_TYPE_UE:
136 if (mlog->sub_err_type & 0x40)
137 addr = mlog->effective_address;
138 break;
139 case MC_ERROR_TYPE_SLB:
140 case MC_ERROR_TYPE_ERAT:
141 case MC_ERROR_TYPE_TLB:
142 if (mlog->sub_err_type & 0x80)
143 addr = mlog->effective_address;
144 default:
145 break;
146 }
147 return be64_to_cpu(addr);
148}
53 149
54/* 150/*
55 * Enable the hotplug interrupt late because processing them may touch other 151 * Enable the hotplug interrupt late because processing them may touch other
@@ -237,8 +333,9 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
237 * hotplug events on the ras_log_buf to be handled by rtas_errd. 333 * hotplug events on the ras_log_buf to be handled by rtas_errd.
238 */ 334 */
239 if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || 335 if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
240 hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU) 336 hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
241 queue_hotplug_event(hp_elog, NULL, NULL); 337 hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
338 queue_hotplug_event(hp_elog);
242 else 339 else
243 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 340 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
244 341
@@ -427,6 +524,188 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
427 return 0; /* need to perform reset */ 524 return 0; /* need to perform reset */
428} 525}
429 526
527#define VAL_TO_STRING(ar, val) \
528 (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
529
530static void pseries_print_mce_info(struct pt_regs *regs,
531 struct rtas_error_log *errp)
532{
533 const char *level, *sevstr;
534 struct pseries_errorlog *pseries_log;
535 struct pseries_mc_errorlog *mce_log;
536 u8 error_type, err_sub_type;
537 u64 addr;
538 u8 initiator = rtas_error_initiator(errp);
539 int disposition = rtas_error_disposition(errp);
540
541 static const char * const initiators[] = {
542 "Unknown",
543 "CPU",
544 "PCI",
545 "ISA",
546 "Memory",
547 "Power Mgmt",
548 };
549 static const char * const mc_err_types[] = {
550 "UE",
551 "SLB",
552 "ERAT",
553 "TLB",
554 "D-Cache",
555 "Unknown",
556 "I-Cache",
557 };
558 static const char * const mc_ue_types[] = {
559 "Indeterminate",
560 "Instruction fetch",
561 "Page table walk ifetch",
562 "Load/Store",
563 "Page table walk Load/Store",
564 };
565
 566	/* Valid SLB sub-error values are 0x0, 0x1 and 0x2 */
567 static const char * const mc_slb_types[] = {
568 "Parity",
569 "Multihit",
570 "Indeterminate",
571 };
572
 573	/* Valid TLB and ERAT sub-error values are 0x1, 0x2 and 0x3 */
574 static const char * const mc_soft_types[] = {
575 "Unknown",
576 "Parity",
577 "Multihit",
578 "Indeterminate",
579 };
580
581 if (!rtas_error_extended(errp)) {
582 pr_err("Machine check interrupt: Missing extended error log\n");
583 return;
584 }
585
586 pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
587 if (pseries_log == NULL)
588 return;
589
590 mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
591
592 error_type = mce_log->error_type;
593 err_sub_type = rtas_mc_error_sub_type(mce_log);
594
595 switch (rtas_error_severity(errp)) {
596 case RTAS_SEVERITY_NO_ERROR:
597 level = KERN_INFO;
598 sevstr = "Harmless";
599 break;
600 case RTAS_SEVERITY_WARNING:
601 level = KERN_WARNING;
602 sevstr = "";
603 break;
604 case RTAS_SEVERITY_ERROR:
605 case RTAS_SEVERITY_ERROR_SYNC:
606 level = KERN_ERR;
607 sevstr = "Severe";
608 break;
609 case RTAS_SEVERITY_FATAL:
610 default:
611 level = KERN_ERR;
612 sevstr = "Fatal";
613 break;
614 }
615
616#ifdef CONFIG_PPC_BOOK3S_64
617 /* Display faulty slb contents for SLB errors. */
618 if (error_type == MC_ERROR_TYPE_SLB)
619 slb_dump_contents(local_paca->mce_faulty_slbs);
620#endif
621
622 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
623 disposition == RTAS_DISP_FULLY_RECOVERED ?
624 "Recovered" : "Not recovered");
625 if (user_mode(regs)) {
626 printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
627 regs->nip, current->pid, current->comm);
628 } else {
629 printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
630 (void *)regs->nip);
631 }
632 printk("%s Initiator: %s\n", level,
633 VAL_TO_STRING(initiators, initiator));
634
635 switch (error_type) {
636 case MC_ERROR_TYPE_UE:
637 printk("%s Error type: %s [%s]\n", level,
638 VAL_TO_STRING(mc_err_types, error_type),
639 VAL_TO_STRING(mc_ue_types, err_sub_type));
640 break;
641 case MC_ERROR_TYPE_SLB:
642 printk("%s Error type: %s [%s]\n", level,
643 VAL_TO_STRING(mc_err_types, error_type),
644 VAL_TO_STRING(mc_slb_types, err_sub_type));
645 break;
646 case MC_ERROR_TYPE_ERAT:
647 case MC_ERROR_TYPE_TLB:
648 printk("%s Error type: %s [%s]\n", level,
649 VAL_TO_STRING(mc_err_types, error_type),
650 VAL_TO_STRING(mc_soft_types, err_sub_type));
651 break;
652 default:
653 printk("%s Error type: %s\n", level,
654 VAL_TO_STRING(mc_err_types, error_type));
655 break;
656 }
657
658 addr = rtas_mc_get_effective_addr(mce_log);
659 if (addr)
660 printk("%s Effective address: %016llx\n", level, addr);
661}
662
663static int mce_handle_error(struct rtas_error_log *errp)
664{
665 struct pseries_errorlog *pseries_log;
666 struct pseries_mc_errorlog *mce_log;
667 int disposition = rtas_error_disposition(errp);
668 u8 error_type;
669
670 if (!rtas_error_extended(errp))
671 goto out;
672
673 pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
674 if (pseries_log == NULL)
675 goto out;
676
677 mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
678 error_type = mce_log->error_type;
679
680#ifdef CONFIG_PPC_BOOK3S_64
681 if (disposition == RTAS_DISP_NOT_RECOVERED) {
682 switch (error_type) {
683 case MC_ERROR_TYPE_SLB:
684 case MC_ERROR_TYPE_ERAT:
685 /*
686 * Store the old slb content in paca before flushing.
687 * Print this when we go to virtual mode.
 688			 * There is a chance that we may hit an MCE again if there
 689			 * is a parity error on the SLB entry we are trying to read
 690			 * for saving. Hence limit the slb saving to a single
691 * level of recursion.
692 */
693 if (local_paca->in_mce == 1)
694 slb_save_contents(local_paca->mce_faulty_slbs);
695 flush_and_reload_slb();
696 disposition = RTAS_DISP_FULLY_RECOVERED;
697 rtas_set_disposition_recovered(errp);
698 break;
699 default:
700 break;
701 }
702 }
703#endif
704
705out:
706 return disposition;
707}
708
430/* 709/*
431 * Process MCE rtas errlog event. 710 * Process MCE rtas errlog event.
432 */ 711 */
@@ -452,8 +731,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
452 int recovered = 0; 731 int recovered = 0;
453 int disposition = rtas_error_disposition(err); 732 int disposition = rtas_error_disposition(err);
454 733
734 pseries_print_mce_info(regs, err);
735
455 if (!(regs->msr & MSR_RI)) { 736 if (!(regs->msr & MSR_RI)) {
456 /* If MSR_RI isn't set, we cannot recover */ 737 /* If MSR_RI isn't set, we cannot recover */
738 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
457 recovered = 0; 739 recovered = 0;
458 740
459 } else if (disposition == RTAS_DISP_FULLY_RECOVERED) { 741 } else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
@@ -503,11 +785,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs)
503 struct rtas_error_log *errp; 785 struct rtas_error_log *errp;
504 786
505 if (fwnmi_active) { 787 if (fwnmi_active) {
506 errp = fwnmi_get_errinfo(regs);
507 fwnmi_release_errinfo(); 788 fwnmi_release_errinfo();
789 errp = fwnmi_get_errlog();
508 if (errp && recover_mce(regs, errp)) 790 if (errp && recover_mce(regs, errp))
509 return 1; 791 return 1;
510 } 792 }
511 793
512 return 0; 794 return 0;
513} 795}
796
797long pseries_machine_check_realmode(struct pt_regs *regs)
798{
799 struct rtas_error_log *errp;
800 int disposition;
801
802 if (fwnmi_active) {
803 errp = fwnmi_get_errinfo(regs);
804 /*
 805		 * Calling fwnmi_release_errinfo() in real mode causes the kernel
806 * to panic. Hence we will call it as soon as we go into
807 * virtual mode.
808 */
809 disposition = mce_handle_error(errp);
810 if (disposition == RTAS_DISP_FULLY_RECOVERED)
811 return 1;
812 }
813
814 return 0;
815}
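
The bit layout documented in struct pseries_mc_errorlog above is MSB0, so bit 0 is mask 0x80 and bit 1 is 0x40; that is why rtas_mc_error_sub_type() masks with 0x07/0x03 and rtas_mc_get_effective_addr() tests 0x40 (UE) or 0x80 (SLB/ERAT/TLB). The small standalone decoder below walks those same masks; the sample values in main() are made up for illustration.

#include <stdio.h>

#define MC_ERROR_TYPE_UE  0x00
#define MC_ERROR_TYPE_SLB 0x01

static void decode(unsigned char error_type, unsigned char sub)
{
        if (error_type == MC_ERROR_TYPE_UE) {
                printf("UE: permanent/transient=%u eff-addr=%u log-addr=%u type=%u\n",
                       (sub >> 7) & 1,          /* MSB0 bit 0 (mask 0x80) */
                       (sub >> 6) & 1,          /* MSB0 bit 1 (0x40): effective address valid */
                       (sub >> 5) & 1,          /* MSB0 bit 2 (0x20): logical address valid */
                       sub & 0x07);             /* bottom 3 bits: UE sub-type */
        } else {
                printf("non-UE: eff-addr=%u type=%u\n",
                       (sub >> 7) & 1,          /* MSB0 bit 0 (0x80): effective address valid */
                       sub & 0x03);             /* bottom 2 bits: SLB/ERAT/TLB sub-type */
        }
}

int main(void)
{
        decode(MC_ERROR_TYPE_UE, 0x43);         /* eff-addr valid, UE sub-type 3 (Load/Store) */
        decode(MC_ERROR_TYPE_SLB, 0x81);        /* eff-addr valid, SLB sub-type 1 (Multihit) */
        return 0;
}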
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ba1791fd3234..0f553dcfa548 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -107,6 +107,10 @@ static void __init fwnmi_init(void)
107 u8 *mce_data_buf; 107 u8 *mce_data_buf;
108 unsigned int i; 108 unsigned int i;
109 int nr_cpus = num_possible_cpus(); 109 int nr_cpus = num_possible_cpus();
110#ifdef CONFIG_PPC_BOOK3S_64
111 struct slb_entry *slb_ptr;
112 size_t size;
113#endif
110 114
111 int ibm_nmi_register = rtas_token("ibm,nmi-register"); 115 int ibm_nmi_register = rtas_token("ibm,nmi-register");
112 if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) 116 if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
@@ -132,6 +136,15 @@ static void __init fwnmi_init(void)
132 paca_ptrs[i]->mce_data_buf = mce_data_buf + 136 paca_ptrs[i]->mce_data_buf = mce_data_buf +
133 (RTAS_ERROR_LOG_MAX * i); 137 (RTAS_ERROR_LOG_MAX * i);
134 } 138 }
139
140#ifdef CONFIG_PPC_BOOK3S_64
141 /* Allocate per cpu slb area to save old slb contents during MCE */
142 size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
143 slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry),
144 ppc64_rma_size));
145 for_each_possible_cpu(i)
146 paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
147#endif
135} 148}
136 149
137static void pseries_8259_cascade(struct irq_desc *desc) 150static void pseries_8259_cascade(struct irq_desc *desc)
@@ -1017,6 +1030,7 @@ define_machine(pseries) {
1017 .calibrate_decr = generic_calibrate_decr, 1030 .calibrate_decr = generic_calibrate_decr,
1018 .progress = rtas_progress, 1031 .progress = rtas_progress,
1019 .system_reset_exception = pSeries_system_reset_exception, 1032 .system_reset_exception = pSeries_system_reset_exception,
1033 .machine_check_early = pseries_machine_check_realmode,
1020 .machine_check_exception = pSeries_machine_check_exception, 1034 .machine_check_exception = pSeries_machine_check_exception,
1021#ifdef CONFIG_KEXEC_CORE 1035#ifdef CONFIG_KEXEC_CORE
1022 .machine_kexec = pSeries_machine_kexec, 1036 .machine_kexec = pSeries_machine_kexec,
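
fwnmi_init() above sizes one flat buffer as sizeof(struct slb_entry) * mmu_slb_size * nr_cpus and hands CPU i the slice starting at mmu_slb_size * i. The sketch below reproduces just that slicing arithmetic, with calloc standing in for the memblock allocation and a trimmed-down slb_entry; the sizes are arbitrary example values.

#include <stdio.h>
#include <stdlib.h>

struct slb_entry { unsigned long esid, vsid; };

int main(void)
{
        unsigned int nr_cpus = 4, slb_size = 32, i;     /* example values only */
        struct slb_entry *base, **per_cpu;

        /* One flat allocation, then a per-cpu slice of slb_size entries each. */
        base = calloc((size_t)nr_cpus * slb_size, sizeof(*base));
        per_cpu = calloc(nr_cpus, sizeof(*per_cpu));
        if (!base || !per_cpu)
                return 1;

        for (i = 0; i < nr_cpus; i++) {
                per_cpu[i] = base + (size_t)slb_size * i;       /* CPU i's save area */
                printf("cpu %u slice starts at entry %u\n", i, slb_size * i);
        }

        free(per_cpu);
        free(base);
        return 0;
}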
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 49e04ec19238..88f1ad1d6309 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1349,7 +1349,6 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
1349 struct device_node *parent_node; 1349 struct device_node *parent_node;
1350 const __be32 *prop; 1350 const __be32 *prop;
1351 enum vio_dev_family family; 1351 enum vio_dev_family family;
1352 const char *of_node_name = of_node->name ? of_node->name : "<unknown>";
1353 1352
1354 /* 1353 /*
1355 * Determine if this node is a under the /vdevice node or under the 1354 * Determine if this node is a under the /vdevice node or under the
@@ -1362,24 +1361,24 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
1362 else if (!strcmp(parent_node->type, "vdevice")) 1361 else if (!strcmp(parent_node->type, "vdevice"))
1363 family = VDEVICE; 1362 family = VDEVICE;
1364 else { 1363 else {
1365 pr_warn("%s: parent(%pOF) of %s not recognized.\n", 1364 pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n",
1366 __func__, 1365 __func__,
1367 parent_node, 1366 parent_node,
1368 of_node_name); 1367 of_node);
1369 of_node_put(parent_node); 1368 of_node_put(parent_node);
1370 return NULL; 1369 return NULL;
1371 } 1370 }
1372 of_node_put(parent_node); 1371 of_node_put(parent_node);
1373 } else { 1372 } else {
1374 pr_warn("%s: could not determine the parent of node %s.\n", 1373 pr_warn("%s: could not determine the parent of node %pOFn.\n",
1375 __func__, of_node_name); 1374 __func__, of_node);
1376 return NULL; 1375 return NULL;
1377 } 1376 }
1378 1377
1379 if (family == PFO) { 1378 if (family == PFO) {
1380 if (of_get_property(of_node, "interrupt-controller", NULL)) { 1379 if (of_get_property(of_node, "interrupt-controller", NULL)) {
1381 pr_debug("%s: Skipping the interrupt controller %s.\n", 1380 pr_debug("%s: Skipping the interrupt controller %pOFn.\n",
1382 __func__, of_node_name); 1381 __func__, of_node);
1383 return NULL; 1382 return NULL;
1384 } 1383 }
1385 } 1384 }
@@ -1399,15 +1398,15 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
1399 if (of_node->type != NULL) 1398 if (of_node->type != NULL)
1400 viodev->type = of_node->type; 1399 viodev->type = of_node->type;
1401 else { 1400 else {
1402 pr_warn("%s: node %s is missing the 'device_type' " 1401 pr_warn("%s: node %pOFn is missing the 'device_type' "
1403 "property.\n", __func__, of_node_name); 1402 "property.\n", __func__, of_node);
1404 goto out; 1403 goto out;
1405 } 1404 }
1406 1405
1407 prop = of_get_property(of_node, "reg", NULL); 1406 prop = of_get_property(of_node, "reg", NULL);
1408 if (prop == NULL) { 1407 if (prop == NULL) {
1409 pr_warn("%s: node %s missing 'reg'\n", 1408 pr_warn("%s: node %pOFn missing 'reg'\n",
1410 __func__, of_node_name); 1409 __func__, of_node);
1411 goto out; 1410 goto out;
1412 } 1411 }
1413 unit_address = of_read_number(prop, 1); 1412 unit_address = of_read_number(prop, 1);
@@ -1422,8 +1421,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
1422 if (prop != NULL) 1421 if (prop != NULL)
1423 viodev->resource_id = of_read_number(prop, 1); 1422 viodev->resource_id = of_read_number(prop, 1);
1424 1423
1425 dev_set_name(&viodev->dev, "%s", of_node_name); 1424 dev_set_name(&viodev->dev, "%pOFn", of_node);
1426 viodev->type = of_node_name; 1425 viodev->type = dev_name(&viodev->dev);
1427 viodev->irq = 0; 1426 viodev->irq = 0;
1428 } 1427 }
1429 1428
@@ -1694,7 +1693,7 @@ struct vio_dev *vio_find_node(struct device_node *vnode)
1694 snprintf(kobj_name, sizeof(kobj_name), "%x", 1693 snprintf(kobj_name, sizeof(kobj_name), "%x",
1695 (uint32_t)of_read_number(prop, 1)); 1694 (uint32_t)of_read_number(prop, 1));
1696 } else if (!strcmp(dev_type, "ibm,platform-facilities")) 1695 } else if (!strcmp(dev_type, "ibm,platform-facilities"))
1697 snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name); 1696 snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode);
1698 else 1697 else
1699 return NULL; 1698 return NULL;
1700 1699
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index bcef2ac56479..e0dbec780fe9 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -6,19 +6,16 @@
6config PPC4xx_PCI_EXPRESS 6config PPC4xx_PCI_EXPRESS
7 bool 7 bool
8 depends on PCI && 4xx 8 depends on PCI && 4xx
9 default n
10 9
11config PPC4xx_HSTA_MSI 10config PPC4xx_HSTA_MSI
12 bool 11 bool
13 depends on PCI_MSI 12 depends on PCI_MSI
14 depends on PCI && 4xx 13 depends on PCI && 4xx
15 default n
16 14
17config PPC4xx_MSI 15config PPC4xx_MSI
18 bool 16 bool
19 depends on PCI_MSI 17 depends on PCI_MSI
20 depends on PCI && 4xx 18 depends on PCI && 4xx
21 default n
22 19
23config PPC_MSI_BITMAP 20config PPC_MSI_BITMAP
24 bool 21 bool
@@ -37,11 +34,9 @@ config PPC_SCOM
37config SCOM_DEBUGFS 34config SCOM_DEBUGFS
38 bool "Expose SCOM controllers via debugfs" 35 bool "Expose SCOM controllers via debugfs"
39 depends on PPC_SCOM && DEBUG_FS 36 depends on PPC_SCOM && DEBUG_FS
40 default n
41 37
42config GE_FPGA 38config GE_FPGA
43 bool 39 bool
44 default n
45 40
46config FSL_CORENET_RCPM 41config FSL_CORENET_RCPM
47 bool 42 bool
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index f730539074c4..2caa4defdfb6 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -1,5 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
3 2
4ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 3ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
5 4
@@ -56,8 +55,6 @@ obj-$(CONFIG_PPC_SCOM) += scom.o
56 55
57obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o 56obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
58 57
59subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
60
61obj-$(CONFIG_PPC_XICS) += xics/ 58obj-$(CONFIG_PPC_XICS) += xics/
62obj-$(CONFIG_PPC_XIVE) += xive/ 59obj-$(CONFIG_PPC_XIVE) += xive/
63 60
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 5ca3e22d0512..a5b40d1460f1 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -261,7 +261,7 @@ static void allocate_dart(void)
261 * that to work around what looks like a problem with the HT bridge 261 * that to work around what looks like a problem with the HT bridge
262 * prefetching into invalid pages and corrupting data 262 * prefetching into invalid pages and corrupting data
263 */ 263 */
264 tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); 264 tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
265 dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & 265 dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
266 DARTMAP_RPNMASK); 266 DARTMAP_RPNMASK);
267 267
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
index 00ccf3e4fcb4..15cbdd4fde06 100644
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -107,11 +107,11 @@ int __init instantiate_cache_sram(struct platform_device *dev,
107 goto out_free; 107 goto out_free;
108 } 108 }
109 109
110 cache_sram->base_virt = ioremap_prot(cache_sram->base_phys, 110 cache_sram->base_virt = ioremap_coherent(cache_sram->base_phys,
111 cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL); 111 cache_sram->size);
112 if (!cache_sram->base_virt) { 112 if (!cache_sram->base_virt) {
113 dev_err(&dev->dev, "%pOF: ioremap_prot failed\n", 113 dev_err(&dev->dev, "%pOF: ioremap_coherent failed\n",
114 dev->dev.of_node); 114 dev->dev.of_node);
115 ret = -ENOMEM; 115 ret = -ENOMEM;
116 goto out_release; 116 goto out_release;
117 } 117 }
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 535cf1f6941c..6300123ce965 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -846,7 +846,7 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
846 846
847u32 ipic_get_mcp_status(void) 847u32 ipic_get_mcp_status(void)
848{ 848{
849 return ipic_read(primary_ipic->regs, IPIC_SERSR); 849 return primary_ipic ? ipic_read(primary_ipic->regs, IPIC_SERSR) : 0;
850} 850}
851 851
852void ipic_clear_mcp_status(u32 mask) 852void ipic_clear_mcp_status(u32 mask)
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index e64a411d1a00..d45450f6666a 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -12,7 +12,7 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/kmemleak.h> 13#include <linux/kmemleak.h>
14#include <linux/bitmap.h> 14#include <linux/bitmap.h>
15#include <linux/bootmem.h> 15#include <linux/memblock.h>
16#include <asm/msi_bitmap.h> 16#include <asm/msi_bitmap.h>
17#include <asm/setup.h> 17#include <asm/setup.h>
18 18
@@ -128,7 +128,7 @@ int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
128 if (bmp->bitmap_from_slab) 128 if (bmp->bitmap_from_slab)
129 bmp->bitmap = kzalloc(size, GFP_KERNEL); 129 bmp->bitmap = kzalloc(size, GFP_KERNEL);
130 else { 130 else {
131 bmp->bitmap = memblock_virt_alloc(size, 0); 131 bmp->bitmap = memblock_alloc(size, SMP_CACHE_BYTES);
132 /* the bitmap won't be freed from memblock allocator */ 132 /* the bitmap won't be freed from memblock allocator */
133 kmemleak_not_leak(bmp->bitmap); 133 kmemleak_not_leak(bmp->bitmap);
134 } 134 }
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
index 5d438d92472b..ba1e3117b1c0 100644
--- a/arch/powerpc/sysdev/xics/Makefile
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -1,5 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
3 2
4obj-y += xics-common.o 3obj-y += xics-common.o
5obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o 4obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 70ee976e1de0..785c292d104b 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -1,17 +1,14 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2config PPC_XIVE 2config PPC_XIVE
3 bool 3 bool
4 default n
5 select PPC_SMP_MUXED_IPI 4 select PPC_SMP_MUXED_IPI
6 select HARDIRQS_SW_RESEND 5 select HARDIRQS_SW_RESEND
7 6
8config PPC_XIVE_NATIVE 7config PPC_XIVE_NATIVE
9 bool 8 bool
10 default n
11 select PPC_XIVE 9 select PPC_XIVE
12 depends on PPC_POWERNV 10 depends on PPC_POWERNV
13 11
14config PPC_XIVE_SPAPR 12config PPC_XIVE_SPAPR
15 bool 13 bool
16 default n
17 select PPC_XIVE 14 select PPC_XIVE
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile
index 536d6e5706e3..dea2abc23f4d 100644
--- a/arch/powerpc/sysdev/xive/Makefile
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -1,4 +1,3 @@
1subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
2 1
3obj-y += common.o 2obj-y += common.o
4obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o 3obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 959a2a62f233..9824074ec1b5 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1010,12 +1010,13 @@ static void xive_ipi_eoi(struct irq_data *d)
1010{ 1010{
1011 struct xive_cpu *xc = __this_cpu_read(xive_cpu); 1011 struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1012 1012
1013 DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
1014 d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
1015
1016 /* Handle possible race with unplug and drop stale IPIs */ 1013 /* Handle possible race with unplug and drop stale IPIs */
1017 if (!xc) 1014 if (!xc)
1018 return; 1015 return;
1016
1017 DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
1018 d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
1019
1019 xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); 1020 xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
1020 xive_do_queue_eoi(xc); 1021 xive_do_queue_eoi(xc);
1021} 1022}
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 5b20a678d755..1ca127d052a6 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -238,20 +238,11 @@ static bool xive_native_match(struct device_node *node)
238#ifdef CONFIG_SMP 238#ifdef CONFIG_SMP
239static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) 239static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
240{ 240{
241 struct device_node *np;
242 unsigned int chip_id;
243 s64 irq; 241 s64 irq;
244 242
245 /* Find the chip ID */
246 np = of_get_cpu_node(cpu, NULL);
247 if (np) {
248 if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
249 chip_id = 0;
250 }
251
252 /* Allocate an IPI and populate info about it */ 243 /* Allocate an IPI and populate info about it */
253 for (;;) { 244 for (;;) {
254 irq = opal_xive_allocate_irq(chip_id); 245 irq = opal_xive_allocate_irq(xc->chip_id);
255 if (irq == OPAL_BUSY) { 246 if (irq == OPAL_BUSY) {
256 msleep(OPAL_BUSY_DELAY_MS); 247 msleep(OPAL_BUSY_DELAY_MS);
257 continue; 248 continue;
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 1bc3abb237cd..69e7fb47bcaa 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -1,14 +1,15 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2# Makefile for xmon 2# Makefile for xmon
3 3
4subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror 4# Disable clang warning for using setjmp without setjmp.h header
5subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
5 6
6GCOV_PROFILE := n 7GCOV_PROFILE := n
7UBSAN_SANITIZE := n 8UBSAN_SANITIZE := n
8 9
9# Disable ftrace for the entire directory 10# Disable ftrace for the entire directory
10ORIG_CFLAGS := $(KBUILD_CFLAGS) 11ORIG_CFLAGS := $(KBUILD_CFLAGS)
11KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))) 12KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
12 13
13ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 14ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
14 15
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 4264aedc7775..36b8dc47a3c3 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2378,25 +2378,33 @@ static void dump_one_paca(int cpu)
2378 DUMP(p, cpu_start, "%#-*x"); 2378 DUMP(p, cpu_start, "%#-*x");
2379 DUMP(p, kexec_state, "%#-*x"); 2379 DUMP(p, kexec_state, "%#-*x");
2380#ifdef CONFIG_PPC_BOOK3S_64 2380#ifdef CONFIG_PPC_BOOK3S_64
2381 for (i = 0; i < SLB_NUM_BOLTED; i++) { 2381 if (!early_radix_enabled()) {
2382 u64 esid, vsid; 2382 for (i = 0; i < SLB_NUM_BOLTED; i++) {
2383 u64 esid, vsid;
2383 2384
2384 if (!p->slb_shadow_ptr) 2385 if (!p->slb_shadow_ptr)
2385 continue; 2386 continue;
2387
2388 esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid);
2389 vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
2386 2390
2387 esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid); 2391 if (esid || vsid) {
2388 vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid); 2392 printf(" %-*s[%d] = 0x%016llx 0x%016llx\n",
2393 22, "slb_shadow", i, esid, vsid);
2394 }
2395 }
2396 DUMP(p, vmalloc_sllp, "%#-*x");
2397 DUMP(p, stab_rr, "%#-*x");
2398 DUMP(p, slb_used_bitmap, "%#-*x");
2399 DUMP(p, slb_kern_bitmap, "%#-*x");
2389 2400
2390 if (esid || vsid) { 2401 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
2391 printf(" %-*s[%d] = 0x%016llx 0x%016llx\n", 2402 DUMP(p, slb_cache_ptr, "%#-*x");
2392 22, "slb_shadow", i, esid, vsid); 2403 for (i = 0; i < SLB_CACHE_ENTRIES; i++)
2404 printf(" %-*s[%d] = 0x%016x\n",
2405 22, "slb_cache", i, p->slb_cache[i]);
2393 } 2406 }
2394 } 2407 }
2395 DUMP(p, vmalloc_sllp, "%#-*x");
2396 DUMP(p, slb_cache_ptr, "%#-*x");
2397 for (i = 0; i < SLB_CACHE_ENTRIES; i++)
2398 printf(" %-*s[%d] = 0x%016x\n",
2399 22, "slb_cache", i, p->slb_cache[i]);
2400 2408
2401 DUMP(p, rfi_flush_fallback_area, "%-*px"); 2409 DUMP(p, rfi_flush_fallback_area, "%-*px");
2402#endif 2410#endif
@@ -2412,7 +2420,9 @@ static void dump_one_paca(int cpu)
2412 DUMP(p, __current, "%-*px"); 2420 DUMP(p, __current, "%-*px");
2413 DUMP(p, kstack, "%#-*llx"); 2421 DUMP(p, kstack, "%#-*llx");
2414 printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1)); 2422 printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1));
2415 DUMP(p, stab_rr, "%#-*llx"); 2423#ifdef CONFIG_STACKPROTECTOR
2424 DUMP(p, canary, "%#-*lx");
2425#endif
2416 DUMP(p, saved_r1, "%#-*llx"); 2426 DUMP(p, saved_r1, "%#-*llx");
2417 DUMP(p, trap_save, "%#-*x"); 2427 DUMP(p, trap_save, "%#-*x");
2418 DUMP(p, irq_soft_mask, "%#-*x"); 2428 DUMP(p, irq_soft_mask, "%#-*x");
@@ -2444,11 +2454,15 @@ static void dump_one_paca(int cpu)
2444 2454
2445 DUMP(p, accounting.utime, "%#-*lx"); 2455 DUMP(p, accounting.utime, "%#-*lx");
2446 DUMP(p, accounting.stime, "%#-*lx"); 2456 DUMP(p, accounting.stime, "%#-*lx");
2457#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
2447 DUMP(p, accounting.utime_scaled, "%#-*lx"); 2458 DUMP(p, accounting.utime_scaled, "%#-*lx");
2459#endif
2448 DUMP(p, accounting.starttime, "%#-*lx"); 2460 DUMP(p, accounting.starttime, "%#-*lx");
2449 DUMP(p, accounting.starttime_user, "%#-*lx"); 2461 DUMP(p, accounting.starttime_user, "%#-*lx");
2462#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
2450 DUMP(p, accounting.startspurr, "%#-*lx"); 2463 DUMP(p, accounting.startspurr, "%#-*lx");
2451 DUMP(p, accounting.utime_sspurr, "%#-*lx"); 2464 DUMP(p, accounting.utime_sspurr, "%#-*lx");
2465#endif
2452 DUMP(p, accounting.steal_time, "%#-*lx"); 2466 DUMP(p, accounting.steal_time, "%#-*lx");
2453#undef DUMP 2467#undef DUMP
2454 2468
@@ -2988,15 +3002,17 @@ static void show_task(struct task_struct *tsk)
2988#ifdef CONFIG_PPC_BOOK3S_64 3002#ifdef CONFIG_PPC_BOOK3S_64
2989void format_pte(void *ptep, unsigned long pte) 3003void format_pte(void *ptep, unsigned long pte)
2990{ 3004{
3005 pte_t entry = __pte(pte);
3006
2991 printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte); 3007 printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte);
2992 printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK); 3008 printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK);
2993 3009
2994 printf("Flags = %s%s%s%s%s\n", 3010 printf("Flags = %s%s%s%s%s\n",
2995 (pte & _PAGE_ACCESSED) ? "Accessed " : "", 3011 pte_young(entry) ? "Accessed " : "",
2996 (pte & _PAGE_DIRTY) ? "Dirty " : "", 3012 pte_dirty(entry) ? "Dirty " : "",
2997 (pte & _PAGE_READ) ? "Read " : "", 3013 pte_read(entry) ? "Read " : "",
2998 (pte & _PAGE_WRITE) ? "Write " : "", 3014 pte_write(entry) ? "Write " : "",
2999 (pte & _PAGE_EXEC) ? "Exec " : ""); 3015 pte_exec(entry) ? "Exec " : "");
3000} 3016}
3001 3017
3002static void show_pte(unsigned long addr) 3018static void show_pte(unsigned long addr)