author    Linus Torvalds <torvalds@linux-foundation.org>  2017-11-16 15:47:46 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-11-16 15:47:46 -0500
commit    5b0e2cb020085efe202123162502e0b551e49a0e (patch)
tree      534bbb4c9f98c2ed9a520e11107029e5df38c3c2
parent    758f875848d78148cf9a9cdb3ff1ddf29b234056 (diff)
parent    3ffa9d9e2a7c10127d8cbf91ea2be15390b450ed (diff)
Merge tag 'powerpc-4.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
 "A bit of a small release, I suspect in part due to me travelling for
  KS. But my backlog of patches to review is smaller than usual, so I
  think in part folks just didn't send as much this cycle.

  Non-highlights:

   - Five fixes for the >128T address space handling, both to fix bugs
     in our implementation and to bring the semantics exactly into line
     with x86.

  Highlights:

   - Support for a new OPAL call on bare metal machines which gives us
     a true NMI (ie. is not masked by MSR[EE]=0) for debugging etc.

   - Support for Power9 DD2 in the CXL driver.

   - Improvements to machine check handling so that uncorrectable
     errors can be reported into the generic memory_failure()
     machinery.

   - Some fixes and improvements for VPHN, which is used under PowerVM
     to notify the Linux partition of topology changes.

   - Plumbing to enable TM (transactional memory) without suspend on
     some Power9 processors (PPC_FEATURE2_HTM_NO_SUSPEND).

   - Support for emulating vector loads from cache-inhibited memory, on
     some Power9 revisions.

   - Disable the fast-endian switch "syscall" by default (behind a
     CONFIG), we believe it has never had any users.

   - A major rework of the API drivers use when initiating and waiting
     for long running operations performed by OPAL firmware, and
     changes to the powernv_flash driver to use the new API.

   - Several fixes for the handling of FP/VMX/VSX while processes are
     using transactional memory.

   - Optimisations of TLB range flushes when using the radix MMU on
     Power9.

   - Improvements to the VAS facility used to access coprocessors on
     Power9, and related improvements to the way the NX crypto driver
     handles requests.

   - Implementation of PMEM_API and UACCESS_FLUSHCACHE for 64-bit.

  Thanks to: Alexey Kardashevskiy, Alistair Popple, Allen Pais, Andrew
  Donnellan, Aneesh Kumar K.V, Arnd Bergmann, Balbir Singh, Benjamin
  Herrenschmidt, Breno Leitao, Christophe Leroy, Christophe Lombard,
  Cyril Bur, Frederic Barrat, Gautham R. Shenoy, Geert Uytterhoeven,
  Guilherme G. Piccoli, Gustavo Romero, Haren Myneni, Joel Stanley,
  Kamalesh Babulal, Kautuk Consul, Markus Elfring, Masami Hiramatsu,
  Michael Bringmann, Michael Neuling, Michal Suchanek, Naveen N. Rao,
  Nicholas Piggin, Oliver O'Halloran, Paul Mackerras, Pedro Miraglia
  Franco de Carvalho, Philippe Bergheaud, Sandipan Das, Seth Forshee,
  Shriya, Stephen Rothwell, Stewart Smith, Sukadev Bhattiprolu, Tyrel
  Datwyler, Vaibhav Jain, Vaidyanathan Srinivasan, and William A.
  Kennington III"

* tag 'powerpc-4.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (151 commits)
  powerpc/64s: Fix Power9 DD2.0 workarounds by adding DD2.1 feature
  powerpc/64s: Fix masking of SRR1 bits on instruction fault
  powerpc/64s: mm_context.addr_limit is only used on hash
  powerpc/64s/radix: Fix 128TB-512TB virtual address boundary case allocation
  powerpc/64s/hash: Allow MAP_FIXED allocations to cross 128TB boundary
  powerpc/64s/hash: Fix fork() with 512TB process address space
  powerpc/64s/hash: Fix 128TB-512TB virtual address boundary case allocation
  powerpc/64s/hash: Fix 512T hint detection to use >= 128T
  powerpc: Fix DABR match on hash based systems
  powerpc/signal: Properly handle return value from uprobe_deny_signal()
  powerpc/fadump: use kstrtoint to handle sysfs store
  powerpc/lib: Implement UACCESS_FLUSHCACHE API
  powerpc/lib: Implement PMEM API
  powerpc/powernv/npu: Don't explicitly flush nmmu tlb
  powerpc/powernv/npu: Use flush_all_mm() instead of flush_tlb_mm()
  powerpc/powernv/idle: Round up latency and residency values
  powerpc/kprobes: refactor kprobe_lookup_name for safer string operations
  powerpc/kprobes: Blacklist emulate_update_regs() from kprobes
  powerpc/kprobes: Do not disable interrupts for optprobes and kprobes_on_ftrace
  powerpc/kprobes: Disable preemption before invoking probe handler for optprobes
  ...
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 4
-rw-r--r--  arch/powerpc/Kconfig | 8
-rw-r--r--  arch/powerpc/Kconfig.debug | 6
-rw-r--r--  arch/powerpc/boot/dts/acadia.dts | 2
-rw-r--r--  arch/powerpc/configs/powernv_defconfig | 2
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 1
-rw-r--r--  arch/powerpc/configs/skiroot_defconfig | 232
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 22
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 3
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush.h | 15
-rw-r--r--  arch/powerpc/include/asm/cputable.h | 12
-rw-r--r--  arch/powerpc/include/asm/eeh.h | 10
-rw-r--r--  arch/powerpc/include/asm/emulated_ops.h | 4
-rw-r--r--  arch/powerpc/include/asm/epapr_hcalls.h | 12
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 5
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 6
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h | 1
-rw-r--r--  arch/powerpc/include/asm/kprobes.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 4
-rw-r--r--  arch/powerpc/include/asm/mce.h | 4
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 50
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 3
-rw-r--r--  arch/powerpc/include/asm/opal.h | 6
-rw-r--r--  arch/powerpc/include/asm/paca.h | 13
-rw-r--r--  arch/powerpc/include/asm/page_64.h | 6
-rw-r--r--  arch/powerpc/include/asm/pci-bridge.h | 1
-rw-r--r--  arch/powerpc/include/asm/pgtable-be-types.h | 2
-rw-r--r--  arch/powerpc/include/asm/pgtable-types.h | 4
-rw-r--r--  arch/powerpc/include/asm/powernv.h | 4
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 27
-rw-r--r--  arch/powerpc/include/asm/processor.h | 3
-rw-r--r--  arch/powerpc/include/asm/string.h | 2
-rw-r--r--  arch/powerpc/include/asm/switch_to.h | 5
-rw-r--r--  arch/powerpc/include/asm/tlbflush.h | 2
-rw-r--r--  arch/powerpc/include/asm/tm.h | 7
-rw-r--r--  arch/powerpc/include/asm/topology.h | 8
-rw-r--r--  arch/powerpc/include/asm/uaccess.h | 22
-rw-r--r--  arch/powerpc/include/asm/vas.h | 21
-rw-r--r--  arch/powerpc/include/uapi/asm/cputable.h | 1
-rw-r--r--  arch/powerpc/kernel/Makefile | 2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 6
-rw-r--r--  arch/powerpc/kernel/cputable.c | 24
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 4
-rw-r--r--  arch/powerpc/kernel/eeh.c | 46
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 2
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 8
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 4
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 49
-rw-r--r--  arch/powerpc/kernel/fadump.c | 17
-rw-r--r--  arch/powerpc/kernel/head_32.S | 2
-rw-r--r--  arch/powerpc/kernel/head_64.S | 16
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 70
-rw-r--r--  arch/powerpc/kernel/irq.c | 51
-rw-r--r--  arch/powerpc/kernel/kprobes-ftrace.c | 34
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 92
-rw-r--r--  arch/powerpc/kernel/machine_kexec_64.c | 4
-rw-r--r--  arch/powerpc/kernel/mce.c | 147
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 115
-rw-r--r--  arch/powerpc/kernel/module_64.c | 3
-rw-r--r--  arch/powerpc/kernel/optprobes.c | 15
-rw-r--r--  arch/powerpc/kernel/paca.c | 16
-rw-r--r--  arch/powerpc/kernel/pci_64.c | 4
-rw-r--r--  arch/powerpc/kernel/process.c | 225
-rw-r--r--  arch/powerpc/kernel/prom.c | 37
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 7
-rw-r--r--  arch/powerpc/kernel/setup.h | 6
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 19
-rw-r--r--  arch/powerpc/kernel/signal.c | 2
-rw-r--r--  arch/powerpc/kernel/signal_32.c | 6
-rw-r--r--  arch/powerpc/kernel/signal_64.c | 7
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 11
-rw-r--r--  arch/powerpc/kernel/tau_6xx.c | 3
-rw-r--r--  arch/powerpc/kernel/tm.S | 59
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 4
-rw-r--r--  arch/powerpc/kernel/traps.c | 256
-rw-r--r--  arch/powerpc/kernel/watchdog.c | 29
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 20
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 3
-rw-r--r--  arch/powerpc/lib/Makefile | 2
-rw-r--r--  arch/powerpc/lib/pmem.c | 67
-rw-r--r--  arch/powerpc/lib/sstep.c | 20
-rw-r--r--  arch/powerpc/mm/Makefile | 6
-rw-r--r--  arch/powerpc/mm/dump_hashpagetable.c | 2
-rw-r--r--  arch/powerpc/mm/dump_linuxpagetables.c | 10
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 1
-rw-r--r--  arch/powerpc/mm/hugetlbpage-radix.c | 20
-rw-r--r--  arch/powerpc/mm/init_64.c | 21
-rw-r--r--  arch/powerpc/mm/mmap.c | 49
-rw-r--r--  arch/powerpc/mm/mmu_context.c | 9
-rw-r--r--  arch/powerpc/mm/mmu_context_book3s64.c | 33
-rw-r--r--  arch/powerpc/mm/numa.c | 63
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c | 10
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 2
-rw-r--r--  arch/powerpc/mm/slb_low.S | 6
-rw-r--r--  arch/powerpc/mm/slice.c | 62
-rw-r--r--  arch/powerpc/mm/tlb-radix.c | 347
-rw-r--r--  arch/powerpc/net/bpf_jit64.h | 7
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 16
-rw-r--r--  arch/powerpc/oprofile/op_model_cell.c | 8
-rw-r--r--  arch/powerpc/perf/hv-24x7.c | 2
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 19
-rw-r--r--  arch/powerpc/platforms/powermac/low_i2c.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/Makefile | 3
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c | 42
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c | 28
-rw-r--r--  arch/powerpc/platforms/powernv/opal-async.c | 180
-rw-r--r--  arch/powerpc/platforms/powernv/opal-hmi.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-irqchip.c | 8
-rw-r--r--  arch/powerpc/platforms/powernv/opal-memory-errors.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-sensor.c | 17
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 5
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 29
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h | 4
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 26
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c | 59
-rw-r--r--  arch/powerpc/platforms/powernv/vas-debug.c | 209
-rw-r--r--  arch/powerpc/platforms/powernv/vas-window.c | 242
-rw-r--r--  arch/powerpc/platforms/powernv/vas.c | 31
-rw-r--r--  arch/powerpc/platforms/powernv/vas.h | 93
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 19
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 8
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c | 2
-rw-r--r--  arch/powerpc/sysdev/axonram.c | 2
-rw-r--r--  arch/powerpc/sysdev/ipic.c | 4
-rw-r--r--  arch/powerpc/xmon/xmon.c | 169
-rw-r--r--  drivers/cpuidle/cpuidle-powernv.c | 4
-rw-r--r--  drivers/crypto/nx/nx-842-powernv.c | 167
-rw-r--r--  drivers/crypto/nx/nx-842.c | 2
-rw-r--r--  drivers/misc/cxl/api.c | 16
-rw-r--r--  drivers/misc/cxl/context.c | 3
-rw-r--r--  drivers/misc/cxl/cxl.h | 22
-rw-r--r--  drivers/misc/cxl/debugfs.c | 29
-rw-r--r--  drivers/misc/cxl/fault.c | 15
-rw-r--r--  drivers/misc/cxl/file.c | 24
-rw-r--r--  drivers/misc/cxl/native.c | 27
-rw-r--r--  drivers/misc/cxl/pci.c | 88
-rw-r--r--  drivers/mtd/devices/powernv_flash.c | 83
-rw-r--r--  tools/testing/selftests/powerpc/benchmarks/context_switch.c | 17
-rw-r--r--  tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c | 6
-rw-r--r--  tools/testing/selftests/powerpc/tm/.gitignore | 1
-rw-r--r--  tools/testing/selftests/powerpc/tm/Makefile | 3
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-unavailable.c | 371
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm.h | 5
150 files changed, 3696 insertions(+), 1144 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5ab1089d1422..62436bd5f34a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3204,6 +3204,10 @@
 			allowed (eg kernel_enable_fpu()/kernel_disable_fpu()).
 			There is some performance impact when enabling this.
 
+	ppc_tm=		[PPC]
+			Format: {"off"}
+			Disable Hardware Transactional Memory
+
 	print-fatal-signals=
 			[KNL] debug: print fatal signals
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index cb782ac1c35d..c51e6ce42e7a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,9 +139,11 @@ config PPC
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_PMEM_API                if PPC64
 	select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
+	select ARCH_HAS_UACCESS_FLUSHCACHE      if PPC64
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_ZONE_DEVICE             if PPC_BOOK3S_64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -335,7 +337,7 @@ config PPC_OF_PLATFORM_PCI
 	default n
 
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
-	depends on PPC32 || PPC_STD_MMU_64
+	depends on PPC32 || PPC_BOOK3S_64
 	def_bool y
 
 config ARCH_SUPPORTS_UPROBES
@@ -722,7 +724,7 @@ config PPC_16K_PAGES
 
 config PPC_64K_PAGES
 	bool "64k page size"
-	depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
+	depends on !PPC_FSL_BOOK3E && (44x || PPC_BOOK3S_64 || PPC_BOOK3E_64)
 	select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
 
 config PPC_256K_PAGES
@@ -781,7 +783,7 @@ config FORCE_MAX_ZONEORDER
 
 config PPC_SUBPAGE_PROT
 	bool "Support setting protections for 4k subpages"
-	depends on PPC_STD_MMU_64 && PPC_64K_PAGES
+	depends on PPC_BOOK3S_64 && PPC_64K_PAGES
 	help
 	  This option adds support for a system call to allow user programs
 	  to set access permissions (read/write, readonly, or no access)
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index be1c8c5beb61..657c33cd4eee 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -370,4 +370,10 @@ config PPC_HTDUMP
 	def_bool y
 	depends on PPC_PTDUMP && PPC_BOOK3S
 
+config PPC_FAST_ENDIAN_SWITCH
+	bool "Deprecated fast endian-switch syscall"
+	depends on DEBUG_KERNEL && PPC_BOOK3S_64
+	help
+	  If you're unsure what this is, say N.
+
 endmenu
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index 57291f61ffe7..86266159521e 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -183,7 +183,7 @@
 	usb@ef603000 {
 		compatible = "ohci-be";
 		reg = <0xef603000 0x80>;
-		interrupts-parent = <&UIC0>;
+		interrupt-parent = <&UIC0>;
 		interrupts = <0xd 0x4 0xe 0x4>;
 	};
 
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index caee834760d2..4891bbed6258 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -192,6 +192,7 @@ CONFIG_IPMI_DEVICE_INTERFACE=y
 CONFIG_IPMI_POWERNV=y
 CONFIG_RAW_DRIVER=y
 CONFIG_MAX_RAW_DEVS=1024
+CONFIG_I2C_CHARDEV=y
 CONFIG_DRM=y
 CONFIG_DRM_AST=y
 CONFIG_FIRMWARE_EDID=y
@@ -295,6 +296,7 @@ CONFIG_FUNCTION_GRAPH_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PPC_EMULATED_STATS=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 3d935969e5a2..bde2cd1005a2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -193,6 +193,7 @@ CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
 CONFIG_RAW_DRIVER=y
 CONFIG_MAX_RAW_DEVS=1024
+CONFIG_I2C_CHARDEV=y
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
 CONFIG_FB_OF=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
new file mode 100644
index 000000000000..6bd5e7261335
--- /dev/null
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -0,0 +1,232 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_JUMP_LABEL=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SIG_FORCE=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
+CONFIG_PPC_64K_PAGES=y
+CONFIG_SCHED_SMT=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=tty0 console=hvc0 powersave=off"
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+CONFIG_DNS_RESOLVER=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=m
+CONFIG_MTD_POWERNV_FLASH=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_EEPROM_AT24=y
+# CONFIG_CXL is not set
+CONFIG_BLK_DEV_SD=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IPR=m
+# CONFIG_SCSI_IPR_TRACE is not set
+# CONFIG_SCSI_IPR_DUMP is not set
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=y
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_E100=m
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_SFC=m
+# CONFIG_USB_NET_DRIVERS is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+CONFIG_HW_RANDOM=y
+CONFIG_TCG_TIS_I2C_NUVOTON=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_DRM_AST=m
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_GENERIC is not set
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=m
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_PCI=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_ISO9660_FS=m
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC16=y
+CONFIG_CRC_ITU_T=y
+CONFIG_LIBCRC32C=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHEDSTATS=y
+# CONFIG_FTRACE is not set
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+CONFIG_SECURITY=y
+CONFIG_IMA=y
+CONFIG_EVM=y
+# CONFIG_CRYPTO_ECHAINIV is not set
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_CMAC=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 508275bb05d5..e91e115a816f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -606,7 +606,7 @@ extern void slb_set_size(u16 size);
 
 /* 4 bits per slice and we have one slice per 1TB */
 #define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
-#define TASK_SLICE_ARRAY_SZ(x)	((x)->context.addr_limit >> 41)
+#define TASK_SLICE_ARRAY_SZ(x)	((x)->context.slb_addr_limit >> 41)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 37fdede5a24c..c9448e19847a 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -93,7 +93,7 @@ typedef struct {
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;	/* SLB page size encodings */
 	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
-	unsigned long addr_limit;
+	unsigned long slb_addr_limit;
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 42178897a050..849ecaae9e79 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -66,6 +66,28 @@ static inline void hash__flush_tlb_mm(struct mm_struct *mm)
 {
 }
 
+static inline void hash__local_flush_all_mm(struct mm_struct *mm)
+{
+	/*
+	 * There's no Page Walk Cache for hash, so what is needed is
+	 * the same as flush_tlb_mm(), which doesn't really make sense
+	 * with hash. So the only thing we could do is flush the
+	 * entire LPID! Punt for now, as it's not being used.
+	 */
+	WARN_ON_ONCE(1);
+}
+
+static inline void hash__flush_all_mm(struct mm_struct *mm)
+{
+	/*
+	 * There's no Page Walk Cache for hash, so what is needed is
+	 * the same as flush_tlb_mm(), which doesn't really make sense
+	 * with hash. So the only thing we could do is flush the
+	 * entire LPID! Punt for now, as it's not being used.
+	 */
+	WARN_ON_ONCE(1);
+}
+
 static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
 					      unsigned long vmaddr)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index c2115dfcef0c..6a9e68003387 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -22,17 +22,20 @@ extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long sta
 extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
+extern void radix__local_flush_all_mm(struct mm_struct *mm);
 extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 					      int psize);
 extern void radix__tlb_flush(struct mmu_gather *tlb);
 #ifdef CONFIG_SMP
 extern void radix__flush_tlb_mm(struct mm_struct *mm);
+extern void radix__flush_all_mm(struct mm_struct *mm);
 extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 					int psize);
 #else
 #define radix__flush_tlb_mm(mm)		radix__local_flush_tlb_mm(mm)
+#define radix__flush_all_mm(mm)		radix__local_flush_all_mm(mm)
 #define radix__flush_tlb_page(vma,addr)	radix__local_flush_tlb_page(vma,addr)
 #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index fcffddbb3102..58b576f654b3 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -58,6 +58,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 	return hash__local_flush_tlb_page(vma, vmaddr);
 }
 
+static inline void local_flush_all_mm(struct mm_struct *mm)
+{
+	if (radix_enabled())
+		return radix__local_flush_all_mm(mm);
+	return hash__local_flush_all_mm(mm);
+}
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (radix_enabled())
@@ -80,9 +87,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 		return radix__flush_tlb_page(vma, vmaddr);
 	return hash__flush_tlb_page(vma, vmaddr);
 }
+
+static inline void flush_all_mm(struct mm_struct *mm)
+{
+	if (radix_enabled())
+		return radix__flush_all_mm(mm);
+	return hash__flush_all_mm(mm);
+}
 #else
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
 #define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
+#define flush_all_mm(mm)		local_flush_all_mm(mm)
 #endif /* CONFIG_SMP */
 /*
  * flush the page walk cache for the address
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 53b31c2bcdf4..0546663a98db 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -207,7 +207,7 @@ enum {
 #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0004000000000000)
 #define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0008000000000000)
 #define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_ICSWX			LONG_ASM_CONST(0x0020000000000000)
+/* Free				LONG_ASM_CONST(0x0020000000000000) */
 #define CPU_FTR_VMX_COPY		LONG_ASM_CONST(0x0040000000000000)
 #define CPU_FTR_TM			LONG_ASM_CONST(0x0080000000000000)
 #define CPU_FTR_CFAR			LONG_ASM_CONST(0x0100000000000000)
@@ -216,6 +216,7 @@ enum {
 #define CPU_FTR_DABRX			LONG_ASM_CONST(0x0800000000000000)
 #define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x1000000000000000)
 #define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x4000000000000000)
+#define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x8000000000000000)
 
 #ifndef __ASSEMBLY__
 
221 222
@@ -452,7 +453,7 @@ enum {
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
-	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | \
+	    CPU_FTR_CFAR | CPU_FTR_HVMODE | \
 	    CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
 #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
@@ -461,7 +462,7 @@ enum {
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
-	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
 	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
@@ -478,6 +479,8 @@ enum {
 	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
 #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
 			     (~CPU_FTR_SAO))
+#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
+#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -496,7 +499,8 @@ enum {
 	    (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
 	     CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
 	     CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
-	     CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1)
+	     CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
+	     CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1)
 #endif
 #else
 enum {
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9847ae3a12d1..5161c37dd039 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -93,7 +93,7 @@ struct eeh_pe {
 	struct pci_bus *bus;		/* Top PCI bus for bus PE	*/
 	int check_count;		/* Times of ignored error	*/
 	int freeze_count;		/* Times of froze up		*/
-	struct timeval tstamp;		/* Time on first-time freeze	*/
+	time64_t tstamp;		/* Time on first-time freeze	*/
 	int false_positives;		/* Times of reported #ff's	*/
 	atomic_t pass_dev_cnt;		/* Count of passed through devs	*/
 	struct eeh_pe *parent;		/* Parent PE			*/
@@ -200,7 +200,6 @@ enum {
 struct eeh_ops {
 	char *name;
 	int (*init)(void);
-	int (*post_init)(void);
 	void* (*probe)(struct pci_dn *pdn, void *data);
 	int (*set_option)(struct eeh_pe *pe, int option);
 	int (*get_pe_addr)(struct eeh_pe *pe);
@@ -275,7 +274,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
 struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
 void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-int eeh_init(void);
+void eeh_probe_devices(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
@@ -321,10 +320,7 @@ static inline bool eeh_enabled(void)
 	return false;
 }
 
-static inline int eeh_init(void)
-{
-	return 0;
-}
+static inline void eeh_probe_devices(void) { }
 
 static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
 {
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f00e10e2a335..651e1354498e 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -55,6 +55,10 @@ extern struct ppc_emulated {
 	struct ppc_emulated_entry mfdscr;
 	struct ppc_emulated_entry mtdscr;
 	struct ppc_emulated_entry lq_stq;
+	struct ppc_emulated_entry lxvw4x;
+	struct ppc_emulated_entry lxvh8x;
+	struct ppc_emulated_entry lxvd2x;
+	struct ppc_emulated_entry lxvb16x;
 #endif
 } ppc_emulated;
 
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 334459ad145b..90863245df53 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -508,7 +508,7 @@ static unsigned long epapr_hypercall(unsigned long *in,
 
 static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
 {
-	unsigned long in[8];
+	unsigned long in[8] = {0};
 	unsigned long out[8];
 	unsigned long r;
 
@@ -520,7 +520,7 @@ static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
 
 static inline long epapr_hypercall0(unsigned int nr)
 {
-	unsigned long in[8];
+	unsigned long in[8] = {0};
 	unsigned long out[8];
 
 	return epapr_hypercall(in, out, nr);
@@ -528,7 +528,7 @@ static inline long epapr_hypercall0(unsigned int nr)
 
 static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
 {
-	unsigned long in[8];
+	unsigned long in[8] = {0};
 	unsigned long out[8];
 
 	in[0] = p1;
@@ -538,7 +538,7 @@ static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
 static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
 				    unsigned long p2)
 {
-	unsigned long in[8];
+	unsigned long in[8] = {0};
 	unsigned long out[8];
 
 	in[0] = p1;
@@ -549,7 +549,7 @@ static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
 static inline long epapr_hypercall3(unsigned int nr, unsigned long p1,
 				    unsigned long p2, unsigned long p3)
 {
-	unsigned long in[8];
+	unsigned long in[8] = {0};
 	unsigned long out[8];
 
 	in[0] = p1;
@@ -562,7 +562,7 @@ static inline long epapr_hypercall4(unsigned int nr, unsigned long p1,
 				    unsigned long p2, unsigned long p3,
 				    unsigned long p4)
 {
-	unsigned long in[8];
+	unsigned long in[8] = {0};
 	unsigned long out[8];
 
 	in[0] = p1;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a318973af05..b27205297e1d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -55,6 +55,11 @@
 #endif
 
 /*
+ * maximum recursive depth of MCE exceptions
+ */
+#define MAX_MCE_DEPTH	4
+
+/*
  * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
  * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
  * in the save area so it's not necessary to overlap them. Could be used
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 93f98239159f..14c9d44f355b 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -41,12 +41,6 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 	return radix__flush_hugetlb_page(vma, vmaddr);
 }
 
-static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma,
-					      unsigned long vmaddr)
-{
-	if (radix_enabled())
-		return radix__local_flush_hugetlb_page(vma, vmaddr);
-}
 #else
 
 static inline pte_t *hugepd_page(hugepd_t hpd)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index abd04c36c251..3818fa0164f0 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -32,6 +32,7 @@
 
 #ifndef __ASSEMBLY__
 
+extern void replay_system_reset(void);
 extern void __replay_interrupt(unsigned int vector);
 
 extern void timer_interrupt(struct pt_regs *);
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 8814a7249ceb..9f3be5c8a4a3 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,8 +103,8 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
-extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
+extern int __is_active_jprobe(unsigned long addr);
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
 #else
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 83596f32f50b..7cea76f11c26 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -104,10 +104,6 @@ struct kvmppc_host_state {
 	u8 napping;
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/*
-	 * hwthread_req/hwthread_state pair is used to pull sibling threads
-	 * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
-	 */
 	u8 hwthread_req;
 	u8 hwthread_state;
 	u8 host_ipi;
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 190d69a7f701..3a1226e9b465 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -204,12 +204,10 @@ struct mce_error_info {
 
 extern void save_mce_event(struct pt_regs *regs, long handled,
 			   struct mce_error_info *mce_err, uint64_t nip,
-			   uint64_t addr);
+			   uint64_t addr, uint64_t phys_addr);
 extern int get_mce_event(struct machine_check_event *mce, bool release);
 extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt,
 					   bool user_mode);
-extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
-
 #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 492d8140a395..6177d43f0ce8 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -78,6 +78,52 @@ extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+	atomic_inc(&mm->context.active_cpus);
+}
+
+static inline void dec_mm_active_cpus(struct mm_struct *mm)
+{
+	atomic_dec(&mm->context.active_cpus);
+}
+
+static inline void mm_context_add_copro(struct mm_struct *mm)
+{
+	/*
+	 * On hash, should only be called once over the lifetime of
+	 * the context, as we can't decrement the active cpus count
+	 * and flush properly for the time being.
+	 */
+	inc_mm_active_cpus(mm);
+}
+
+static inline void mm_context_remove_copro(struct mm_struct *mm)
+{
+	/*
+	 * Need to broadcast a global flush of the full mm before
+	 * decrementing active_cpus count, as the next TLBI may be
+	 * local and the nMMU and/or PSL need to be cleaned up.
+	 * Should be rare enough so that it's acceptable.
+	 *
+	 * Skip on hash, as we don't know how to do the proper flush
+	 * for the time being. Invalidations will remain global if
+	 * used on hash.
+	 */
+	if (radix_enabled()) {
+		flush_all_mm(mm);
+		dec_mm_active_cpus(mm);
+	}
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
+static inline void mm_context_add_copro(struct mm_struct *mm) { }
+static inline void mm_context_remove_copro(struct mm_struct *mm) { }
+#endif
+
+
 extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			       struct task_struct *tsk);
 
@@ -119,9 +165,13 @@ static inline void arch_dup_mmap(struct mm_struct *oldmm,
 {
 }
 
+#ifndef CONFIG_PPC_BOOK3S_64
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
 }
+#else
+extern void arch_exit_mmap(struct mm_struct *mm);
+#endif
 
 static inline void arch_unmap(struct mm_struct *mm,
 			      struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 265bbd7cba73..abddf5830ad5 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -204,7 +204,7 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 	if (!huge)
 		assert_pte_locked(mm, addr);
 
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	if (old & _PAGE_HASHPTE)
 		hpte_need_flush(mm, addr, ptep, old, huge);
 #endif
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 450a60b81d2a..233c7504b1f2 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -188,6 +188,7 @@
 #define OPAL_XIVE_DUMP				142
 #define OPAL_XIVE_RESERVED3			143
 #define OPAL_XIVE_RESERVED4			144
+#define OPAL_SIGNAL_SYSTEM_RESET		145
 #define OPAL_NPU_INIT_CONTEXT			146
 #define OPAL_NPU_DESTROY_CONTEXT		147
 #define OPAL_NPU_MAP_LPAR			148
@@ -895,6 +896,8 @@ enum {
 	 */
 	OPAL_REINIT_CPUS_MMU_HASH	= (1 << 2),
 	OPAL_REINIT_CPUS_MMU_RADIX	= (1 << 3),
+
+	OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED = (1 << 4),
 };
 
 typedef struct oppanel_line {
899 902
900typedef struct oppanel_line { 903typedef struct oppanel_line {
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 726c23304a57..0c545f7fc77b 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -281,6 +281,8 @@ int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
 int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
 int opal_sensor_group_clear(u32 group_hndl, int token);
 
+s64 opal_signal_system_reset(s32 cpu);
+
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
 				   int depth, void *data);
@@ -304,11 +306,11 @@ extern void opal_notifier_enable(void);
 extern void opal_notifier_disable(void);
 extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
 
-extern int __opal_async_get_token(void);
 extern int opal_async_get_token_interruptible(void);
-extern int __opal_async_release_token(int token);
 extern int opal_async_release_token(int token);
 extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg);
+extern int opal_async_wait_response_interruptible(uint64_t token,
+						  struct opal_msg *msg);
 extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
 
 struct rtc_time;
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 04b60af027ae..3892db93b837 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -91,14 +91,14 @@ struct paca_struct {
 	u8 cpu_start;		/* At startup, processor spins until */
 				/* this becomes non-zero. */
 	u8 kexec_state;		/* set when kexec down has irqs off */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	struct slb_shadow *slb_shadow_ptr;
 	struct dtl_entry *dispatch_log;
 	struct dtl_entry *dispatch_log_end;
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
 	u64 dscr_default;	/* per-CPU default DSCR */
 
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	/*
 	 * Now, starting in cacheline 2, the exception save areas
 	 */
@@ -110,7 +110,7 @@ struct paca_struct {
 	u16 vmalloc_sllp;
 	u16 slb_cache_ptr;
 	u32 slb_cache[SLB_CACHE_ENTRIES];
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 #ifdef CONFIG_PPC_BOOK3E
 	u64 exgen[8] __aligned(0x40);
@@ -143,7 +143,7 @@ struct paca_struct {
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 mm_ctx_low_slices_psize;
 	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
-	unsigned long addr_limit;
+	unsigned long mm_ctx_slb_addr_limit;
 #else
 	u16 mm_ctx_user_psize;
 	u16 mm_ctx_sllp;
@@ -192,7 +192,7 @@ struct paca_struct {
 	struct stop_sprs stop_sprs;
 #endif
 
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	/* Non-maskable exceptions that are not performance critical */
 	u64 exnmi[EX_SIZE];	/* used for system reset (nmi) */
 	u64 exmc[EX_SIZE];	/* used for machine checks */
@@ -210,6 +210,7 @@ struct paca_struct {
 	 */
 	u16 in_mce;
 	u8 hmi_event_available;		/* HMI event is available */
+	u8 hmi_p9_special_emu;		/* HMI P9 special emulation */
 #endif
 
 	/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index c4d9654bd637..56234c6fcd61 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -117,21 +117,21 @@ extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 #endif /* __ASSEMBLY__ */
 #else
 #define slice_init()
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 #define get_slice_psize(mm, addr)	((mm)->context.user_psize)
 #define slice_set_user_psize(mm, psize)		\
 do {						\
 	(mm)->context.user_psize = (psize);	\
 	(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
 } while (0)
-#else /* CONFIG_PPC_STD_MMU_64 */
+#else /* !CONFIG_PPC_BOOK3S_64 */
 #ifdef CONFIG_PPC_64K_PAGES
 #define get_slice_psize(mm, addr)	MMU_PAGE_64K
 #else /* CONFIG_PPC_64K_PAGES */
 #define get_slice_psize(mm, addr)	MMU_PAGE_4K
 #endif /* !CONFIG_PPC_64K_PAGES */
 #define slice_set_user_psize(mm, psize)	do { BUG(); } while(0)
-#endif /* !CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 #define slice_set_range_psize(mm, start, len, psize)	\
 	slice_set_user_psize((mm), (psize))
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 0b8aa1fe2d5f..62ed83db04ae 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -218,6 +218,7 @@ struct pci_dn {
218#endif 218#endif
219 struct list_head child_list; 219 struct list_head child_list;
220 struct list_head list; 220 struct list_head list;
221 struct resource holes[PCI_SRIOV_NUM_BARS];
221}; 222};
222 223
223/* Get the pointer to a device_node's pci_dn */ 224/* Get the pointer to a device_node's pci_dn */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index beb6e3e79788..a89c67b62680 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -77,7 +77,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
77 * With hash config 64k pages additionally define a bigger "real PTE" type that 77 * With hash config 64k pages additionally define a bigger "real PTE" type that
78 * gathers the "second half" part of the PTE for pseudo 64k pages 78 * gathers the "second half" part of the PTE for pseudo 64k pages
79 */ 79 */
80#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) 80#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
81typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; 81typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
82#else 82#else
83typedef struct { pte_t pte; } real_pte_t; 83typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index cfe89a6fc308..eccb30b38b47 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -50,13 +50,13 @@ typedef struct { unsigned long pgprot; } pgprot_t;
50 * With hash config 64k pages additionally define a bigger "real PTE" type that 50 * With hash config 64k pages additionally define a bigger "real PTE" type that
51 * gathers the "second half" part of the PTE for pseudo 64k pages 51 * gathers the "second half" part of the PTE for pseudo 64k pages
52 */ 52 */
53#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) 53#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
54typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; 54typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
55#else 55#else
56typedef struct { pte_t pte; } real_pte_t; 56typedef struct { pte_t pte; } real_pte_t;
57#endif 57#endif
58 58
59#ifdef CONFIG_PPC_STD_MMU_64 59#ifdef CONFIG_PPC_BOOK3S_64
60#include <asm/cmpxchg.h> 60#include <asm/cmpxchg.h>
61 61
62static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) 62static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index f62797702300..dc5f6a5d4575 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -22,6 +22,8 @@ extern void pnv_npu2_destroy_context(struct npu_context *context,
22extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, 22extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
23 unsigned long *flags, unsigned long *status, 23 unsigned long *flags, unsigned long *status,
24 int count); 24 int count);
25
26void pnv_tm_init(void);
25#else 27#else
26static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } 28static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
27static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, 29static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
@@ -36,6 +38,8 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
36 unsigned long *status, int count) { 38 unsigned long *status, int count) {
37 return -ENODEV; 39 return -ENODEV;
38} 40}
41
42static inline void pnv_tm_init(void) { }
39#endif 43#endif
40 44
41#endif /* _ASM_POWERNV_H */ 45#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 36f3e41c9fbe..ae94b3626b6c 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -774,9 +774,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
774#ifdef CONFIG_PPC_BOOK3E 774#ifdef CONFIG_PPC_BOOK3E
775#define FIXUP_ENDIAN 775#define FIXUP_ENDIAN
776#else 776#else
777/*
778 * This version may be used in HV or non-HV context.
779 * MSR[EE] must be disabled.
780 */
777#define FIXUP_ENDIAN \ 781#define FIXUP_ENDIAN \
778 tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \ 782 tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
779 b $+44; /* Skip trampoline if endian is good */ \ 783 b 191f; /* Skip trampoline if endian is good */ \
780 .long 0xa600607d; /* mfmsr r11 */ \ 784 .long 0xa600607d; /* mfmsr r11 */ \
781 .long 0x01006b69; /* xori r11,r11,1 */ \ 785 .long 0x01006b69; /* xori r11,r11,1 */ \
782 .long 0x00004039; /* li r10,0 */ \ 786 .long 0x00004039; /* li r10,0 */ \
@@ -786,7 +790,26 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
786 .long 0x14004a39; /* addi r10,r10,20 */ \ 790 .long 0x14004a39; /* addi r10,r10,20 */ \
787 .long 0xa6035a7d; /* mtsrr0 r10 */ \ 791 .long 0xa6035a7d; /* mtsrr0 r10 */ \
788 .long 0xa6037b7d; /* mtsrr1 r11 */ \ 792 .long 0xa6037b7d; /* mtsrr1 r11 */ \
789 .long 0x2400004c /* rfid */ 793 .long 0x2400004c; /* rfid */ \
794191:
795
796/*
797 * This version may only be used with MSR[HV]=1
798 * - Does not clear MSR[RI], so more robust.
799 * - Slightly smaller and faster.
800 */
801#define FIXUP_ENDIAN_HV \
802 tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
803 b 191f; /* Skip trampoline if endian is good */ \
804 .long 0xa600607d; /* mfmsr r11 */ \
805 .long 0x01006b69; /* xori r11,r11,1 */ \
806 .long 0x05009f42; /* bcl 20,31,$+4 */ \
807 .long 0xa602487d; /* mflr r10 */ \
808 .long 0x14004a39; /* addi r10,r10,20 */ \
809 .long 0xa64b5a7d; /* mthsrr0 r10 */ \
810 .long 0xa64b7b7d; /* mthsrr1 r11 */ \
811 .long 0x2402004c; /* hrfid */ \
812191:
790 813
791#endif /* !CONFIG_PPC_BOOK3E */ 814#endif /* !CONFIG_PPC_BOOK3E */
792 815
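An aside on how the trampolines above work: every ".long" constant is the byte-swapped encoding of the instruction named in its comment, so a CPU fetching in the "wrong" endianness decodes the intended sequence, while "tdi 0,0,0x48" read the other way around is "b . + 8". A minimal userspace sketch (not part of the patch) showing the correspondence:

#include <stdint.h>
#include <stdio.h>

static uint32_t swap32(uint32_t x)
{
	return ((x & 0x000000ffu) << 24) |
	       ((x & 0x0000ff00u) <<  8) |
	       ((x & 0x00ff0000u) >>  8) |
	       ((x & 0xff000000u) >> 24);
}

int main(void)
{
	uint32_t mfmsr_r11 = 0x7d6000a6;	/* mfmsr r11, native encoding */

	/* prints 0xa600607d, the first constant in the trampolines above */
	printf("swapped: 0x%08x\n", swap32(mfmsr_r11));
	return 0;
}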
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index fab7ff877304..bdab3b74eb98 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -329,6 +329,7 @@ struct thread_struct {
329 */ 329 */
330 int dscr_inherit; 330 int dscr_inherit;
331 unsigned long ppr; /* used to save/restore SMT priority */ 331 unsigned long ppr; /* used to save/restore SMT priority */
332 unsigned long tidr;
332#endif 333#endif
333#ifdef CONFIG_PPC_BOOK3S_64 334#ifdef CONFIG_PPC_BOOK3S_64
334 unsigned long tar; 335 unsigned long tar;
@@ -340,7 +341,9 @@ struct thread_struct {
340 unsigned long sier; 341 unsigned long sier;
341 unsigned long mmcr2; 342 unsigned long mmcr2;
342 unsigned mmcr0; 343 unsigned mmcr0;
344
343 unsigned used_ebb; 345 unsigned used_ebb;
346 unsigned int used_vas;
344#endif 347#endif
345}; 348};
346 349
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index d98ac188cedb..9b8cedf618f4 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -12,6 +12,7 @@
12#define __HAVE_ARCH_MEMCMP 12#define __HAVE_ARCH_MEMCMP
13#define __HAVE_ARCH_MEMCHR 13#define __HAVE_ARCH_MEMCHR
14#define __HAVE_ARCH_MEMSET16 14#define __HAVE_ARCH_MEMSET16
15#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
15 16
16extern char * strcpy(char *,const char *); 17extern char * strcpy(char *,const char *);
17extern char * strncpy(char *,const char *, __kernel_size_t); 18extern char * strncpy(char *,const char *, __kernel_size_t);
@@ -24,6 +25,7 @@ extern void * memcpy(void *,const void *,__kernel_size_t);
24extern void * memmove(void *,const void *,__kernel_size_t); 25extern void * memmove(void *,const void *,__kernel_size_t);
25extern int memcmp(const void *,const void *,__kernel_size_t); 26extern int memcmp(const void *,const void *,__kernel_size_t);
26extern void * memchr(const void *,int,__kernel_size_t); 27extern void * memchr(const void *,int,__kernel_size_t);
28extern void * memcpy_flushcache(void *,const void *,__kernel_size_t);
27 29
28#ifdef CONFIG_PPC64 30#ifdef CONFIG_PPC64
29#define __HAVE_ARCH_MEMSET32 31#define __HAVE_ARCH_MEMSET32
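memcpy_flushcache() has ordinary memcpy semantics but additionally pushes the destination lines out of the cache hierarchy, which is what makes it suitable for persistent memory. A hedged caller sketch (pmem_write_block() is hypothetical, and any final persistence barrier remains the caller's job):

#include <linux/string.h>

static void pmem_write_block(void *pmem_dst, const void *src, size_t len)
{
	/* copy, then flush the destination lines toward persistence */
	memcpy_flushcache(pmem_dst, src, len);
	/* a subsequent persistence barrier is still up to the caller */
}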
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index bf820f53e27e..c3ca42cdc9f5 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -92,4 +92,9 @@ static inline void clear_task_ebb(struct task_struct *t)
92#endif 92#endif
93} 93}
94 94
95extern int set_thread_uses_vas(void);
96
97extern int set_thread_tidr(struct task_struct *t);
98extern void clear_thread_tidr(struct task_struct *t);
99
95#endif /* _ASM_POWERPC_SWITCH_TO_H */ 100#endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 13dbcd41885e..7d5a157c7832 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -77,7 +77,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
77 flush_tlb_mm(mm); 77 flush_tlb_mm(mm);
78} 78}
79 79
80#elif defined(CONFIG_PPC_STD_MMU_64) 80#elif defined(CONFIG_PPC_BOOK3S_64)
81#include <asm/book3s/64/tlbflush.h> 81#include <asm/book3s/64/tlbflush.h>
82#else 82#else
83#error Unsupported MMU type 83#error Unsupported MMU type
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index a8bc72a7f4be..b1658c97047c 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -12,12 +12,13 @@
12 12
13extern void tm_enable(void); 13extern void tm_enable(void);
14extern void tm_reclaim(struct thread_struct *thread, 14extern void tm_reclaim(struct thread_struct *thread,
15 unsigned long orig_msr, uint8_t cause); 15 uint8_t cause);
16extern void tm_reclaim_current(uint8_t cause); 16extern void tm_reclaim_current(uint8_t cause);
17extern void tm_recheckpoint(struct thread_struct *thread, 17extern void tm_recheckpoint(struct thread_struct *thread);
18 unsigned long orig_msr);
19extern void tm_abort(uint8_t cause); 18extern void tm_abort(uint8_t cause);
20extern void tm_save_sprs(struct thread_struct *thread); 19extern void tm_save_sprs(struct thread_struct *thread);
21extern void tm_restore_sprs(struct thread_struct *thread); 20extern void tm_restore_sprs(struct thread_struct *thread);
22 21
22extern bool tm_suspend_disabled;
23
23#endif /* __ASSEMBLY__ */ 24#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 023ff9f17501..88187c285c70 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -97,6 +97,14 @@ static inline int prrn_is_enabled(void)
97} 97}
98#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ 98#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
99 99
100#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
101#if defined(CONFIG_PPC_SPLPAR)
102extern int timed_topology_update(int nsecs);
103#else
104#define timed_topology_update(nsecs)
105#endif /* CONFIG_PPC_SPLPAR */
106#endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */
107
100#include <asm-generic/topology.h> 108#include <asm-generic/topology.h>
101 109
102#ifdef CONFIG_SMP 110#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 11f4bd07cce0..51bfeb8777f0 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -174,6 +174,23 @@ do { \
174 174
175extern long __get_user_bad(void); 175extern long __get_user_bad(void);
176 176
177/*
178 * This does an atomic 128-byte aligned load from userspace.
179 * Up to the caller to do enable_kernel_vmx() before calling!
180 */
181#define __get_user_atomic_128_aligned(kaddr, uaddr, err) \
182 __asm__ __volatile__( \
183 "1: lvx 0,0,%1 # get user\n" \
184 " stvx 0,0,%2 # put kernel\n" \
185 "2:\n" \
186 ".section .fixup,\"ax\"\n" \
187 "3: li %0,%3\n" \
188 " b 2b\n" \
189 ".previous\n" \
190 EX_TABLE(1b, 3b) \
191 : "=r" (err) \
192 : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
193
177#define __get_user_asm(x, addr, err, op) \ 194#define __get_user_asm(x, addr, err, op) \
178 __asm__ __volatile__( \ 195 __asm__ __volatile__( \
179 "1: "op" %1,0(%2) # get_user\n" \ 196 "1: "op" %1,0(%2) # get_user\n" \
@@ -340,4 +357,9 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
340extern long strncpy_from_user(char *dst, const char __user *src, long count); 357extern long strncpy_from_user(char *dst, const char __user *src, long count);
341extern __must_check long strnlen_user(const char __user *str, long n); 358extern __must_check long strnlen_user(const char __user *str, long n);
342 359
360extern long __copy_from_user_flushcache(void *dst, const void __user *src,
361 unsigned size);
362extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
363 size_t len);
364
343#endif /* _ARCH_POWERPC_UACCESS_H */ 365#endif /* _ARCH_POWERPC_UACCESS_H */
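A hedged sketch of how __get_user_atomic_128_aligned() might be called; the wrapper below is hypothetical, enable_kernel_altivec() is assumed to be the VMX-enable primitive the macro's comment refers to, and both pointers are assumed 128-byte aligned:

#include <linux/preempt.h>
#include <linux/uaccess.h>
#include <asm/switch_to.h>

static int copy_user_block_atomic(void *kaddr, const void __user *uaddr)
{
	int err = 0;

	preempt_disable();
	enable_kernel_altivec();	/* make lvx/stvx usable */
	__get_user_atomic_128_aligned(kaddr, uaddr, err);
	preempt_enable();

	return err;	/* 0 on success, -EFAULT via the fixup path */
}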
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index fd5963acd658..771456227496 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -10,6 +10,8 @@
10#ifndef _ASM_POWERPC_VAS_H 10#ifndef _ASM_POWERPC_VAS_H
11#define _ASM_POWERPC_VAS_H 11#define _ASM_POWERPC_VAS_H
12 12
13struct vas_window;
14
13/* 15/*
14 * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25 16 * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
15 * (Local FIFO Size Register) of the VAS workbook. 17 * (Local FIFO Size Register) of the VAS workbook.
@@ -104,6 +106,15 @@ struct vas_tx_win_attr {
104}; 106};
105 107
106/* 108/*
109 * Helper to map a chip id to VAS id.
110 * For POWER9, this is a 1:1 mapping. In the future this may be a 1:N
111 * mapping, in which case we will need to update this helper.
112 *
113 * Return the VAS id or -1 if no matching vasid is found.
114 */
115int chip_to_vas_id(int chipid);
116
117/*
107 * Helper to initialize receive window attributes to defaults for an 118 * Helper to initialize receive window attributes to defaults for an
108 * NX window. 119 * NX window.
109 */ 120 */
@@ -156,4 +167,14 @@ int vas_copy_crb(void *crb, int offset);
156 */ 167 */
157int vas_paste_crb(struct vas_window *win, int offset, bool re); 168int vas_paste_crb(struct vas_window *win, int offset, bool re);
158 169
170/*
171 * Return a system-wide unique id for the VAS window @win.
172 */
173extern u32 vas_win_id(struct vas_window *win);
174
175/*
176 * Return the power bus paste address associated with @win so the caller
177 * can map that address into their address space.
178 */
179extern u64 vas_win_paste_addr(struct vas_window *win);
159#endif /* __ASM_POWERPC_VAS_H */ 180#endif /* __ASM_POWERPC_VAS_H */
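A hedged sketch of a chip_to_vas_id() caller (vas_id_for_cpu() is hypothetical; cpu_to_chip_id() is assumed to be the usual powerpc topology helper):

#include <asm/smp.h>
#include <asm/vas.h>

static int vas_id_for_cpu(int cpu)
{
	int chip = cpu_to_chip_id(cpu);

	if (chip < 0)
		return -1;

	/* 1:1 chip-to-VAS mapping on POWER9, -1 if no match */
	return chip_to_vas_id(chip);
}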
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 50bcb4295de4..540592034740 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -49,6 +49,7 @@
49#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */ 49#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */
50#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */ 50#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */
51#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ 51#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
52#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* TM w/out suspended state */
52 53
53/* 54/*
54 * IMPORTANT! 55 * IMPORTANT!
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 6c6cce937dd8..1b6bc7fba996 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -129,7 +129,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
129obj-$(CONFIG_PPC64) += $(obj64-y) 129obj-$(CONFIG_PPC64) += $(obj64-y)
130obj-$(CONFIG_PPC32) += $(obj32-y) 130obj-$(CONFIG_PPC32) += $(obj32-y)
131 131
132ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),) 132ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)$(CONFIG_PPC_BOOK3S),)
133obj-y += ppc_save_regs.o 133obj-y += ppc_save_regs.o
134endif 134endif
135 135
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8cfb20e38cfe..9aace433491a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,7 +185,7 @@ int main(void)
185#ifdef CONFIG_PPC_MM_SLICES 185#ifdef CONFIG_PPC_MM_SLICES
186 OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); 186 OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
187 OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); 187 OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
188 DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit)); 188 OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
189 DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); 189 DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
190#endif /* CONFIG_PPC_MM_SLICES */ 190#endif /* CONFIG_PPC_MM_SLICES */
191#endif 191#endif
@@ -208,7 +208,7 @@ int main(void)
208 OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first); 208 OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
209#endif /* CONFIG_PPC_BOOK3E */ 209#endif /* CONFIG_PPC_BOOK3E */
210 210
211#ifdef CONFIG_PPC_STD_MMU_64 211#ifdef CONFIG_PPC_BOOK3S_64
212 OFFSET(PACASLBCACHE, paca_struct, slb_cache); 212 OFFSET(PACASLBCACHE, paca_struct, slb_cache);
213 OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); 213 OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
214 OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); 214 OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
@@ -230,7 +230,7 @@ int main(void)
230 OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); 230 OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
231 OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); 231 OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
232 OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); 232 OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
233#endif /* CONFIG_PPC_STD_MMU_64 */ 233#endif /* CONFIG_PPC_BOOK3S_64 */
234 OFFSET(PACAEMERGSP, paca_struct, emergency_sp); 234 OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
235#ifdef CONFIG_PPC_BOOK3S_64 235#ifdef CONFIG_PPC_BOOK3S_64
236 OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp); 236 OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 760872916013..1350f49d81a8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -547,11 +547,31 @@ static struct cpu_spec __initdata cpu_specs[] = {
547 .machine_check_early = __machine_check_early_realmode_p9, 547 .machine_check_early = __machine_check_early_realmode_p9,
548 .platform = "power9", 548 .platform = "power9",
549 }, 549 },
550 { /* Power9 */ 550 { /* Power9 DD2.0 */
551 .pvr_mask = 0xffffefff,
552 .pvr_value = 0x004e0200,
553 .cpu_name = "POWER9 (raw)",
554 .cpu_features = CPU_FTRS_POWER9_DD2_0,
555 .cpu_user_features = COMMON_USER_POWER9,
556 .cpu_user_features2 = COMMON_USER2_POWER9,
557 .mmu_features = MMU_FTRS_POWER9,
558 .icache_bsize = 128,
559 .dcache_bsize = 128,
560 .num_pmcs = 6,
561 .pmc_type = PPC_PMC_IBM,
562 .oprofile_cpu_type = "ppc64/power9",
563 .oprofile_type = PPC_OPROFILE_INVALID,
564 .cpu_setup = __setup_cpu_power9,
565 .cpu_restore = __restore_cpu_power9,
566 .flush_tlb = __flush_tlb_power9,
567 .machine_check_early = __machine_check_early_realmode_p9,
568 .platform = "power9",
569 },
570 { /* Power9 DD 2.1 or later (see DD2.0 above) */
551 .pvr_mask = 0xffff0000, 571 .pvr_mask = 0xffff0000,
552 .pvr_value = 0x004e0000, 572 .pvr_value = 0x004e0000,
553 .cpu_name = "POWER9 (raw)", 573 .cpu_name = "POWER9 (raw)",
554 .cpu_features = CPU_FTRS_POWER9, 574 .cpu_features = CPU_FTRS_POWER9_DD2_1,
555 .cpu_user_features = COMMON_USER_POWER9, 575 .cpu_user_features = COMMON_USER_POWER9,
556 .cpu_user_features2 = COMMON_USER2_POWER9, 576 .cpu_user_features2 = COMMON_USER2_POWER9,
557 .mmu_features = MMU_FTRS_POWER9, 577 .mmu_features = MMU_FTRS_POWER9,
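The ordering of the two POWER9 entries matters: cpu_specs[] is scanned front to back, and the DD2.0 mask 0xffffefff deliberately ignores PVR bit 0x1000 so both DD2.0 variants match before the catch-all 0xffff0000 entry. Illustrative only, with values assumed from the table above:

#include <stdint.h>
#include <stdio.h>

static const char *p9_entry(uint32_t pvr)
{
	if ((pvr & 0xffffefff) == 0x004e0200)
		return "POWER9 DD2.0 entry (CPU_FTRS_POWER9_DD2_0)";
	if ((pvr & 0xffff0000) == 0x004e0000)
		return "POWER9 DD2.1-or-later entry (CPU_FTRS_POWER9_DD2_1)";
	return "not a POWER9";
}

int main(void)
{
	printf("%s\n", p9_entry(0x004e0200));	/* DD2.0 */
	printf("%s\n", p9_entry(0x004e1200));	/* DD2.0, bit 0x1000 masked */
	printf("%s\n", p9_entry(0x004e0201));	/* DD2.1 falls through */
	return 0;
}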
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 7275fed271af..602e0fde19b4 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -634,7 +634,7 @@ static struct dt_cpu_feature_match __initdata
634 {"no-execute", feat_enable, 0}, 634 {"no-execute", feat_enable, 0},
635 {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, 635 {"strong-access-ordering", feat_enable, CPU_FTR_SAO},
636 {"cache-inhibited-large-page", feat_enable_large_ci, 0}, 636 {"cache-inhibited-large-page", feat_enable_large_ci, 0},
637 {"coprocessor-icswx", feat_enable, CPU_FTR_ICSWX}, 637 {"coprocessor-icswx", feat_enable, 0},
638 {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0}, 638 {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
639 {"program-priority-register", feat_enable, CPU_FTR_HAS_PPR}, 639 {"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
640 {"wait", feat_enable, 0}, 640 {"wait", feat_enable, 0},
@@ -735,6 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
735 */ 735 */
736 if ((version & 0xffffff00) == 0x004e0100) 736 if ((version & 0xffffff00) == 0x004e0100)
737 cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; 737 cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
738 else if ((version & 0xffffefff) == 0x004e0200)
739 cur_cpu_spec->cpu_features &= ~CPU_FTR_POWER9_DD2_1;
738} 740}
739 741
740static void __init cpufeatures_setup_finished(void) 742static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 116000b45531..cbca0a667682 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -972,6 +972,18 @@ static struct notifier_block eeh_reboot_nb = {
972 .notifier_call = eeh_reboot_notifier, 972 .notifier_call = eeh_reboot_notifier,
973}; 973};
974 974
975void eeh_probe_devices(void)
976{
977 struct pci_controller *hose, *tmp;
978 struct pci_dn *pdn;
979
980 /* Enable EEH for all adapters */
981 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
982 pdn = hose->pci_data;
983 traverse_pci_dn(pdn, eeh_ops->probe, NULL);
984 }
985}
986
975/** 987/**
976 * eeh_init - EEH initialization 988 * eeh_init - EEH initialization
977 * 989 *
@@ -987,22 +999,11 @@ static struct notifier_block eeh_reboot_nb = {
987 * Even if force-off is set, the EEH hardware is still enabled, so that 999 * Even if force-off is set, the EEH hardware is still enabled, so that
988 * newer systems can boot. 1000 * newer systems can boot.
989 */ 1001 */
990int eeh_init(void) 1002static int eeh_init(void)
991{ 1003{
992 struct pci_controller *hose, *tmp; 1004 struct pci_controller *hose, *tmp;
993 struct pci_dn *pdn;
994 static int cnt = 0;
995 int ret = 0; 1005 int ret = 0;
996 1006
997 /*
998 * We have to delay the initialization on PowerNV after
999 * the PCI hierarchy tree has been built because the PEs
1000 * are figured out based on PCI devices instead of device
1001 * tree nodes
1002 */
1003 if (machine_is(powernv) && cnt++ <= 0)
1004 return ret;
1005
1006 /* Register reboot notifier */ 1007 /* Register reboot notifier */
1007 ret = register_reboot_notifier(&eeh_reboot_nb); 1008 ret = register_reboot_notifier(&eeh_reboot_nb);
1008 if (ret) { 1009 if (ret) {
@@ -1028,22 +1029,7 @@ int eeh_init(void)
1028 if (ret) 1029 if (ret)
1029 return ret; 1030 return ret;
1030 1031
1031 /* Enable EEH for all adapters */ 1032 eeh_probe_devices();
1032 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1033 pdn = hose->pci_data;
1034 traverse_pci_dn(pdn, eeh_ops->probe, NULL);
1035 }
1036
1037 /*
1038 * Call platform post-initialization. Actually, It's good chance
1039 * to inform platform that EEH is ready to supply service if the
1040 * I/O cache stuff has been built up.
1041 */
1042 if (eeh_ops->post_init) {
1043 ret = eeh_ops->post_init();
1044 if (ret)
1045 return ret;
1046 }
1047 1033
1048 if (eeh_enabled()) 1034 if (eeh_enabled())
1049 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 1035 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
@@ -1757,10 +1743,6 @@ static int eeh_enable_dbgfs_set(void *data, u64 val)
1757 else 1743 else
1758 eeh_add_flag(EEH_FORCE_DISABLED); 1744 eeh_add_flag(EEH_FORCE_DISABLED);
1759 1745
1760 /* Notify the backend */
1761 if (eeh_ops->post_init)
1762 eeh_ops->post_init();
1763
1764 return 0; 1746 return 0;
1765} 1747}
1766 1748
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 4e1b433f6cb5..4f71e4c9beb7 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -623,7 +623,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
623 struct eeh_rmv_data *rmv_data) 623 struct eeh_rmv_data *rmv_data)
624{ 624{
625 struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); 625 struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
626 struct timeval tstamp; 626 time64_t tstamp;
627 int cnt, rc; 627 int cnt, rc;
628 struct eeh_dev *edev; 628 struct eeh_dev *edev;
629 629
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 2e8d1b2b5af4..2d4956e97aa9 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -526,16 +526,16 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
526 */ 526 */
527void eeh_pe_update_time_stamp(struct eeh_pe *pe) 527void eeh_pe_update_time_stamp(struct eeh_pe *pe)
528{ 528{
529 struct timeval tstamp; 529 time64_t tstamp;
530 530
531 if (!pe) return; 531 if (!pe) return;
532 532
533 if (pe->freeze_count <= 0) { 533 if (pe->freeze_count <= 0) {
534 pe->freeze_count = 0; 534 pe->freeze_count = 0;
535 do_gettimeofday(&pe->tstamp); 535 pe->tstamp = ktime_get_seconds();
536 } else { 536 } else {
537 do_gettimeofday(&tstamp); 537 tstamp = ktime_get_seconds();
538 if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) { 538 if (tstamp - pe->tstamp > 3600) {
539 pe->tstamp = tstamp; 539 pe->tstamp = tstamp;
540 pe->freeze_count = 0; 540 pe->freeze_count = 0;
541 } 541 }
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 4a0fd4f40245..3320bcac7192 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -539,7 +539,7 @@ _GLOBAL(_switch)
539 std r6,PACACURRENT(r13) /* Set new 'current' */ 539 std r6,PACACURRENT(r13) /* Set new 'current' */
540 540
541 ld r8,KSP(r4) /* new stack pointer */ 541 ld r8,KSP(r4) /* new stack pointer */
542#ifdef CONFIG_PPC_STD_MMU_64 542#ifdef CONFIG_PPC_BOOK3S_64
543BEGIN_MMU_FTR_SECTION 543BEGIN_MMU_FTR_SECTION
544 b 2f 544 b 2f
545END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) 545END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -588,7 +588,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
588 slbmte r7,r0 588 slbmte r7,r0
589 isync 589 isync
5902: 5902:
591#endif /* CONFIG_PPC_STD_MMU_64 */ 591#endif /* CONFIG_PPC_BOOK3S_64 */
592 592
593 CURRENT_THREAD_INFO(r7, r8) /* base of new stack */ 593 CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
594 /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE 594 /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1c80bd292e48..e441b469dc8f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -114,6 +114,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
114 cmpwi cr3,r10,2 ; \ 114 cmpwi cr3,r10,2 ; \
115 BRANCH_TO_C000(r10, system_reset_idle_common) ; \ 115 BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1161: \ 1161: \
117 KVMTEST_PR(n) ; \
117 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) 118 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
118#else 119#else
119#define IDLETEST NOTEST 120#define IDLETEST NOTEST
@@ -130,6 +131,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
130 131
131EXC_REAL_END(system_reset, 0x100, 0x100) 132EXC_REAL_END(system_reset, 0x100, 0x100)
132EXC_VIRT_NONE(0x4100, 0x100) 133EXC_VIRT_NONE(0x4100, 0x100)
134TRAMP_KVM(PACA_EXNMI, 0x100)
133 135
134#ifdef CONFIG_PPC_P7_NAP 136#ifdef CONFIG_PPC_P7_NAP
135EXC_COMMON_BEGIN(system_reset_idle_common) 137EXC_COMMON_BEGIN(system_reset_idle_common)
@@ -233,7 +235,7 @@ BEGIN_FTR_SECTION
233 addi r10,r10,1 /* increment paca->in_mce */ 235 addi r10,r10,1 /* increment paca->in_mce */
234 sth r10,PACA_IN_MCE(r13) 236 sth r10,PACA_IN_MCE(r13)
235 /* Limit nested MCE to level 4 to avoid stack overflow */ 237 /* Limit nested MCE to level 4 to avoid stack overflow */
236 cmpwi r10,4 238 cmpwi r10,MAX_MCE_DEPTH
237 bgt 2f /* Check if we hit limit of 4 */ 239 bgt 2f /* Check if we hit limit of 4 */
238 std r11,GPR1(r1) /* Save r1 on the stack. */ 240 std r11,GPR1(r1) /* Save r1 on the stack. */
239 std r11,0(r1) /* make stack chain pointer */ 241 std r11,0(r1) /* make stack chain pointer */
@@ -542,7 +544,7 @@ EXC_COMMON_BEGIN(instruction_access_common)
542 RECONCILE_IRQ_STATE(r10, r11) 544 RECONCILE_IRQ_STATE(r10, r11)
543 ld r12,_MSR(r1) 545 ld r12,_MSR(r1)
544 ld r3,_NIP(r1) 546 ld r3,_NIP(r1)
545 andis. r4,r12,DSISR_BAD_FAULT_64S@h 547 andis. r4,r12,DSISR_SRR1_MATCH_64S@h
546 li r5,0x400 548 li r5,0x400
547 std r3,_DAR(r1) 549 std r3,_DAR(r1)
548 std r4,_DSISR(r1) 550 std r4,_DSISR(r1)
@@ -606,7 +608,7 @@ EXC_COMMON_BEGIN(slb_miss_common)
606 cmpdi cr5,r11,MSR_RI 608 cmpdi cr5,r11,MSR_RI
607 609
608 crset 4*cr0+eq 610 crset 4*cr0+eq
609#ifdef CONFIG_PPC_STD_MMU_64 611#ifdef CONFIG_PPC_BOOK3S_64
610BEGIN_MMU_FTR_SECTION 612BEGIN_MMU_FTR_SECTION
611 bl slb_allocate 613 bl slb_allocate
612END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) 614END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
@@ -888,12 +890,6 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
888#define LOAD_SYSCALL_HANDLER(reg) \ 890#define LOAD_SYSCALL_HANDLER(reg) \
889 __LOAD_HANDLER(reg, system_call_common) 891 __LOAD_HANDLER(reg, system_call_common)
890 892
891#define SYSCALL_FASTENDIAN_TEST \
892BEGIN_FTR_SECTION \
893 cmpdi r0,0x1ebe ; \
894 beq- 1f ; \
895END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
896
897/* 893/*
898 * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9, 894 * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
899 * and HMT_MEDIUM. 895 * and HMT_MEDIUM.
@@ -908,6 +904,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
908 rfid ; \ 904 rfid ; \
909 b . ; /* prevent speculative execution */ 905 b . ; /* prevent speculative execution */
910 906
907#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
908#define SYSCALL_FASTENDIAN_TEST \
909BEGIN_FTR_SECTION \
910 cmpdi r0,0x1ebe ; \
911 beq- 1f ; \
912END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
913
911#define SYSCALL_FASTENDIAN \ 914#define SYSCALL_FASTENDIAN \
912 /* Fast LE/BE switch system call */ \ 915 /* Fast LE/BE switch system call */ \
9131: mfspr r12,SPRN_SRR1 ; \ 9161: mfspr r12,SPRN_SRR1 ; \
@@ -916,6 +919,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
916 mr r13,r9 ; \ 919 mr r13,r9 ; \
917 rfid ; /* return to userspace */ \ 920 rfid ; /* return to userspace */ \
918 b . ; /* prevent speculative execution */ 921 b . ; /* prevent speculative execution */
922#else
923#define SYSCALL_FASTENDIAN_TEST
924#define SYSCALL_FASTENDIAN
925#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
919 926
920#if defined(CONFIG_RELOCATABLE) 927#if defined(CONFIG_RELOCATABLE)
921 /* 928 /*
@@ -1033,6 +1040,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
1033 EXCEPTION_PROLOG_COMMON_3(0xe60) 1040 EXCEPTION_PROLOG_COMMON_3(0xe60)
1034 addi r3,r1,STACK_FRAME_OVERHEAD 1041 addi r3,r1,STACK_FRAME_OVERHEAD
1035 BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */ 1042 BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
1043 cmpdi cr0,r3,0
1044
1036 /* Windup the stack. */ 1045 /* Windup the stack. */
1037 /* Move original HSRR0 and HSRR1 into the respective regs */ 1046 /* Move original HSRR0 and HSRR1 into the respective regs */
1038 ld r9,_MSR(r1) 1047 ld r9,_MSR(r1)
@@ -1049,10 +1058,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
1049 REST_8GPRS(2, r1) 1058 REST_8GPRS(2, r1)
1050 REST_GPR(10, r1) 1059 REST_GPR(10, r1)
1051 ld r11,_CCR(r1) 1060 ld r11,_CCR(r1)
1061 REST_2GPRS(12, r1)
1062 bne 1f
1052 mtcr r11 1063 mtcr r11
1053 REST_GPR(11, r1) 1064 REST_GPR(11, r1)
1054 REST_2GPRS(12, r1) 1065 ld r1,GPR1(r1)
1055 /* restore original r1. */ 1066 hrfid
1067
10681: mtcr r11
1069 REST_GPR(11, r1)
1056 ld r1,GPR1(r1) 1070 ld r1,GPR1(r1)
1057 1071
1058 /* 1072 /*
@@ -1065,8 +1079,9 @@ hmi_exception_after_realmode:
1065 EXCEPTION_PROLOG_0(PACA_EXGEN) 1079 EXCEPTION_PROLOG_0(PACA_EXGEN)
1066 b tramp_real_hmi_exception 1080 b tramp_real_hmi_exception
1067 1081
1068EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception) 1082EXC_COMMON_BEGIN(hmi_exception_common)
1069 1083EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
1084 ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
1070 1085
1071EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20) 1086EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
1072EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80) 1087EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
@@ -1505,8 +1520,8 @@ USE_TEXT_SECTION()
1505 */ 1520 */
1506 .balign IFETCH_ALIGN_BYTES 1521 .balign IFETCH_ALIGN_BYTES
1507do_hash_page: 1522do_hash_page:
1508 #ifdef CONFIG_PPC_STD_MMU_64 1523#ifdef CONFIG_PPC_BOOK3S_64
1509 lis r0,DSISR_BAD_FAULT_64S@h 1524 lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
1510 ori r0,r0,DSISR_BAD_FAULT_64S@l 1525 ori r0,r0,DSISR_BAD_FAULT_64S@l
1511 and. r0,r4,r0 /* weird error? */ 1526 and. r0,r4,r0 /* weird error? */
1512 bne- handle_page_fault /* if not, try to insert a HPTE */ 1527 bne- handle_page_fault /* if not, try to insert a HPTE */
@@ -1536,7 +1551,7 @@ do_hash_page:
1536 1551
1537 /* Reload DSISR into r4 for the DABR check below */ 1552 /* Reload DSISR into r4 for the DABR check below */
1538 ld r4,_DSISR(r1) 1553 ld r4,_DSISR(r1)
1539#endif /* CONFIG_PPC_STD_MMU_64 */ 1554#endif /* CONFIG_PPC_BOOK3S_64 */
1540 1555
1541/* Here we have a page fault that hash_page can't handle. */ 1556/* Here we have a page fault that hash_page can't handle. */
1542handle_page_fault: 1557handle_page_fault:
@@ -1565,7 +1580,7 @@ handle_dabr_fault:
156512: b ret_from_except_lite 158012: b ret_from_except_lite
1566 1581
1567 1582
1568#ifdef CONFIG_PPC_STD_MMU_64 1583#ifdef CONFIG_PPC_BOOK3S_64
1569/* We have a page fault that hash_page could handle but HV refused 1584/* We have a page fault that hash_page could handle but HV refused
1570 * the PTE insertion 1585 * the PTE insertion
1571 */ 1586 */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index e1431800bfb9..04ea5c04fd24 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1270,10 +1270,15 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
1270 struct kobj_attribute *attr, 1270 struct kobj_attribute *attr,
1271 const char *buf, size_t count) 1271 const char *buf, size_t count)
1272{ 1272{
1273 int input = -1;
1274
1273 if (!fw_dump.dump_active) 1275 if (!fw_dump.dump_active)
1274 return -EPERM; 1276 return -EPERM;
1275 1277
1276 if (buf[0] == '1') { 1278 if (kstrtoint(buf, 0, &input))
1279 return -EINVAL;
1280
1281 if (input == 1) {
1277 /* 1282 /*
1278 * Take away the '/proc/vmcore'. We are releasing the dump 1283 * Take away the '/proc/vmcore'. We are releasing the dump
1279 * memory, hence it will not be valid anymore. 1284 * memory, hence it will not be valid anymore.
@@ -1307,21 +1312,25 @@ static ssize_t fadump_register_store(struct kobject *kobj,
1307 const char *buf, size_t count) 1312 const char *buf, size_t count)
1308{ 1313{
1309 int ret = 0; 1314 int ret = 0;
1315 int input = -1;
1310 1316
1311 if (!fw_dump.fadump_enabled || fdm_active) 1317 if (!fw_dump.fadump_enabled || fdm_active)
1312 return -EPERM; 1318 return -EPERM;
1313 1319
1320 if (kstrtoint(buf, 0, &input))
1321 return -EINVAL;
1322
1314 mutex_lock(&fadump_mutex); 1323 mutex_lock(&fadump_mutex);
1315 1324
1316 switch (buf[0]) { 1325 switch (input) {
1317 case '0': 1326 case 0:
1318 if (fw_dump.dump_registered == 0) { 1327 if (fw_dump.dump_registered == 0) {
1319 goto unlock_out; 1328 goto unlock_out;
1320 } 1329 }
1321 /* Un-register Firmware-assisted dump */ 1330 /* Un-register Firmware-assisted dump */
1322 fadump_unregister_dump(&fdm); 1331 fadump_unregister_dump(&fdm);
1323 break; 1332 break;
1324 case '1': 1333 case 1:
1325 if (fw_dump.dump_registered == 1) { 1334 if (fw_dump.dump_registered == 1) {
1326 ret = -EEXIST; 1335 ret = -EEXIST;
1327 goto unlock_out; 1336 goto unlock_out;
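The kstrtoint() conversion is stricter than peeking at buf[0]: it parses the whole buffer (tolerating the trailing newline that "echo 1" writes) and rejects anything else. A hedged sketch of the same pattern for a new attribute; example_store() is hypothetical:

#include <linux/kernel.h>
#include <linux/kobject.h>

static ssize_t example_store(struct kobject *kobj,
			     struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	int input;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	if (input != 0 && input != 1)	/* only "0" and "1" are meaningful */
		return -EINVAL;

	/* ... act on input ... */
	return count;
}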
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 8c54166491e7..29b2fed93289 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -388,7 +388,7 @@ DataAccess:
388 EXCEPTION_PROLOG 388 EXCEPTION_PROLOG
389 mfspr r10,SPRN_DSISR 389 mfspr r10,SPRN_DSISR
390 stw r10,_DSISR(r11) 390 stw r10,_DSISR(r11)
391 andis. r0,r10,DSISR_BAD_FAULT_32S@h 391 andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
392 bne 1f /* if not, try to put a PTE */ 392 bne 1f /* if not, try to put a PTE */
393 mfspr r4,SPRN_DAR /* into the hash table */ 393 mfspr r4,SPRN_DAR /* into the hash table */
394 rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ 394 rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ff8511d6d8ea..aa71a90f5222 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -55,12 +55,18 @@
55 * 55 *
56 * For pSeries or server processors: 56 * For pSeries or server processors:
57 * 1. The MMU is off & open firmware is running in real mode. 57 * 1. The MMU is off & open firmware is running in real mode.
58 * 2. The kernel is entered at __start 58 * 2. The primary CPU enters at __start.
59 * 3. If the RTAS supports "query-cpu-stopped-state", then secondary
60 * CPUs will enter as directed by the "start-cpu" RTAS call, which is
61 * generic_secondary_smp_init, with PIR in r3.
62 * 4. Else the secondary CPUs will enter at secondary_hold (0x60) as
63 * directed by the "start-cpu" RTAS call, with PIR in r3.
59 * -or- For OPAL entry: 64 * -or- For OPAL entry:
60 * 1. The MMU is off, processor in HV mode, primary CPU enters at 0 65 * 1. The MMU is off, processor in HV mode.
61 * with device-tree in gpr3. We also get OPAL base in r8 and 66 * 2. The primary CPU enters at 0 with device-tree in r3, OPAL base
62 * entry in r9 for debugging purposes 67 * in r8, and entry in r9 for debugging purposes.
63 * 2. Secondary processors enter at 0x60 with PIR in gpr3 68 * 3. Secondary CPUs enter as directed by OPAL_START_CPU call, which
69 * is at generic_secondary_smp_init, with PIR in r3.
64 * 70 *
65 * For Book3E processors: 71 * For Book3E processors:
66 * 1. The MMU is on running in AS0 in a state defined in ePAPR 72 * 1. The MMU is on running in AS0 in a state defined in ePAPR
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1125c9be9e06..01e1c1997893 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -112,12 +112,14 @@ power9_save_additional_sprs:
112 std r4, STOP_HFSCR(r13) 112 std r4, STOP_HFSCR(r13)
113 113
114 mfspr r3, SPRN_MMCRA 114 mfspr r3, SPRN_MMCRA
115 mfspr r4, SPRN_MMCR1 115 mfspr r4, SPRN_MMCR0
116 std r3, STOP_MMCRA(r13) 116 std r3, STOP_MMCRA(r13)
117 std r4, STOP_MMCR1(r13) 117 std r4, _MMCR0(r1)
118 118
119 mfspr r3, SPRN_MMCR2 119 mfspr r3, SPRN_MMCR1
120 std r3, STOP_MMCR2(r13) 120 mfspr r4, SPRN_MMCR2
121 std r3, STOP_MMCR1(r13)
122 std r4, STOP_MMCR2(r13)
121 blr 123 blr
122 124
123power9_restore_additional_sprs: 125power9_restore_additional_sprs:
@@ -135,11 +137,14 @@ power9_restore_additional_sprs:
135 ld r4, STOP_MMCRA(r13) 137 ld r4, STOP_MMCRA(r13)
136 mtspr SPRN_HFSCR, r3 138 mtspr SPRN_HFSCR, r3
137 mtspr SPRN_MMCRA, r4 139 mtspr SPRN_MMCRA, r4
138 /* We have already restored PACA_MMCR0 */ 140
139 ld r3, STOP_MMCR1(r13) 141 ld r3, _MMCR0(r1)
140 ld r4, STOP_MMCR2(r13) 142 ld r4, STOP_MMCR1(r13)
141 mtspr SPRN_MMCR1, r3 143 mtspr SPRN_MMCR0, r3
142 mtspr SPRN_MMCR2, r4 144 mtspr SPRN_MMCR1, r4
145
146 ld r3, STOP_MMCR2(r13)
147 mtspr SPRN_MMCR2, r3
143 blr 148 blr
144 149
145/* 150/*
@@ -319,20 +324,13 @@ enter_winkle:
319/* 324/*
320 * r3 - PSSCR value corresponding to the requested stop state. 325 * r3 - PSSCR value corresponding to the requested stop state.
321 */ 326 */
327power_enter_stop:
322#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 328#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
323power_enter_stop_kvm_rm: 329 /* Tell KVM we're entering idle */
324 /*
325 * This is currently unused because POWER9 KVM does not have to
326 * gather secondary threads into sibling mode, but the code is
327 * here in case that function is required.
328 *
329 * Tell KVM we're entering idle.
330 */
331 li r4,KVM_HWTHREAD_IN_IDLE 330 li r4,KVM_HWTHREAD_IN_IDLE
332 /* DO THIS IN REAL MODE! See comment above. */ 331 /* DO THIS IN REAL MODE! See comment above. */
333 stb r4,HSTATE_HWTHREAD_STATE(r13) 332 stb r4,HSTATE_HWTHREAD_STATE(r13)
334#endif 333#endif
335power_enter_stop:
336/* 334/*
337 * Check if we are executing the lite variant with ESL=EC=0 335 * Check if we are executing the lite variant with ESL=EC=0
338 */ 336 */
@@ -357,13 +355,15 @@ power_enter_stop:
357 b pnv_wakeup_noloss 355 b pnv_wakeup_noloss
358 356
359.Lhandle_esl_ec_set: 357.Lhandle_esl_ec_set:
358BEGIN_FTR_SECTION
360 /* 359 /*
361 * POWER9 DD2 can incorrectly set PMAO when waking up after a 360 * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
362 * state-loss idle. Saving and restoring MMCR0 over idle is a 361 * a state-loss idle. Saving and restoring MMCR0 over idle is a
363 * workaround. 362 * workaround.
364 */ 363 */
365 mfspr r4,SPRN_MMCR0 364 mfspr r4,SPRN_MMCR0
366 std r4,_MMCR0(r1) 365 std r4,_MMCR0(r1)
366END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
367 367
368/* 368/*
369 * Check if the requested state is a deep idle state. 369 * Check if the requested state is a deep idle state.
@@ -496,18 +496,6 @@ pnv_powersave_wakeup_mce:
496 496
497 b pnv_powersave_wakeup 497 b pnv_powersave_wakeup
498 498
499#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
500kvm_start_guest_check:
501 li r0,KVM_HWTHREAD_IN_KERNEL
502 stb r0,HSTATE_HWTHREAD_STATE(r13)
503 /* Order setting hwthread_state vs. testing hwthread_req */
504 sync
505 lbz r0,HSTATE_HWTHREAD_REQ(r13)
506 cmpwi r0,0
507 beqlr
508 b kvm_start_guest
509#endif
510
511/* 499/*
512 * Called from reset vector for powersave wakeups. 500 * Called from reset vector for powersave wakeups.
513 * cr3 - set to gt if waking up with partial/complete hypervisor state loss 501 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -532,9 +520,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
532 mr r3,r12 520 mr r3,r12
533 521
534#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 522#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
535BEGIN_FTR_SECTION 523 li r0,KVM_HWTHREAD_IN_KERNEL
536 bl kvm_start_guest_check 524 stb r0,HSTATE_HWTHREAD_STATE(r13)
537END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 525 /* Order setting hwthread_state vs. testing hwthread_req */
526 sync
527 lbz r0,HSTATE_HWTHREAD_REQ(r13)
528 cmpwi r0,0
529 beq 1f
530 b kvm_start_guest
5311:
538#endif 532#endif
539 533
540 /* Return SRR1 from power7_nap() */ 534 /* Return SRR1 from power7_nap() */
@@ -555,15 +549,17 @@ pnv_restore_hyp_resource_arch300:
555 * then clear bit 60 in MMCRA to ensure the PMU starts running. 549 * then clear bit 60 in MMCRA to ensure the PMU starts running.
556 */ 550 */
557 blt cr3,1f 551 blt cr3,1f
552BEGIN_FTR_SECTION
558 PPC_INVALIDATE_ERAT 553 PPC_INVALIDATE_ERAT
559 ld r1,PACAR1(r13) 554 ld r1,PACAR1(r13)
555 ld r4,_MMCR0(r1)
556 mtspr SPRN_MMCR0,r4
557END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
560 mfspr r4,SPRN_MMCRA 558 mfspr r4,SPRN_MMCRA
561 ori r4,r4,(1 << (63-60)) 559 ori r4,r4,(1 << (63-60))
562 mtspr SPRN_MMCRA,r4 560 mtspr SPRN_MMCRA,r4
563 xori r4,r4,(1 << (63-60)) 561 xori r4,r4,(1 << (63-60))
564 mtspr SPRN_MMCRA,r4 562 mtspr SPRN_MMCRA,r4
565 ld r4,_MMCR0(r1)
566 mtspr SPRN_MMCR0,r4
5671: 5631:
568 /* 564 /*
569 * POWER ISA 3. Use PSSCR to determine if we 565 * POWER ISA 3. Use PSSCR to determine if we
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4e65bf82f5e0..b7a84522e652 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -143,6 +143,13 @@ notrace unsigned int __check_irq_replay(void)
143 */ 143 */
144 unsigned char happened = local_paca->irq_happened; 144 unsigned char happened = local_paca->irq_happened;
145 145
146 /*
147 * We are responding to the next interrupt, so interrupt-off
148 * latencies should be reset here.
149 */
150 trace_hardirqs_on();
151 trace_hardirqs_off();
152
146 if (happened & PACA_IRQ_HARD_DIS) { 153 if (happened & PACA_IRQ_HARD_DIS) {
147 /* Clear bit 0 which we wouldn't clear otherwise */ 154 /* Clear bit 0 which we wouldn't clear otherwise */
148 local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; 155 local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
@@ -270,6 +277,7 @@ notrace void arch_local_irq_restore(unsigned long en)
270#endif /* CONFIG_TRACE_IRQFLAGS */ 277#endif /* CONFIG_TRACE_IRQFLAGS */
271 278
272 set_soft_enabled(0); 279 set_soft_enabled(0);
280 trace_hardirqs_off();
273 281
274 /* 282 /*
275 * Check if anything needs to be re-emitted. We haven't 283 * Check if anything needs to be re-emitted. We haven't
@@ -279,6 +287,7 @@ notrace void arch_local_irq_restore(unsigned long en)
279 replay = __check_irq_replay(); 287 replay = __check_irq_replay();
280 288
281 /* We can soft-enable now */ 289 /* We can soft-enable now */
290 trace_hardirqs_on();
282 set_soft_enabled(1); 291 set_soft_enabled(1);
283 292
284 /* 293 /*
@@ -394,11 +403,19 @@ bool prep_irq_for_idle_irqsoff(void)
394/* 403/*
395 * Take the SRR1 wakeup reason, index into this table to find the 404 * Take the SRR1 wakeup reason, index into this table to find the
396 * appropriate irq_happened bit. 405 * appropriate irq_happened bit.
406 *
407 * System reset exceptions taken in idle state also come through here,
408 * but they are NMI interrupts so do not need to wait for IRQs to be
409 * restored, and should be taken as early as practical. These are marked
410 * with 0xff in the table. The Power ISA specifies 0100b as the system
411 * reset interrupt reason.
397 */ 412 */
413#define IRQ_SYSTEM_RESET 0xff
414
398static const u8 srr1_to_lazyirq[0x10] = { 415static const u8 srr1_to_lazyirq[0x10] = {
399 0, 0, 0, 416 0, 0, 0,
400 PACA_IRQ_DBELL, 417 PACA_IRQ_DBELL,
401 0, 418 IRQ_SYSTEM_RESET,
402 PACA_IRQ_DBELL, 419 PACA_IRQ_DBELL,
403 PACA_IRQ_DEC, 420 PACA_IRQ_DEC,
404 0, 421 0,
@@ -407,15 +424,43 @@ static const u8 srr1_to_lazyirq[0x10] = {
407 PACA_IRQ_HMI, 424 PACA_IRQ_HMI,
408 0, 0, 0, 0, 0 }; 425 0, 0, 0, 0, 0 };
409 426
427void replay_system_reset(void)
428{
429 struct pt_regs regs;
430
431 ppc_save_regs(&regs);
432 regs.trap = 0x100;
433 get_paca()->in_nmi = 1;
434 system_reset_exception(&regs);
435 get_paca()->in_nmi = 0;
436}
437EXPORT_SYMBOL_GPL(replay_system_reset);
438
410void irq_set_pending_from_srr1(unsigned long srr1) 439void irq_set_pending_from_srr1(unsigned long srr1)
411{ 440{
412 unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; 441 unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
442 u8 reason = srr1_to_lazyirq[idx];
443
444 /*
445 * Take the system reset now, which is immediately after registers
446 * are restored from idle. It's an NMI, so interrupts need not be
447 * re-enabled before it is taken.
448 */
449 if (unlikely(reason == IRQ_SYSTEM_RESET)) {
450 replay_system_reset();
451 return;
452 }
413 453
414 /* 454 /*
415 * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, 455 * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
416 * so this can be called unconditionally with srr1 wake reason. 456 * so this can be called unconditionally with the SRR1 wake
457 * reason as returned by the idle code, which uses 0 to mean no
458 * interrupt.
459 *
460 * If a future CPU was to designate this as an interrupt reason,
461 * then a new index for no interrupt must be assigned.
417 */ 462 */
418 local_paca->irq_happened |= srr1_to_lazyirq[idx]; 463 local_paca->irq_happened |= reason;
419} 464}
420#endif /* CONFIG_PPC_BOOK3S */ 465#endif /* CONFIG_PPC_BOOK3S */
421 466
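For reference, the table index above comes from SRR1[42:45] (IBM bit numbering), i.e. bits 21..18 from the least significant end of a 64-bit SRR1, hence the 16-entry table. A standalone sketch; the mask value is assumed to match the kernel's SRR1_WAKEMASK_P8:

#include <stdint.h>

#define SRR1_WAKEMASK_P8	0x003c0000ULL	/* assumed kernel value */

static unsigned int srr1_wake_idx(uint64_t srr1)
{
	/* 4-bit wake reason -> index 0..15 into srr1_to_lazyirq[] */
	return (srr1 & SRR1_WAKEMASK_P8) >> 18;
}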
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 6c089d9757c9..7a1f99f1b47f 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -25,6 +25,21 @@
25#include <linux/preempt.h> 25#include <linux/preempt.h>
26#include <linux/ftrace.h> 26#include <linux/ftrace.h>
27 27
28/*
29 * This is called from ftrace code after invoking registered handlers to
30 * disambiguate regs->nip changes done by jprobes and livepatch. We check if
31 * there is an active jprobe at the provided address (mcount location).
32 */
33int __is_active_jprobe(unsigned long addr)
34{
35 if (!preemptible()) {
36 struct kprobe *p = raw_cpu_read(current_kprobe);
37 return (p && (unsigned long)p->addr == addr) ? 1 : 0;
38 }
39
40 return 0;
41}
42
28static nokprobe_inline 43static nokprobe_inline
29int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, 44int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
30 struct kprobe_ctlblk *kcb, unsigned long orig_nip) 45 struct kprobe_ctlblk *kcb, unsigned long orig_nip)
@@ -60,11 +75,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
60{ 75{
61 struct kprobe *p; 76 struct kprobe *p;
62 struct kprobe_ctlblk *kcb; 77 struct kprobe_ctlblk *kcb;
63 unsigned long flags;
64 78
65 /* Disable irq for emulating a breakpoint and avoiding preempt */ 79 preempt_disable();
66 local_irq_save(flags);
67 hard_irq_disable();
68 80
69 p = get_kprobe((kprobe_opcode_t *)nip); 81 p = get_kprobe((kprobe_opcode_t *)nip);
70 if (unlikely(!p) || kprobe_disabled(p)) 82 if (unlikely(!p) || kprobe_disabled(p))
@@ -86,13 +98,17 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
86 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 98 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
87 if (!p->pre_handler || !p->pre_handler(p, regs)) 99 if (!p->pre_handler || !p->pre_handler(p, regs))
88 __skip_singlestep(p, regs, kcb, orig_nip); 100 __skip_singlestep(p, regs, kcb, orig_nip);
89 /* 101 else {
90 * If pre_handler returns !0, it sets regs->nip and 102 /*
91 * resets current kprobe. 103 * If pre_handler returns !0, it sets regs->nip and
92 */ 104 * resets current kprobe. In this case, we should not
105 * re-enable preemption.
106 */
107 return;
108 }
93 } 109 }
94end: 110end:
95 local_irq_restore(flags); 111 preempt_enable_no_resched();
96} 112}
97NOKPROBE_SYMBOL(kprobe_ftrace_handler); 113NOKPROBE_SYMBOL(kprobe_ftrace_handler);
98 114
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bebc3007a793..ca5d5a081e75 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,12 +43,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
43 43
44struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; 44struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
45 45
46int is_current_kprobe_addr(unsigned long addr)
47{
48 struct kprobe *p = kprobe_running();
49 return (p && (unsigned long)p->addr == addr) ? 1 : 0;
50}
51
52bool arch_within_kprobe_blacklist(unsigned long addr) 46bool arch_within_kprobe_blacklist(unsigned long addr)
53{ 47{
54 return (addr >= (unsigned long)__kprobes_text_start && 48 return (addr >= (unsigned long)__kprobes_text_start &&
@@ -59,7 +53,7 @@ bool arch_within_kprobe_blacklist(unsigned long addr)
59 53
60kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) 54kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
61{ 55{
62 kprobe_opcode_t *addr; 56 kprobe_opcode_t *addr = NULL;
63 57
64#ifdef PPC64_ELF_ABI_v2 58#ifdef PPC64_ELF_ABI_v2
65 /* PPC64 ABIv2 needs local entry point */ 59 /* PPC64 ABIv2 needs local entry point */
@@ -91,36 +85,29 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
91 * Also handle <module:symbol> format. 85 * Also handle <module:symbol> format.
92 */ 86 */
93 char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN]; 87 char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];
94 const char *modsym;
95 bool dot_appended = false; 88 bool dot_appended = false;
96 if ((modsym = strchr(name, ':')) != NULL) { 89 const char *c;
97 modsym++; 90 ssize_t ret = 0;
98 if (*modsym != '\0' && *modsym != '.') { 91 int len = 0;
99 /* Convert to <module:.symbol> */ 92
100 strncpy(dot_name, name, modsym - name); 93 if ((c = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) {
101 dot_name[modsym - name] = '.'; 94 c++;
102 dot_name[modsym - name + 1] = '\0'; 95 len = c - name;
103 strncat(dot_name, modsym, 96 memcpy(dot_name, name, len);
104 sizeof(dot_name) - (modsym - name) - 2); 97 } else
105 dot_appended = true; 98 c = name;
106 } else { 99
107 dot_name[0] = '\0'; 100 if (*c != '\0' && *c != '.') {
108 strncat(dot_name, name, sizeof(dot_name) - 1); 101 dot_name[len++] = '.';
109 }
110 } else if (name[0] != '.') {
111 dot_name[0] = '.';
112 dot_name[1] = '\0';
113 strncat(dot_name, name, KSYM_NAME_LEN - 2);
114 dot_appended = true; 102 dot_appended = true;
115 } else {
116 dot_name[0] = '\0';
117 strncat(dot_name, name, KSYM_NAME_LEN - 1);
118 } 103 }
119 addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); 104 ret = strscpy(dot_name + len, c, KSYM_NAME_LEN);
120 if (!addr && dot_appended) { 105 if (ret > 0)
121 /* Let's try the original non-dot symbol lookup */ 106 addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
107
108 /* Fallback to the original non-dot symbol lookup */
109 if (!addr && dot_appended)
122 addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); 110 addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
123 }
124#else 111#else
125 addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); 112 addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
126#endif 113#endif
@@ -239,7 +226,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
239} 226}
240NOKPROBE_SYMBOL(arch_prepare_kretprobe); 227NOKPROBE_SYMBOL(arch_prepare_kretprobe);
241 228
242int try_to_emulate(struct kprobe *p, struct pt_regs *regs) 229static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
243{ 230{
244 int ret; 231 int ret;
245 unsigned int insn = *p->ainsn.insn; 232 unsigned int insn = *p->ainsn.insn;
@@ -261,9 +248,20 @@ int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
261 */ 248 */
262 printk("Can't step on instruction %x\n", insn); 249 printk("Can't step on instruction %x\n", insn);
263 BUG(); 250 BUG();
264 } else if (ret == 0) 251 } else {
265 /* This instruction can't be boosted */ 252 /*
266 p->ainsn.boostable = -1; 253 * If we haven't previously emulated this instruction, then it
254 * can't be boosted. Note it down so we don't try to do so again.
255 *
256 * If, however, we had emulated this instruction in the past,
257 * then this is just an error with the current run (for
258 * instance, exceptions due to a load/store). We return 0 so
259 * that this is now single-stepped, but continue to try
260 * emulating it in subsequent probe hits.
261 */
262 if (unlikely(p->ainsn.boostable != 1))
263 p->ainsn.boostable = -1;
264 }
267 265
268 return ret; 266 return ret;
269} 267}
@@ -639,24 +637,22 @@ NOKPROBE_SYMBOL(setjmp_pre_handler);
639 637
640void __used jprobe_return(void) 638void __used jprobe_return(void)
641{ 639{
642 asm volatile("trap" ::: "memory"); 640 asm volatile("jprobe_return_trap:\n"
641 "trap\n"
642 ::: "memory");
643} 643}
644NOKPROBE_SYMBOL(jprobe_return); 644NOKPROBE_SYMBOL(jprobe_return);
645 645
646static void __used jprobe_return_end(void)
647{
648}
649NOKPROBE_SYMBOL(jprobe_return_end);
650
651int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 646int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
652{ 647{
653 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 648 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
654 649
655 /* 650 if (regs->nip != ppc_kallsyms_lookup_name("jprobe_return_trap")) {
656 * FIXME - we should ideally be validating that we got here 'cos 651 pr_debug("longjmp_break_handler NIP (0x%lx) does not match jprobe_return_trap (0x%lx)\n",
657 * of the "trap" in jprobe_return() above, before restoring the 652 regs->nip, ppc_kallsyms_lookup_name("jprobe_return_trap"));
658 * saved regs... 653 return 0;
659 */ 654 }
655
660 memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); 656 memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
661 /* It's OK to start function graph tracing again */ 657 /* It's OK to start function graph tracing again */
662 unpause_graph_tracing(); 658 unpause_graph_tracing();
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 5c12e21d0d1a..49d34d7271e7 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -360,7 +360,7 @@ void default_machine_kexec(struct kimage *image)
360 /* NOTREACHED */ 360 /* NOTREACHED */
361} 361}
362 362
363#ifdef CONFIG_PPC_STD_MMU_64 363#ifdef CONFIG_PPC_BOOK3S_64
364/* Values we need to export to the second kernel via the device tree. */ 364/* Values we need to export to the second kernel via the device tree. */
365static unsigned long htab_base; 365static unsigned long htab_base;
366static unsigned long htab_size; 366static unsigned long htab_size;
@@ -402,4 +402,4 @@ static int __init export_htab_values(void)
402 return 0; 402 return 0;
403} 403}
404late_initcall(export_htab_values); 404late_initcall(export_htab_values);
405#endif /* CONFIG_PPC_STD_MMU_64 */ 405#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9b2ea7e71c06..742e4658c5dc 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -39,11 +39,21 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
39static DEFINE_PER_CPU(int, mce_queue_count); 39static DEFINE_PER_CPU(int, mce_queue_count);
40static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); 40static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
41 41
42/* Queue for delayed MCE UE events. */
43static DEFINE_PER_CPU(int, mce_ue_count);
44static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
45 mce_ue_event_queue);
46
42static void machine_check_process_queued_event(struct irq_work *work); 47static void machine_check_process_queued_event(struct irq_work *work);
48void machine_check_ue_event(struct machine_check_event *evt);
49static void machine_process_ue_event(struct work_struct *work);
50
43static struct irq_work mce_event_process_work = { 51static struct irq_work mce_event_process_work = {
44 .func = machine_check_process_queued_event, 52 .func = machine_check_process_queued_event,
45}; 53};
46 54
55DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
56
47static void mce_set_error_info(struct machine_check_event *mce, 57static void mce_set_error_info(struct machine_check_event *mce,
48 struct mce_error_info *mce_err) 58 struct mce_error_info *mce_err)
49{ 59{
@@ -82,7 +92,7 @@ static void mce_set_error_info(struct machine_check_event *mce,
82 */ 92 */
83void save_mce_event(struct pt_regs *regs, long handled, 93void save_mce_event(struct pt_regs *regs, long handled,
84 struct mce_error_info *mce_err, 94 struct mce_error_info *mce_err,
85 uint64_t nip, uint64_t addr) 95 uint64_t nip, uint64_t addr, uint64_t phys_addr)
86{ 96{
87 int index = __this_cpu_inc_return(mce_nest_count) - 1; 97 int index = __this_cpu_inc_return(mce_nest_count) - 1;
88 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); 98 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
@@ -140,6 +150,11 @@ void save_mce_event(struct pt_regs *regs, long handled,
140 } else if (mce->error_type == MCE_ERROR_TYPE_UE) { 150 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
141 mce->u.ue_error.effective_address_provided = true; 151 mce->u.ue_error.effective_address_provided = true;
142 mce->u.ue_error.effective_address = addr; 152 mce->u.ue_error.effective_address = addr;
153 if (phys_addr != ULONG_MAX) {
154 mce->u.ue_error.physical_address_provided = true;
155 mce->u.ue_error.physical_address = phys_addr;
156 machine_check_ue_event(mce);
157 }
143 } 158 }
144 return; 159 return;
145} 160}
@@ -193,6 +208,26 @@ void release_mce_event(void)
193 get_mce_event(NULL, true); 208 get_mce_event(NULL, true);
194} 209}
195 210
211
212/*
213 * Queue up the MCE UE event so it can be handled later.
214 */
215void machine_check_ue_event(struct machine_check_event *evt)
216{
217 int index;
218
219 index = __this_cpu_inc_return(mce_ue_count) - 1;
220 /* If queue is full, just return for now. */
221 if (index >= MAX_MC_EVT) {
222 __this_cpu_dec(mce_ue_count);
223 return;
224 }
225 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
226
227 /* Queue work to process this event later. */
228 schedule_work(&mce_ue_event_work);
229}
230
196/* 231/*
197 * Queue up the MCE event which then can be handled later. 232 * Queue up the MCE event which then can be handled later.
198 */ 233 */
@@ -215,7 +250,39 @@ void machine_check_queue_event(void)
215 /* Queue irq work to process this event later. */ 250 /* Queue irq work to process this event later. */
216 irq_work_queue(&mce_event_process_work); 251 irq_work_queue(&mce_event_process_work);
217} 252}
218 253/*
254 * Process pending UE MCE events from the MCE UE event queue. This work
255 * function is scheduled from machine_check_ue_event().
256 */
257static void machine_process_ue_event(struct work_struct *work)
258{
259 int index;
260 struct machine_check_event *evt;
261
262 while (__this_cpu_read(mce_ue_count) > 0) {
263 index = __this_cpu_read(mce_ue_count) - 1;
264 evt = this_cpu_ptr(&mce_ue_event_queue[index]);
265#ifdef CONFIG_MEMORY_FAILURE
266 /*
267 * This should probably be queued elsewhere, but
268 * oh well
269 */
270 if (evt->error_type == MCE_ERROR_TYPE_UE) {
271 if (evt->u.ue_error.physical_address_provided) {
272 unsigned long pfn;
273
274 pfn = evt->u.ue_error.physical_address >>
275 PAGE_SHIFT;
276 memory_failure(pfn, SIGBUS, 0);
277 } else
278 pr_warn("Failed to identify bad address from "
279 "where the uncorrectable error (UE) "
280 "was generated\n");
281 }
282#endif
283 __this_cpu_dec(mce_ue_count);
284 }
285}
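A worked example of the shift above: with 64K pages (PAGE_SHIFT == 16, the common configuration on these machines), a UE reported at physical address 0x3fff871234 yields pfn = 0x3fff871234 >> 16 = 0x3fff87, and memory_failure() poisons that single page; with 4K pages (PAGE_SHIFT == 12) the same address would give pfn 0x3fff871.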
219/* 286/*
220 * process pending MCE event from the mce event queue. This function will be 287 * process pending MCE event from the mce event queue. This function will be
221 * called during syscall exit. 288 * called during syscall exit.
@@ -223,6 +290,7 @@ void machine_check_queue_event(void)
223static void machine_check_process_queued_event(struct irq_work *work) 290static void machine_check_process_queued_event(struct irq_work *work)
224{ 291{
225 int index; 292 int index;
293 struct machine_check_event *evt;
226 294
227 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 295 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
228 296
@@ -232,8 +300,8 @@ static void machine_check_process_queued_event(struct irq_work *work)
232 */ 300 */
233 while (__this_cpu_read(mce_queue_count) > 0) { 301 while (__this_cpu_read(mce_queue_count) > 0) {
234 index = __this_cpu_read(mce_queue_count) - 1; 302 index = __this_cpu_read(mce_queue_count) - 1;
235 machine_check_print_event_info( 303 evt = this_cpu_ptr(&mce_event_queue[index]);
236 this_cpu_ptr(&mce_event_queue[index]), false); 304 machine_check_print_event_info(evt, false);
237 __this_cpu_dec(mce_queue_count); 305 __this_cpu_dec(mce_queue_count);
238 } 306 }
239} 307}
@@ -340,7 +408,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
340 printk("%s Effective address: %016llx\n", 408 printk("%s Effective address: %016llx\n",
341 level, evt->u.ue_error.effective_address); 409 level, evt->u.ue_error.effective_address);
342 if (evt->u.ue_error.physical_address_provided) 410 if (evt->u.ue_error.physical_address_provided)
343 printk("%s Physical address: %016llx\n", 411 printk("%s Physical address: %016llx\n",
344 level, evt->u.ue_error.physical_address); 412 level, evt->u.ue_error.physical_address);
345 break; 413 break;
346 case MCE_ERROR_TYPE_SLB: 414 case MCE_ERROR_TYPE_SLB:
@@ -411,45 +479,6 @@ void machine_check_print_event_info(struct machine_check_event *evt,
411} 479}
412EXPORT_SYMBOL_GPL(machine_check_print_event_info); 480EXPORT_SYMBOL_GPL(machine_check_print_event_info);
413 481
414uint64_t get_mce_fault_addr(struct machine_check_event *evt)
415{
416 switch (evt->error_type) {
417 case MCE_ERROR_TYPE_UE:
418 if (evt->u.ue_error.effective_address_provided)
419 return evt->u.ue_error.effective_address;
420 break;
421 case MCE_ERROR_TYPE_SLB:
422 if (evt->u.slb_error.effective_address_provided)
423 return evt->u.slb_error.effective_address;
424 break;
425 case MCE_ERROR_TYPE_ERAT:
426 if (evt->u.erat_error.effective_address_provided)
427 return evt->u.erat_error.effective_address;
428 break;
429 case MCE_ERROR_TYPE_TLB:
430 if (evt->u.tlb_error.effective_address_provided)
431 return evt->u.tlb_error.effective_address;
432 break;
433 case MCE_ERROR_TYPE_USER:
434 if (evt->u.user_error.effective_address_provided)
435 return evt->u.user_error.effective_address;
436 break;
437 case MCE_ERROR_TYPE_RA:
438 if (evt->u.ra_error.effective_address_provided)
439 return evt->u.ra_error.effective_address;
440 break;
441 case MCE_ERROR_TYPE_LINK:
442 if (evt->u.link_error.effective_address_provided)
443 return evt->u.link_error.effective_address;
444 break;
445 default:
446 case MCE_ERROR_TYPE_UNKNOWN:
447 break;
448 }
449 return 0;
450}
451EXPORT_SYMBOL(get_mce_fault_addr);
452
453/* 482/*
454 * This function is called in real mode. Strictly no printk's please. 483 * This function is called in real mode. Strictly no printk's please.
455 * 484 *
@@ -470,6 +499,34 @@ long hmi_exception_realmode(struct pt_regs *regs)
470{ 499{
471 __this_cpu_inc(irq_stat.hmi_exceptions); 500 __this_cpu_inc(irq_stat.hmi_exceptions);
472 501
502#ifdef CONFIG_PPC_BOOK3S_64
503 /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
504 if (pvr_version_is(PVR_POWER9)) {
505 unsigned long hmer = mfspr(SPRN_HMER);
506
507 /* Do we have the debug bit set? */
508 if (hmer & PPC_BIT(17)) {
509 hmer &= ~PPC_BIT(17);
510 mtspr(SPRN_HMER, hmer);
511
512 /*
513 * Now, to avoid problems with soft-disable, we
514 * only do the emulation if we are coming from
515 * user space.
516 */
517 if (user_mode(regs))
518 local_paca->hmi_p9_special_emu = 1;
519
520 /*
521 * Don't bother going to OPAL if that's the
522 * only relevant bit.
523 */
524 if (!(hmer & mfspr(SPRN_HMEER)))
525 return local_paca->hmi_p9_special_emu;
526 }
527 }
528#endif /* CONFIG_PPC_BOOK3S_64 */
529
473 wait_for_subcore_guest_exit(); 530 wait_for_subcore_guest_exit();
474 531
475 if (ppc_md.hmi_exception_early) 532 if (ppc_md.hmi_exception_early)
@@ -477,5 +534,5 @@ long hmi_exception_realmode(struct pt_regs *regs)
477 534
478 wait_for_tb_resync(); 535 wait_for_tb_resync();
479 536
480 return 0; 537 return 1;
481} 538}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 72f153c6f3fa..644f7040b91c 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -27,6 +27,36 @@
27#include <asm/mmu.h> 27#include <asm/mmu.h>
28#include <asm/mce.h> 28#include <asm/mce.h>
29#include <asm/machdep.h> 29#include <asm/machdep.h>
30#include <asm/pgtable.h>
31#include <asm/pte-walk.h>
32#include <asm/sstep.h>
33#include <asm/exception-64s.h>
34
35/*
36 * Convert an address related to an mm to a PFN. NOTE: we are in real
37 * mode, we could potentially race with page table updates.
38 */
39static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
40{
41 pte_t *ptep;
42 unsigned long flags;
43 struct mm_struct *mm;
44
45 if (user_mode(regs))
46 mm = current->mm;
47 else
48 mm = &init_mm;
49
50 local_irq_save(flags);
51 if (mm == current->mm)
52 ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
53 else
54 ptep = find_init_mm_pte(addr, NULL);
55 local_irq_restore(flags);
56 if (!ptep || pte_special(*ptep))
57 return ULONG_MAX;
58 return pte_pfn(*ptep);
59}
30 60
31static void flush_tlb_206(unsigned int num_sets, unsigned int action) 61static void flush_tlb_206(unsigned int num_sets, unsigned int action)
32{ 62{
@@ -128,7 +158,7 @@ void __flush_tlb_power9(unsigned int action)
128{ 158{
129 unsigned int num_sets; 159 unsigned int num_sets;
130 160
131 if (radix_enabled()) 161 if (early_radix_enabled())
132 num_sets = POWER9_TLB_SETS_RADIX; 162 num_sets = POWER9_TLB_SETS_RADIX;
133 else 163 else
134 num_sets = POWER9_TLB_SETS_HASH; 164 num_sets = POWER9_TLB_SETS_HASH;
@@ -138,7 +168,7 @@ void __flush_tlb_power9(unsigned int action)
138 168
139 169
140/* flush SLBs and reload */ 170/* flush SLBs and reload */
141#ifdef CONFIG_PPC_STD_MMU_64 171#ifdef CONFIG_PPC_BOOK3S_64
142static void flush_and_reload_slb(void) 172static void flush_and_reload_slb(void)
143{ 173{
144 struct slb_shadow *slb; 174 struct slb_shadow *slb;
@@ -185,7 +215,7 @@ static void flush_erat(void)
185 215
186static int mce_flush(int what) 216static int mce_flush(int what)
187{ 217{
188#ifdef CONFIG_PPC_STD_MMU_64 218#ifdef CONFIG_PPC_BOOK3S_64
189 if (what == MCE_FLUSH_SLB) { 219 if (what == MCE_FLUSH_SLB) {
190 flush_and_reload_slb(); 220 flush_and_reload_slb();
191 return 1; 221 return 1;
@@ -421,9 +451,45 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
421 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 451 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
422{ 0, false, 0, 0, 0, 0 } }; 452{ 0, false, 0, 0, 0, 0 } };
423 453
454static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
455 uint64_t *phys_addr)
456{
457 /*
458 * Carefully look at the NIP to determine
459 * the instruction to analyse. Reading the NIP
460 * in real mode is tricky and can lead to recursive
461 * faults.
462 */
463 int instr;
464 unsigned long pfn, instr_addr;
465 struct instruction_op op;
466 struct pt_regs tmp = *regs;
467
468 pfn = addr_to_pfn(regs, regs->nip);
469 if (pfn != ULONG_MAX) {
470 instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
471 instr = *(unsigned int *)(instr_addr);
472 if (!analyse_instr(&op, &tmp, instr)) {
473 pfn = addr_to_pfn(regs, op.ea);
474 *addr = op.ea;
475 *phys_addr = (pfn << PAGE_SHIFT);
476 return 0;
477 }
478 /*
479 * analyse_instr() might fail if the instruction
480 * is not a load/store (unexpected for a
481 * load/store error), or if we got the NIP
482 * wrong.
483 */
484 }
485 *addr = 0;
486 return -1;
487}
488
424static int mce_handle_ierror(struct pt_regs *regs, 489static int mce_handle_ierror(struct pt_regs *regs,
425 const struct mce_ierror_table table[], 490 const struct mce_ierror_table table[],
426 struct mce_error_info *mce_err, uint64_t *addr) 491 struct mce_error_info *mce_err, uint64_t *addr,
492 uint64_t *phys_addr)
427{ 493{
428 uint64_t srr1 = regs->msr; 494 uint64_t srr1 = regs->msr;
429 int handled = 0; 495 int handled = 0;
@@ -475,8 +541,22 @@ static int mce_handle_ierror(struct pt_regs *regs,
475 } 541 }
476 mce_err->severity = table[i].severity; 542 mce_err->severity = table[i].severity;
477 mce_err->initiator = table[i].initiator; 543 mce_err->initiator = table[i].initiator;
478 if (table[i].nip_valid) 544 if (table[i].nip_valid) {
479 *addr = regs->nip; 545 *addr = regs->nip;
546 if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
547 table[i].error_type == MCE_ERROR_TYPE_UE) {
548 unsigned long pfn;
549
550 if (get_paca()->in_mce < MAX_MCE_DEPTH) {
551 pfn = addr_to_pfn(regs, regs->nip);
552 if (pfn != ULONG_MAX) {
553 *phys_addr =
554 (pfn << PAGE_SHIFT);
555 handled = 1;
556 }
557 }
558 }
559 }
480 return handled; 560 return handled;
481 } 561 }
482 562
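Note the asymmetry with the derror path below: for an instruction-side UE the effective address is the NIP itself, so the pfn comes straight from addr_to_pfn(regs, regs->nip). Only data-side errors need mce_find_instr_ea_and_pfn() above to decode the faulting load/store and recover its target effective address.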
@@ -489,7 +569,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
489 569
490static int mce_handle_derror(struct pt_regs *regs, 570static int mce_handle_derror(struct pt_regs *regs,
491 const struct mce_derror_table table[], 571 const struct mce_derror_table table[],
492 struct mce_error_info *mce_err, uint64_t *addr) 572 struct mce_error_info *mce_err, uint64_t *addr,
573 uint64_t *phys_addr)
493{ 574{
494 uint64_t dsisr = regs->dsisr; 575 uint64_t dsisr = regs->dsisr;
495 int handled = 0; 576 int handled = 0;
@@ -555,7 +636,17 @@ static int mce_handle_derror(struct pt_regs *regs,
555 mce_err->initiator = table[i].initiator; 636 mce_err->initiator = table[i].initiator;
556 if (table[i].dar_valid) 637 if (table[i].dar_valid)
557 *addr = regs->dar; 638 *addr = regs->dar;
558 639 else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
640 table[i].error_type == MCE_ERROR_TYPE_UE) {
641 /*
642 * We do a maximum of 4 nested MCE calls, see
643 * kernel/exception-64s.h
644 */
645 if (get_paca()->in_mce < MAX_MCE_DEPTH)
646 if (!mce_find_instr_ea_and_pfn(regs, addr,
647 phys_addr))
648 handled = 1;
649 }
559 found = 1; 650 found = 1;
560 } 651 }
561 652
@@ -592,19 +683,21 @@ static long mce_handle_error(struct pt_regs *regs,
592 const struct mce_ierror_table itable[]) 683 const struct mce_ierror_table itable[])
593{ 684{
594 struct mce_error_info mce_err = { 0 }; 685 struct mce_error_info mce_err = { 0 };
595 uint64_t addr; 686 uint64_t addr, phys_addr;
596 uint64_t srr1 = regs->msr; 687 uint64_t srr1 = regs->msr;
597 long handled; 688 long handled;
598 689
599 if (SRR1_MC_LOADSTORE(srr1)) 690 if (SRR1_MC_LOADSTORE(srr1))
600 handled = mce_handle_derror(regs, dtable, &mce_err, &addr); 691 handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
692 &phys_addr);
601 else 693 else
602 handled = mce_handle_ierror(regs, itable, &mce_err, &addr); 694 handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
695 &phys_addr);
603 696
604 if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) 697 if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
605 handled = mce_handle_ue_error(regs); 698 handled = mce_handle_ue_error(regs);
606 699
607 save_mce_event(regs, handled, &mce_err, regs->nip, addr); 700 save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
608 701
609 return handled; 702 return handled;
610} 703}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 0b0f89685b67..759104b99f9f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -429,7 +429,8 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
429 /* Find this stub, or if that fails, the next avail. entry */ 429 /* Find this stub, or if that fails, the next avail. entry */
430 stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; 430 stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
431 for (i = 0; stub_func_addr(stubs[i].funcdata); i++) { 431 for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
432 BUG_ON(i >= num_stubs); 432 if (WARN_ON(i >= num_stubs))
433 return 0;
433 434
434 if (stub_func_addr(stubs[i].funcdata) == func_addr(addr)) 435 if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
435 return (unsigned long)&stubs[i]; 436 return (unsigned long)&stubs[i];
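With the BUG_ON() relaxed to WARN_ON() above, stub_for_addr() can now return 0 when the stub table is exhausted, so callers must treat 0 as failure. A hedged sketch of the expected call-site pattern (the helper is real, the surrounding error handling is illustrative):

	unsigned long stub;

	stub = stub_for_addr(sechdrs, value, me);
	if (!stub)		/* stub table full: fail the module load */
		return -ENOENT;
	/* ...otherwise patch the relocation to branch via 'stub'... */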
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 91e037ab20a1..8237884ca389 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -115,32 +115,23 @@ static unsigned long can_optimize(struct kprobe *p)
115static void optimized_callback(struct optimized_kprobe *op, 115static void optimized_callback(struct optimized_kprobe *op,
116 struct pt_regs *regs) 116 struct pt_regs *regs)
117{ 117{
118 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
119 unsigned long flags;
120
121 /* This is possible if op is under delayed unoptimizing */ 118 /* This is possible if op is under delayed unoptimizing */
122 if (kprobe_disabled(&op->kp)) 119 if (kprobe_disabled(&op->kp))
123 return; 120 return;
124 121
125 local_irq_save(flags); 122 preempt_disable();
126 hard_irq_disable();
127 123
128 if (kprobe_running()) { 124 if (kprobe_running()) {
129 kprobes_inc_nmissed_count(&op->kp); 125 kprobes_inc_nmissed_count(&op->kp);
130 } else { 126 } else {
131 __this_cpu_write(current_kprobe, &op->kp); 127 __this_cpu_write(current_kprobe, &op->kp);
132 regs->nip = (unsigned long)op->kp.addr; 128 regs->nip = (unsigned long)op->kp.addr;
133 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 129 get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
134 opt_pre_handler(&op->kp, regs); 130 opt_pre_handler(&op->kp, regs);
135 __this_cpu_write(current_kprobe, NULL); 131 __this_cpu_write(current_kprobe, NULL);
136 } 132 }
137 133
138 /* 134 preempt_enable_no_resched();
139 * No need for an explicit __hard_irq_enable() here.
140 * local_irq_restore() will re-enable interrupts,
141 * if they were hard disabled.
142 */
143 local_irq_restore(flags);
144} 135}
145NOKPROBE_SYMBOL(optimized_callback); 136NOKPROBE_SYMBOL(optimized_callback);
146 137
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 2ff2b8a19f71..d6597038931d 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -90,7 +90,7 @@ static inline void free_lppacas(void) { }
90 90
91#endif /* CONFIG_PPC_BOOK3S */ 91#endif /* CONFIG_PPC_BOOK3S */
92 92
93#ifdef CONFIG_PPC_STD_MMU_64 93#ifdef CONFIG_PPC_BOOK3S_64
94 94
95/* 95/*
96 * 3 persistent SLBs are registered here. The buffer will be zero 96 * 3 persistent SLBs are registered here. The buffer will be zero
@@ -135,11 +135,11 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
135 return s; 135 return s;
136} 136}
137 137
138#else /* CONFIG_PPC_STD_MMU_64 */ 138#else /* !CONFIG_PPC_BOOK3S_64 */
139 139
140static void __init allocate_slb_shadows(int nr_cpus, int limit) { } 140static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
141 141
142#endif /* CONFIG_PPC_STD_MMU_64 */ 142#endif /* CONFIG_PPC_BOOK3S_64 */
143 143
144/* The Paca is an array with one entry per processor. Each contains an 144/* The Paca is an array with one entry per processor. Each contains an
145 * lppaca, which contains the information shared between the 145 * lppaca, which contains the information shared between the
@@ -170,9 +170,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
170 new_paca->kexec_state = KEXEC_STATE_NONE; 170 new_paca->kexec_state = KEXEC_STATE_NONE;
171 new_paca->__current = &init_task; 171 new_paca->__current = &init_task;
172 new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; 172 new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
173#ifdef CONFIG_PPC_STD_MMU_64 173#ifdef CONFIG_PPC_BOOK3S_64
174 new_paca->slb_shadow_ptr = init_slb_shadow(cpu); 174 new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
175#endif /* CONFIG_PPC_STD_MMU_64 */ 175#endif
176 176
177#ifdef CONFIG_PPC_BOOK3E 177#ifdef CONFIG_PPC_BOOK3E
178 /* For now -- if we have threads this will be adjusted later */ 178 /* For now -- if we have threads this will be adjusted later */
@@ -262,8 +262,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
262 262
263 get_paca()->mm_ctx_id = context->id; 263 get_paca()->mm_ctx_id = context->id;
264#ifdef CONFIG_PPC_MM_SLICES 264#ifdef CONFIG_PPC_MM_SLICES
265 VM_BUG_ON(!mm->context.addr_limit); 265 VM_BUG_ON(!mm->context.slb_addr_limit);
266 get_paca()->addr_limit = mm->context.addr_limit; 266 get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
267 get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; 267 get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
268 memcpy(&get_paca()->mm_ctx_high_slices_psize, 268 memcpy(&get_paca()->mm_ctx_high_slices_psize,
269 &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); 269 &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
@@ -271,7 +271,7 @@ void copy_mm_to_paca(struct mm_struct *mm)
271 get_paca()->mm_ctx_user_psize = context->user_psize; 271 get_paca()->mm_ctx_user_psize = context->user_psize;
272 get_paca()->mm_ctx_sllp = context->sllp; 272 get_paca()->mm_ctx_sllp = context->sllp;
273#endif 273#endif
274#else /* CONFIG_PPC_BOOK3S */ 274#else /* !CONFIG_PPC_BOOK3S */
275 return; 275 return;
276#endif 276#endif
277} 277}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 932b9741aa8f..15ce0306b092 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -90,14 +90,14 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
90 * to do an appropriate TLB flush here too 90 * to do an appropriate TLB flush here too
91 */ 91 */
92 if (bus->self) { 92 if (bus->self) {
93#ifdef CONFIG_PPC_STD_MMU_64 93#ifdef CONFIG_PPC_BOOK3S_64
94 struct resource *res = bus->resource[0]; 94 struct resource *res = bus->resource[0];
95#endif 95#endif
96 96
97 pr_debug("IO unmapping for PCI-PCI bridge %s\n", 97 pr_debug("IO unmapping for PCI-PCI bridge %s\n",
98 pci_name(bus->self)); 98 pci_name(bus->self));
99 99
100#ifdef CONFIG_PPC_STD_MMU_64 100#ifdef CONFIG_PPC_BOOK3S_64
101 __flush_hash_table_range(&init_mm, res->start + _IO_BASE, 101 __flush_hash_table_range(&init_mm, res->start + _IO_BASE,
102 res->end + _IO_BASE + 1); 102 res->end + _IO_BASE + 1);
103#endif 103#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a0c74bbf3454..bfdd783e3916 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -77,6 +77,13 @@
77extern unsigned long _get_SP(void); 77extern unsigned long _get_SP(void);
78 78
79#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 79#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
80/*
81 * Are we running in "Suspend disabled" mode? If so, we have to block any
82 * sigreturn that would get us into suspended state, and we also warn in some
83 * other paths that we should never reach with suspend disabled.
84 */
85bool tm_suspend_disabled __ro_after_init = false;
86
80static void check_if_tm_restore_required(struct task_struct *tsk) 87static void check_if_tm_restore_required(struct task_struct *tsk)
81{ 88{
82 /* 89 /*
@@ -97,9 +104,23 @@ static inline bool msr_tm_active(unsigned long msr)
97{ 104{
98 return MSR_TM_ACTIVE(msr); 105 return MSR_TM_ACTIVE(msr);
99} 106}
107
108static bool tm_active_with_fp(struct task_struct *tsk)
109{
110 return msr_tm_active(tsk->thread.regs->msr) &&
111 (tsk->thread.ckpt_regs.msr & MSR_FP);
112}
113
114static bool tm_active_with_altivec(struct task_struct *tsk)
115{
116 return msr_tm_active(tsk->thread.regs->msr) &&
117 (tsk->thread.ckpt_regs.msr & MSR_VEC);
118}
100#else 119#else
101static inline bool msr_tm_active(unsigned long msr) { return false; } 120static inline bool msr_tm_active(unsigned long msr) { return false; }
102static inline void check_if_tm_restore_required(struct task_struct *tsk) { } 121static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
122static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
123static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
103#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 124#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
104 125
105bool strict_msr_control; 126bool strict_msr_control;
@@ -232,7 +253,7 @@ EXPORT_SYMBOL(enable_kernel_fp);
232 253
233static int restore_fp(struct task_struct *tsk) 254static int restore_fp(struct task_struct *tsk)
234{ 255{
235 if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) { 256 if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
236 load_fp_state(&current->thread.fp_state); 257 load_fp_state(&current->thread.fp_state);
237 current->thread.load_fp++; 258 current->thread.load_fp++;
238 return 1; 259 return 1;
@@ -314,7 +335,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
314static int restore_altivec(struct task_struct *tsk) 335static int restore_altivec(struct task_struct *tsk)
315{ 336{
316 if (cpu_has_feature(CPU_FTR_ALTIVEC) && 337 if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
317 (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) { 338 (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
318 load_vr_state(&tsk->thread.vr_state); 339 load_vr_state(&tsk->thread.vr_state);
319 tsk->thread.used_vr = 1; 340 tsk->thread.used_vr = 1;
320 tsk->thread.load_vec++; 341 tsk->thread.load_vec++;
@@ -853,6 +874,10 @@ static void tm_reclaim_thread(struct thread_struct *thr,
853 if (!MSR_TM_SUSPENDED(mfmsr())) 874 if (!MSR_TM_SUSPENDED(mfmsr()))
854 return; 875 return;
855 876
877 giveup_all(container_of(thr, struct task_struct, thread));
878
879 tm_reclaim(thr, cause);
880
856 /* 881 /*
857 * If we are in a transaction and FP is off then we can't have 882 * If we are in a transaction and FP is off then we can't have
858 * used FP inside that transaction. Hence the checkpointed 883 * used FP inside that transaction. Hence the checkpointed
@@ -871,10 +896,6 @@ static void tm_reclaim_thread(struct thread_struct *thr,
871 if ((thr->ckpt_regs.msr & MSR_VEC) == 0) 896 if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
872 memcpy(&thr->ckvr_state, &thr->vr_state, 897 memcpy(&thr->ckvr_state, &thr->vr_state,
873 sizeof(struct thread_vr_state)); 898 sizeof(struct thread_vr_state));
874
875 giveup_all(container_of(thr, struct task_struct, thread));
876
877 tm_reclaim(thr, thr->ckpt_regs.msr, cause);
878} 899}
879 900
880void tm_reclaim_current(uint8_t cause) 901void tm_reclaim_current(uint8_t cause)
@@ -903,6 +924,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
903 if (!MSR_TM_ACTIVE(thr->regs->msr)) 924 if (!MSR_TM_ACTIVE(thr->regs->msr))
904 goto out_and_saveregs; 925 goto out_and_saveregs;
905 926
927 WARN_ON(tm_suspend_disabled);
928
906 TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " 929 TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
907 "ccr=%lx, msr=%lx, trap=%lx)\n", 930 "ccr=%lx, msr=%lx, trap=%lx)\n",
908 tsk->pid, thr->regs->nip, 931 tsk->pid, thr->regs->nip,
@@ -923,11 +946,9 @@ out_and_saveregs:
923 tm_save_sprs(thr); 946 tm_save_sprs(thr);
924} 947}
925 948
926extern void __tm_recheckpoint(struct thread_struct *thread, 949extern void __tm_recheckpoint(struct thread_struct *thread);
927 unsigned long orig_msr);
928 950
929void tm_recheckpoint(struct thread_struct *thread, 951void tm_recheckpoint(struct thread_struct *thread)
930 unsigned long orig_msr)
931{ 952{
932 unsigned long flags; 953 unsigned long flags;
933 954
@@ -946,15 +967,13 @@ void tm_recheckpoint(struct thread_struct *thread,
946 */ 967 */
947 tm_restore_sprs(thread); 968 tm_restore_sprs(thread);
948 969
949 __tm_recheckpoint(thread, orig_msr); 970 __tm_recheckpoint(thread);
950 971
951 local_irq_restore(flags); 972 local_irq_restore(flags);
952} 973}
953 974
954static inline void tm_recheckpoint_new_task(struct task_struct *new) 975static inline void tm_recheckpoint_new_task(struct task_struct *new)
955{ 976{
956 unsigned long msr;
957
958 if (!cpu_has_feature(CPU_FTR_TM)) 977 if (!cpu_has_feature(CPU_FTR_TM))
959 return; 978 return;
960 979
@@ -973,13 +992,11 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
973 tm_restore_sprs(&new->thread); 992 tm_restore_sprs(&new->thread);
974 return; 993 return;
975 } 994 }
976 msr = new->thread.ckpt_regs.msr;
977 /* Recheckpoint to restore original checkpointed register state. */ 995 /* Recheckpoint to restore original checkpointed register state. */
978 TM_DEBUG("*** tm_recheckpoint of pid %d " 996 TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n",
979 "(new->msr 0x%lx, new->origmsr 0x%lx)\n", 997 new->pid, new->thread.regs->msr);
980 new->pid, new->thread.regs->msr, msr);
981 998
982 tm_recheckpoint(&new->thread, msr); 999 tm_recheckpoint(&new->thread);
983 1000
984 /* 1001 /*
985 * The checkpointed state has been restored but the live state has 1002 * The checkpointed state has been restored but the live state has
@@ -1119,6 +1136,10 @@ static inline void restore_sprs(struct thread_struct *old_thread,
1119 if (old_thread->tar != new_thread->tar) 1136 if (old_thread->tar != new_thread->tar)
1120 mtspr(SPRN_TAR, new_thread->tar); 1137 mtspr(SPRN_TAR, new_thread->tar);
1121 } 1138 }
1139
1140 if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1141 old_thread->tidr != new_thread->tidr)
1142 mtspr(SPRN_TIDR, new_thread->tidr);
1122#endif 1143#endif
1123} 1144}
1124 1145
@@ -1155,7 +1176,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
1155 } 1176 }
1156#endif /* CONFIG_PPC64 */ 1177#endif /* CONFIG_PPC64 */
1157 1178
1158#ifdef CONFIG_PPC_STD_MMU_64 1179#ifdef CONFIG_PPC_BOOK3S_64
1159 batch = this_cpu_ptr(&ppc64_tlb_batch); 1180 batch = this_cpu_ptr(&ppc64_tlb_batch);
1160 if (batch->active) { 1181 if (batch->active) {
1161 current_thread_info()->local_flags |= _TLF_LAZY_MMU; 1182 current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1163,7 +1184,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
1163 __flush_tlb_pending(batch); 1184 __flush_tlb_pending(batch);
1164 batch->active = 0; 1185 batch->active = 0;
1165 } 1186 }
1166#endif /* CONFIG_PPC_STD_MMU_64 */ 1187#endif /* CONFIG_PPC_BOOK3S_64 */
1167 1188
1168#ifdef CONFIG_PPC_ADV_DEBUG_REGS 1189#ifdef CONFIG_PPC_ADV_DEBUG_REGS
1169 switch_booke_debug_regs(&new->thread.debug); 1190 switch_booke_debug_regs(&new->thread.debug);
@@ -1209,7 +1230,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
1209 1230
1210 last = _switch(old_thread, new_thread); 1231 last = _switch(old_thread, new_thread);
1211 1232
1212#ifdef CONFIG_PPC_STD_MMU_64 1233#ifdef CONFIG_PPC_BOOK3S_64
1213 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { 1234 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
1214 current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; 1235 current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
1215 batch = this_cpu_ptr(&ppc64_tlb_batch); 1236 batch = this_cpu_ptr(&ppc64_tlb_batch);
@@ -1223,22 +1244,22 @@ struct task_struct *__switch_to(struct task_struct *prev,
1223 * The copy-paste buffer can only store into foreign real 1244 * The copy-paste buffer can only store into foreign real
1224 * addresses, so unprivileged processes can not see the 1245 * addresses, so unprivileged processes can not see the
1225 * data or use it in any way unless they have foreign real 1246 * data or use it in any way unless they have foreign real
1226 * mappings. We don't have a VAS driver that allocates those 1247 * mappings. If the new process has the foreign real address
1227 * yet, so no cpabort is required. 1248 * mappings, we must issue a cp_abort to clear any state and
1249 * prevent snooping, corruption or a covert channel.
1250 *
1251 * DD1 allows paste into normal system memory so we do an
1252 * unpaired copy, rather than cp_abort, to clear the buffer,
1253 * since cp_abort is quite expensive.
1228 */ 1254 */
1229 if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { 1255 if (current_thread_info()->task->thread.used_vas) {
1230 /* 1256 asm volatile(PPC_CP_ABORT);
1231 * DD1 allows paste into normal system memory, so we 1257 } else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
1232 * do an unpaired copy here to clear the buffer and
1233 * prevent a covert channel being set up.
1234 *
1235 * cpabort is not used because it is quite expensive.
1236 */
1237 asm volatile(PPC_COPY(%0, %1) 1258 asm volatile(PPC_COPY(%0, %1)
1238 : : "r"(dummy_copy_buffer), "r"(0)); 1259 : : "r"(dummy_copy_buffer), "r"(0));
1239 } 1260 }
1240 } 1261 }
1241#endif /* CONFIG_PPC_STD_MMU_64 */ 1262#endif /* CONFIG_PPC_BOOK3S_64 */
1242 1263
1243 return last; 1264 return last;
1244} 1265}
@@ -1434,6 +1455,137 @@ void flush_thread(void)
1434#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 1455#endif /* CONFIG_HAVE_HW_BREAKPOINT */
1435} 1456}
1436 1457
1458int set_thread_uses_vas(void)
1459{
1460#ifdef CONFIG_PPC_BOOK3S_64
1461 if (!cpu_has_feature(CPU_FTR_ARCH_300))
1462 return -EINVAL;
1463
1464 current->thread.used_vas = 1;
1465
1466 /*
1467 * Even a process that has no foreign real address mapping can use
1468 * an unpaired COPY instruction (to no real effect). Issue CP_ABORT
1469 * to clear any pending COPY and prevent a covert channel.
1470 *
1471 * __switch_to() will issue CP_ABORT on future context switches.
1472 */
1473 asm volatile(PPC_CP_ABORT);
1474
1475#endif /* CONFIG_PPC_BOOK3S_64 */
1476 return 0;
1477}
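A hypothetical caller sketch for set_thread_uses_vas() (the function is real, the surrounding driver context is assumed): a driver attaching a VAS send window would mark the thread before handing out the paste address:

	int rc;

	rc = set_thread_uses_vas();	/* fails without ARCH_300 */
	if (rc)
		return rc;
	/* __switch_to() now issues CP_ABORT when switching this thread */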
1478
1479#ifdef CONFIG_PPC64
1480static DEFINE_SPINLOCK(vas_thread_id_lock);
1481static DEFINE_IDA(vas_thread_ida);
1482
1483/*
1484 * We need to assign a unique thread id to each thread in a process.
1485 *
1486 * This thread id, referred to as TIDR, and separate from the Linux's tgid,
1487 * is intended to be used to direct an ASB_Notify from the hardware to the
1488 * thread, when a suitable event occurs in the system.
1489 *
1490 * One such event is a "paste" instruction in the context of Fast Thread
1491 * Wakeup (aka core-to-core wake up) in the Virtual Accelerator Switchboard
1492 * (VAS) in POWER9.
1493 *
1494 * To get a unique TIDR per process we could simply reuse task_pid_nr() but
1495 * the problem is that task_pid_nr() is not yet available when copy_thread()
1496 * is called. Fixing that would require more intrusive changes to arch-neutral
1497 * code in the copy_process() code path.
1498 *
1499 * Further, to assign unique TIDRs within each process, we need an atomic
1500 * field (or an IDR) in task_struct, which again intrudes into the arch-
1501 * neutral code. So try to assign globally unique TIDRs for now.
1502 *
1503 * NOTE: TIDR 0 indicates that the thread does not need a TIDR value.
1504 * For now, only threads that expect to be notified by the VAS
1505 * hardware need a TIDR value and we assign values > 0 for those.
1506 */
1507#define MAX_THREAD_CONTEXT ((1 << 16) - 1)
1508static int assign_thread_tidr(void)
1509{
1510 int index;
1511 int err;
1512
1513again:
1514 if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL))
1515 return -ENOMEM;
1516
1517 spin_lock(&vas_thread_id_lock);
1518 err = ida_get_new_above(&vas_thread_ida, 1, &index);
1519 spin_unlock(&vas_thread_id_lock);
1520
1521 if (err == -EAGAIN)
1522 goto again;
1523 else if (err)
1524 return err;
1525
1526 if (index > MAX_THREAD_CONTEXT) {
1527 spin_lock(&vas_thread_id_lock);
1528 ida_remove(&vas_thread_ida, index);
1529 spin_unlock(&vas_thread_id_lock);
1530 return -ENOMEM;
1531 }
1532
1533 return index;
1534}
1535
1536static void free_thread_tidr(int id)
1537{
1538 spin_lock(&vas_thread_id_lock);
1539 ida_remove(&vas_thread_ida, id);
1540 spin_unlock(&vas_thread_id_lock);
1541}
1542
1543/*
1544 * Clear any TIDR value assigned to this thread.
1545 */
1546void clear_thread_tidr(struct task_struct *t)
1547{
1548 if (!t->thread.tidr)
1549 return;
1550
1551 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1552 WARN_ON_ONCE(1);
1553 return;
1554 }
1555
1556 mtspr(SPRN_TIDR, 0);
1557 free_thread_tidr(t->thread.tidr);
1558 t->thread.tidr = 0;
1559}
1560
1561void arch_release_task_struct(struct task_struct *t)
1562{
1563 clear_thread_tidr(t);
1564}
1565
1566/*
1567 * Assign a unique TIDR (thread id) for task @t and set it in the thread
1568 * structure. For now, we only support setting TIDR for 'current' task.
1569 */
1570int set_thread_tidr(struct task_struct *t)
1571{
1572 if (!cpu_has_feature(CPU_FTR_ARCH_300))
1573 return -EINVAL;
1574
1575 if (t != current)
1576 return -EINVAL;
1577
1578 t->thread.tidr = assign_thread_tidr();
1579 if (t->thread.tidr < 0)
1580 return t->thread.tidr;
1581
1582 mtspr(SPRN_TIDR, t->thread.tidr);
1583
1584 return 0;
1585}
1586
1587#endif /* CONFIG_PPC64 */
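Similarly, a hypothetical user of set_thread_tidr() (the call is real, the context is assumed): code that wants ASB_Notify delivered to the calling thread assigns the TIDR first, then programs it into its notification setup:

	int rc;

	rc = set_thread_tidr(current);
	if (rc)
		return rc;
	/* current->thread.tidr (> 0) can now be handed to the hardware */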
1588
1437void 1589void
1438release_thread(struct task_struct *t) 1590release_thread(struct task_struct *t)
1439{ 1591{
@@ -1467,7 +1619,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
1467 1619
1468static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) 1620static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
1469{ 1621{
1470#ifdef CONFIG_PPC_STD_MMU_64 1622#ifdef CONFIG_PPC_BOOK3S_64
1471 unsigned long sp_vsid; 1623 unsigned long sp_vsid;
1472 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; 1624 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
1473 1625
@@ -1580,6 +1732,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
1580 } 1732 }
1581 if (cpu_has_feature(CPU_FTR_HAS_PPR)) 1733 if (cpu_has_feature(CPU_FTR_HAS_PPR))
1582 p->thread.ppr = INIT_PPR; 1734 p->thread.ppr = INIT_PPR;
1735
1736 p->thread.tidr = 0;
1583#endif 1737#endif
1584 kregs->nip = ppc_function_entry(f); 1738 kregs->nip = ppc_function_entry(f);
1585 return 0; 1739 return 0;
@@ -1898,7 +2052,8 @@ unsigned long get_wchan(struct task_struct *p)
1898 2052
1899 do { 2053 do {
1900 sp = *(unsigned long *)sp; 2054 sp = *(unsigned long *)sp;
1901 if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) 2055 if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
2056 p->state == TASK_RUNNING)
1902 return 0; 2057 return 0;
1903 if (count > 0) { 2058 if (count > 0) {
1904 ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; 2059 ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
@@ -2046,7 +2201,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
2046 unsigned long base = mm->brk; 2201 unsigned long base = mm->brk;
2047 unsigned long ret; 2202 unsigned long ret;
2048 2203
2049#ifdef CONFIG_PPC_STD_MMU_64 2204#ifdef CONFIG_PPC_BOOK3S_64
2050 /* 2205 /*
2051 * If we are using 1TB segments and we are allowed to randomise 2206 * If we are using 1TB segments and we are allowed to randomise
2052 * the heap, we can put it above 1TB so it is backed by a 1TB 2207 * the heap, we can put it above 1TB so it is backed by a 1TB
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f83056297441..b15bae265c90 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -47,6 +47,7 @@
47#include <asm/mmu.h> 47#include <asm/mmu.h>
48#include <asm/paca.h> 48#include <asm/paca.h>
49#include <asm/pgtable.h> 49#include <asm/pgtable.h>
50#include <asm/powernv.h>
50#include <asm/iommu.h> 51#include <asm/iommu.h>
51#include <asm/btext.h> 52#include <asm/btext.h>
52#include <asm/sections.h> 53#include <asm/sections.h>
@@ -228,7 +229,7 @@ static void __init check_cpu_pa_features(unsigned long node)
228 ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); 229 ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
229} 230}
230 231
231#ifdef CONFIG_PPC_STD_MMU_64 232#ifdef CONFIG_PPC_BOOK3S_64
232static void __init init_mmu_slb_size(unsigned long node) 233static void __init init_mmu_slb_size(unsigned long node)
233{ 234{
234 const __be32 *slb_size_ptr; 235 const __be32 *slb_size_ptr;
@@ -658,6 +659,38 @@ static void __init early_reserve_mem(void)
658#endif 659#endif
659} 660}
660 661
662#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
663static bool tm_disabled __initdata;
664
665static int __init parse_ppc_tm(char *str)
666{
667 bool res;
668
669 if (kstrtobool(str, &res))
670 return -EINVAL;
671
672 tm_disabled = !res;
673
674 return 0;
675}
676early_param("ppc_tm", parse_ppc_tm);
677
678static void __init tm_init(void)
679{
680 if (tm_disabled) {
681 pr_info("Disabling hardware transactional memory (HTM)\n");
682 cur_cpu_spec->cpu_user_features2 &=
683 ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
684 cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
685 return;
686 }
687
688 pnv_tm_init();
689}
690#else
691static void tm_init(void) { }
692#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
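As a usage example: booting with "ppc_tm=off" on the kernel command line makes kstrtobool() return false, so tm_disabled is set and tm_init() clears the HTM feature bits; "ppc_tm=on" leaves TM enabled and lets pnv_tm_init() probe for TM-without-suspend. (This assumes the usual kstrtobool() spellings: 0/1, y/n, on/off.)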
693
661void __init early_init_devtree(void *params) 694void __init early_init_devtree(void *params)
662{ 695{
663 phys_addr_t limit; 696 phys_addr_t limit;
@@ -767,6 +800,8 @@ void __init early_init_devtree(void *params)
767 powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; 800 powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
768#endif 801#endif
769 802
803 tm_init();
804
770 DBG(" <- early_init_devtree()\n"); 805 DBG(" <- early_init_devtree()\n");
771} 806}
772 807
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2e3bc16d02b2..2075322cd225 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -773,7 +773,7 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
773static __init void print_system_info(void) 773static __init void print_system_info(void)
774{ 774{
775 pr_info("-----------------------------------------------------\n"); 775 pr_info("-----------------------------------------------------\n");
776#ifdef CONFIG_PPC_STD_MMU_64 776#ifdef CONFIG_PPC_BOOK3S_64
777 pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); 777 pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
778#endif 778#endif
779#ifdef CONFIG_PPC_STD_MMU_32 779#ifdef CONFIG_PPC_STD_MMU_32
@@ -800,7 +800,7 @@ static __init void print_system_info(void)
800 pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); 800 pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
801#endif 801#endif
802 802
803#ifdef CONFIG_PPC_STD_MMU_64 803#ifdef CONFIG_PPC_BOOK3S_64
804 if (htab_address) 804 if (htab_address)
805 pr_info("htab_address = 0x%p\n", htab_address); 805 pr_info("htab_address = 0x%p\n", htab_address);
806 if (htab_hash_mask) 806 if (htab_hash_mask)
@@ -898,7 +898,8 @@ void __init setup_arch(char **cmdline_p)
898 898
899#ifdef CONFIG_PPC_MM_SLICES 899#ifdef CONFIG_PPC_MM_SLICES
900#ifdef CONFIG_PPC64 900#ifdef CONFIG_PPC64
901 init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64; 901 if (!radix_enabled())
902 init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
902#else 903#else
903#error "context.addr_limit not initialized." 904#error "context.addr_limit not initialized."
904#endif 905#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index cfba134b3024..21c18071d9d5 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -45,6 +45,12 @@ void emergency_stack_init(void);
45static inline void emergency_stack_init(void) { }; 45static inline void emergency_stack_init(void) { };
46#endif 46#endif
47 47
48#ifdef CONFIG_PPC64
49void record_spr_defaults(void);
50#else
51static inline void record_spr_defaults(void) { };
52#endif
53
48/* 54/*
49 * Having this in kvm_ppc.h makes include dependencies too 55 * Having this in kvm_ppc.h makes include dependencies too
50 * tricky to solve for setup-common.c so have it here. 56 * tricky to solve for setup-common.c so have it here.
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b89c6aac48c9..8956a9856604 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -69,6 +69,8 @@
69#include <asm/opal.h> 69#include <asm/opal.h>
70#include <asm/cputhreads.h> 70#include <asm/cputhreads.h>
71 71
72#include "setup.h"
73
72#ifdef DEBUG 74#ifdef DEBUG
73#define DBG(fmt...) udbg_printf(fmt) 75#define DBG(fmt...) udbg_printf(fmt)
74#else 76#else
@@ -317,6 +319,13 @@ void __init early_setup(unsigned long dt_ptr)
317 early_init_mmu(); 319 early_init_mmu();
318 320
319 /* 321 /*
322 * After firmware and early platform setup code has set things up,
323 * we note the SPR values for configurable control/performance
324 * registers, and use those as initial defaults.
325 */
326 record_spr_defaults();
327
328 /*
320 * At this point, we can let interrupts switch to virtual mode 329 * At this point, we can let interrupts switch to virtual mode
321 * (the MMU has been setup), so adjust the MSR in the PACA to 330 * (the MMU has been setup), so adjust the MSR in the PACA to
322 * have IR and DR set and enable AIL if it exists 331 * have IR and DR set and enable AIL if it exists
@@ -360,8 +369,16 @@ void early_setup_secondary(void)
360#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) 369#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
361static bool use_spinloop(void) 370static bool use_spinloop(void)
362{ 371{
363 if (!IS_ENABLED(CONFIG_PPC_BOOK3E)) 372 if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
373 /*
374 * See comments in head_64.S -- not all platforms insert
375 * secondaries at __secondary_hold and wait at the spin
376 * loop.
377 */
378 if (firmware_has_feature(FW_FEATURE_OPAL))
379 return false;
364 return true; 380 return true;
381 }
365 382
366 /* 383 /*
367 * When book3e boots from kexec, the ePAPR spin table does 384 * When book3e boots from kexec, the ePAPR spin table does
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e9436c5e1e09..3d7539b90010 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -103,7 +103,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
103static void do_signal(struct task_struct *tsk) 103static void do_signal(struct task_struct *tsk)
104{ 104{
105 sigset_t *oldset = sigmask_to_save(); 105 sigset_t *oldset = sigmask_to_save();
106 struct ksignal ksig; 106 struct ksignal ksig = { .sig = 0 };
107 int ret; 107 int ret;
108 int is32 = is_32bit_task(); 108 int is32 = is_32bit_task();
109 109
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 92fb1c8dbbd8..16d16583cf11 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -519,6 +519,8 @@ static int save_tm_user_regs(struct pt_regs *regs,
519{ 519{
520 unsigned long msr = regs->msr; 520 unsigned long msr = regs->msr;
521 521
522 WARN_ON(tm_suspend_disabled);
523
522 /* Remove TM bits from thread's MSR. The MSR in the sigcontext 524 /* Remove TM bits from thread's MSR. The MSR in the sigcontext
523 * just indicates to userland that we were doing a transaction, but we 525 * just indicates to userland that we were doing a transaction, but we
524 * don't want to return in transactional state. This also ensures 526 * don't want to return in transactional state. This also ensures
@@ -769,6 +771,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
769 int i; 771 int i;
770#endif 772#endif
771 773
774 if (tm_suspend_disabled)
775 return 1;
772 /* 776 /*
773 * restore general registers but not including MSR or SOFTE. Also 777 * restore general registers but not including MSR or SOFTE. Also
774 * take care of keeping r2 (TLS) intact if not a signal. 778 * take care of keeping r2 (TLS) intact if not a signal.
@@ -876,7 +880,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
876 /* Make sure the transaction is marked as failed */ 880 /* Make sure the transaction is marked as failed */
877 current->thread.tm_texasr |= TEXASR_FS; 881 current->thread.tm_texasr |= TEXASR_FS;
878 /* This loads the checkpointed FP/VEC state, if used */ 882 /* This loads the checkpointed FP/VEC state, if used */
879 tm_recheckpoint(&current->thread, msr); 883 tm_recheckpoint(&current->thread);
880 884
881 /* This loads the speculative FP/VEC state, if used */ 885 /* This loads the speculative FP/VEC state, if used */
882 msr_check_and_set(msr & (MSR_FP | MSR_VEC)); 886 msr_check_and_set(msr & (MSR_FP | MSR_VEC));
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index b2c002993d78..4b9ca3570344 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -214,6 +214,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
214 214
215 BUG_ON(!MSR_TM_ACTIVE(regs->msr)); 215 BUG_ON(!MSR_TM_ACTIVE(regs->msr));
216 216
217 WARN_ON(tm_suspend_disabled);
218
217 /* Remove TM bits from thread's MSR. The MSR in the sigcontext 219 /* Remove TM bits from thread's MSR. The MSR in the sigcontext
218 * just indicates to userland that we were doing a transaction, but we 220 * just indicates to userland that we were doing a transaction, but we
219 * don't want to return in transactional state. This also ensures 221 * don't want to return in transactional state. This also ensures
@@ -430,6 +432,9 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
430 432
431 BUG_ON(tsk != current); 433 BUG_ON(tsk != current);
432 434
435 if (tm_suspend_disabled)
436 return -EINVAL;
437
433 /* copy the GPRs */ 438 /* copy the GPRs */
434 err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr)); 439 err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
435 err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs, 440 err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs,
@@ -558,7 +563,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
558 /* Make sure the transaction is marked as failed */ 563 /* Make sure the transaction is marked as failed */
559 tsk->thread.tm_texasr |= TEXASR_FS; 564 tsk->thread.tm_texasr |= TEXASR_FS;
560 /* This loads the checkpointed FP/VEC state, if used */ 565 /* This loads the checkpointed FP/VEC state, if used */
561 tm_recheckpoint(&tsk->thread, msr); 566 tm_recheckpoint(&tsk->thread);
562 567
563 msr_check_and_set(msr & (MSR_FP | MSR_VEC)); 568 msr_check_and_set(msr & (MSR_FP | MSR_VEC));
564 if (msr & MSR_FP) { 569 if (msr & MSR_FP) {
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70c7c2b..b8d4a1dac39f 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -590,6 +590,17 @@ static void sysfs_create_dscr_default(void)
590 if (cpu_has_feature(CPU_FTR_DSCR)) 590 if (cpu_has_feature(CPU_FTR_DSCR))
591 err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); 591 err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
592} 592}
593
594void __init record_spr_defaults(void)
595{
596 int cpu;
597
598 if (cpu_has_feature(CPU_FTR_DSCR)) {
599 dscr_default = mfspr(SPRN_DSCR);
600 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
601 paca[cpu].dscr_default = dscr_default;
602 }
603}
593#endif /* CONFIG_PPC64 */ 604#endif /* CONFIG_PPC64 */
594 605
595#ifdef HAS_PPC_PMC_PA6T 606#ifdef HAS_PPC_PMC_PA6T
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index a3374e8a258c..e3c5f75d137c 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -230,8 +230,7 @@ int __init TAU_init(void)
230 230
231 231
232 /* first, set up the window shrinking timer */ 232 /* first, set up the window shrinking timer */
233 init_timer(&tau_timer); 233 setup_timer(&tau_timer, tau_timeout_smp, 0UL);
234 tau_timer.function = tau_timeout_smp;
235 tau_timer.expires = jiffies + shrink_timer; 234 tau_timer.expires = jiffies + shrink_timer;
236 add_timer(&tau_timer); 235 add_timer(&tau_timer);
237 236
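The tau_timer change above is the tree-wide timer-API consolidation: setup_timer() folds the three-step initialisation into one call. A minimal sketch with an illustrative timer and callback (using the old-style API in this tree, where callbacks take an unsigned long data argument):

	static void my_timeout(unsigned long data);	/* illustrative */
	static struct timer_list my_timer;

	/* Before: */
	init_timer(&my_timer);
	my_timer.function = my_timeout;
	my_timer.data = 0UL;

	/* After: the equivalent single call */
	setup_timer(&my_timer, my_timeout, 0UL);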
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 1da12f521cb7..b92ac8e711db 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -80,15 +80,12 @@ _GLOBAL(tm_abort)
80 blr 80 blr
81 81
82/* void tm_reclaim(struct thread_struct *thread, 82/* void tm_reclaim(struct thread_struct *thread,
83 * unsigned long orig_msr,
84 * uint8_t cause) 83 * uint8_t cause)
85 * 84 *
86 * - Performs a full reclaim. This destroys outstanding 85 * - Performs a full reclaim. This destroys outstanding
87 * transactions and updates thread->regs.tm_ckpt_* with the 86 * transactions and updates thread->regs.tm_ckpt_* with the
88 * original checkpointed state. Note that thread->regs is 87 * original checkpointed state. Note that thread->regs is
89 * unchanged. 88 * unchanged.
90 * - FP regs are written back to thread->transact_fpr before
91 * reclaiming. These are the transactional (current) versions.
92 * 89 *
93 * Purpose is to both abort transactions of, and preserve the state of, 90 * Purpose is to both abort transactions of, and preserve the state of,
94 * a transaction at a context switch. We preserve/restore both sets of process 91 * a transaction at a context switch. We preserve/restore both sets of process
@@ -99,9 +96,9 @@ _GLOBAL(tm_abort)
99 * Call with IRQs off, stacks get all out of sync for some periods in here! 96 * Call with IRQs off, stacks get all out of sync for some periods in here!
100 */ 97 */
101_GLOBAL(tm_reclaim) 98_GLOBAL(tm_reclaim)
102 mfcr r6 99 mfcr r5
103 mflr r0 100 mflr r0
104 stw r6, 8(r1) 101 stw r5, 8(r1)
105 std r0, 16(r1) 102 std r0, 16(r1)
106 std r2, STK_GOT(r1) 103 std r2, STK_GOT(r1)
107 stdu r1, -TM_FRAME_SIZE(r1) 104 stdu r1, -TM_FRAME_SIZE(r1)
@@ -109,7 +106,6 @@ _GLOBAL(tm_reclaim)
109 /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ 106 /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
110 107
111 std r3, STK_PARAM(R3)(r1) 108 std r3, STK_PARAM(R3)(r1)
112 std r4, STK_PARAM(R4)(r1)
113 SAVE_NVGPRS(r1) 109 SAVE_NVGPRS(r1)
114 110
115 /* We need to setup MSR for VSX register save instructions. */ 111 /* We need to setup MSR for VSX register save instructions. */
@@ -139,8 +135,8 @@ _GLOBAL(tm_reclaim)
139 std r1, PACAR1(r13) 135 std r1, PACAR1(r13)
140 136
141 /* Clear MSR RI since we are about to change r1, EE is already off. */ 137 /* Clear MSR RI since we are about to change r1, EE is already off. */
142 li r4, 0 138 li r5, 0
143 mtmsrd r4, 1 139 mtmsrd r5, 1
144 140
145 /* 141 /*
146 * BE CAREFUL HERE: 142 * BE CAREFUL HERE:
@@ -152,7 +148,7 @@ _GLOBAL(tm_reclaim)
 	 * to user register state. (FPRs, CCR etc. also!)
 	 * Use an sprg and a tm_scratch in the PACA to shuffle.
 	 */
-	TRECLAIM(R5)				/* Cause in r5 */
+	TRECLAIM(R4)				/* Cause in r4 */
 
 	/* ******************** GPRs ******************** */
 	/* Stash the checkpointed r13 away in the scratch SPR and get the real
@@ -243,40 +239,30 @@ _GLOBAL(tm_reclaim)
 
 
 	/* ******************** FPR/VR/VSRs ************
- * After reclaiming, capture the checkpointed FPRs/VRs /if used/.
- *
- * (If VSX used, FP and VMX are implied. Or, we don't need to look
- * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.)
- *
- * We're passed the thread's MSR as the second parameter
+ * After reclaiming, capture the checkpointed FPRs/VRs.
  *
  * We enabled VEC/FP/VSX in the msr above, so we can execute these
  * instructions!
  */
-	ld	r4, STK_PARAM(R4)(r1)		/* Second parameter, MSR * */
 	mr	r3, r12
-	andis.	r0, r4, MSR_VEC@h
-	beq	dont_backup_vec
 
+	/* Altivec (VEC/VMX/VR)*/
 	addi	r7, r3, THREAD_CKVRSTATE
 	SAVE_32VRS(0, r6, r7)	/* r6 scratch, r7 transact vr state */
 	mfvscr	v0
 	li	r6, VRSTATE_VSCR
 	stvx	v0, r7, r6
-dont_backup_vec:
+
+	/* VRSAVE */
 	mfspr	r0, SPRN_VRSAVE
 	std	r0, THREAD_CKVRSAVE(r3)
 
-	andi.	r0, r4, MSR_FP
-	beq	dont_backup_fp
-
+	/* Floating Point (FP) */
 	addi	r7, r3, THREAD_CKFPSTATE
 	SAVE_32FPRS_VSRS(0, R6, R7)	/* r6 scratch, r7 transact fp state */
-
 	mffs	fr0
 	stfd	fr0,FPSTATE_FPSCR(r7)
 
-dont_backup_fp:
 
 	/* TM regs, incl TEXASR -- these live in thread_struct. Note they've
 	 * been updated by the treclaim, to explain to userland the failure
@@ -344,22 +330,19 @@ _GLOBAL(__tm_recheckpoint)
 	 */
 	subi	r7, r7, STACK_FRAME_OVERHEAD
 
+	/* We need to setup MSR for FP/VMX/VSX register save instructions. */
 	mfmsr	r6
-	/* R4 = original MSR to indicate whether thread used FP/Vector etc. */
-
-	/* Enable FP/vec in MSR if necessary! */
-	lis	r5, MSR_VEC@h
+	mr	r5, r6
 	ori	r5, r5, MSR_FP
-	and.	r5, r4, r5
-	beq	restore_gprs			/* if neither, skip both */
-
+#ifdef CONFIG_ALTIVEC
+	oris	r5, r5, MSR_VEC@h
+#endif
 #ifdef CONFIG_VSX
 	BEGIN_FTR_SECTION
-	oris	r5, r5, MSR_VSX@h
+	oris	r5,r5, MSR_VSX@h
 	END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
-	or	r5, r6, r5			/* Set MSR.FP+.VSX/.VEC */
-	mtmsr	r5
+	mtmsrd	r5
 
 #ifdef CONFIG_ALTIVEC
 	/*
@@ -368,28 +351,20 @@ _GLOBAL(__tm_recheckpoint)
 	 * thread.fp_state[] version holds the 'live' (transactional)
 	 * and will be loaded subsequently by any FPUnavailable trap.
 	 */
-	andis.	r0, r4, MSR_VEC@h
-	beq	dont_restore_vec
-
 	addi	r8, r3, THREAD_CKVRSTATE
 	li	r5, VRSTATE_VSCR
 	lvx	v0, r8, r5
 	mtvscr	v0
 	REST_32VRS(0, r5, r8)			/* r5 scratch, r8 ptr */
-dont_restore_vec:
 	ld	r5, THREAD_CKVRSAVE(r3)
 	mtspr	SPRN_VRSAVE, r5
 #endif
 
-	andi.	r0, r4, MSR_FP
-	beq	dont_restore_fp
-
 	addi	r8, r3, THREAD_CKFPSTATE
 	lfd	fr0, FPSTATE_FPSCR(r8)
 	MTFSF_L(fr0)
 	REST_32FPRS_VSRS(0, R4, R8)
 
-dont_restore_fp:
 	mtmsr	r6				/* FP/Vec off again! */
 
 restore_gprs:
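
These tm.S hunks change the reclaim calling convention: the caller no longer passes the pre-reclaim MSR, because checkpointed FP/VMX/VSX state is now saved and restored unconditionally (the dont_backup_*/dont_restore_* branches are gone) and the abort cause moves from r5 to r4. A sketch of the C-level prototypes implied by the comment block above (the exact declaration lives in a header not shown in this diff):

	/* Old: the MSR told the assembly which facilities to back up. */
	void tm_reclaim(struct thread_struct *thread, unsigned long orig_msr,
			uint8_t cause);

	/* New per this hunk: FP/VEC/VSX are always captured, so the MSR
	 * parameter goes away.
	 */
	void tm_reclaim(struct thread_struct *thread, uint8_t cause);
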
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index b4e2b7165f79..3f3e81852422 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -110,9 +110,9 @@ ftrace_call:
 	/* NIP has not been altered, skip over further checks */
 	beq	1f
 
-	/* Check if there is an active kprobe on us */
+	/* Check if there is an active jprobe on us */
 	subi	r3, r14, 4
-	bl	is_current_kprobe_addr
+	bl	__is_active_jprobe
 	nop
 
 	/*
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 13c9dcdcba69..f3eb61be0d30 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -37,6 +37,7 @@
 #include <linux/kdebug.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
+#include <linux/smp.h>
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
@@ -699,6 +700,187 @@ void SMIException(struct pt_regs *regs)
699 die("System Management Interrupt", regs, SIGABRT); 700 die("System Management Interrupt", regs, SIGABRT);
700} 701}
701 702
703#ifdef CONFIG_VSX
704static void p9_hmi_special_emu(struct pt_regs *regs)
705{
706 unsigned int ra, rb, t, i, sel, instr, rc;
707 const void __user *addr;
708 u8 vbuf[16], *vdst;
709 unsigned long ea, msr, msr_mask;
710 bool swap;
711
712 if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip))
713 return;
714
715 /*
716 * lxvb16x opcode: 0x7c0006d8
717 * lxvd2x opcode: 0x7c000698
718 * lxvh8x opcode: 0x7c000658
719 * lxvw4x opcode: 0x7c000618
720 */
721 if ((instr & 0xfc00073e) != 0x7c000618) {
722 pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
723 " instr=%08x\n",
724 smp_processor_id(), current->comm, current->pid,
725 regs->nip, instr);
726 return;
727 }
728
729 /* Grab vector registers into the task struct */
730 msr = regs->msr; /* Grab msr before we flush the bits */
731 flush_vsx_to_thread(current);
732 enable_kernel_altivec();
733
734 /*
735 * Is userspace running with a different endian (this is rare but
736 * not impossible)
737 */
738 swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
739
740 /* Decode the instruction */
741 ra = (instr >> 16) & 0x1f;
742 rb = (instr >> 11) & 0x1f;
743 t = (instr >> 21) & 0x1f;
744 if (instr & 1)
745 vdst = (u8 *)&current->thread.vr_state.vr[t];
746 else
747 vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
748
749 /* Grab the vector address */
750 ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
751 if (is_32bit_task())
752 ea &= 0xfffffffful;
753 addr = (__force const void __user *)ea;
754
755 /* Check it */
756 if (!access_ok(VERIFY_READ, addr, 16)) {
757 pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
758 " instr=%08x addr=%016lx\n",
759 smp_processor_id(), current->comm, current->pid,
760 regs->nip, instr, (unsigned long)addr);
761 return;
762 }
763
764 /* Read the vector */
765 rc = 0;
766 if ((unsigned long)addr & 0xfUL)
767 /* unaligned case */
768 rc = __copy_from_user_inatomic(vbuf, addr, 16);
769 else
770 __get_user_atomic_128_aligned(vbuf, addr, rc);
771 if (rc) {
772 pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
773 " instr=%08x addr=%016lx\n",
774 smp_processor_id(), current->comm, current->pid,
775 regs->nip, instr, (unsigned long)addr);
776 return;
777 }
778
779 pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
780 " instr=%08x addr=%016lx\n",
781 smp_processor_id(), current->comm, current->pid, regs->nip,
782 instr, (unsigned long) addr);
783
784 /* Grab instruction "selector" */
785 sel = (instr >> 6) & 3;
786
787 /*
788 * Check to make sure the facility is actually enabled. This
789 * could happen if we get a false positive hit.
790 *
791 * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
792 * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
793 */
794 msr_mask = MSR_VSX;
795 if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
796 msr_mask = MSR_VEC;
797 if (!(msr & msr_mask)) {
798 pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
799 " instr=%08x msr:%016lx\n",
800 smp_processor_id(), current->comm, current->pid,
801 regs->nip, instr, msr);
802 return;
803 }
804
805 /* Do logging here before we modify sel based on endian */
806 switch (sel) {
807 case 0: /* lxvw4x */
808 PPC_WARN_EMULATED(lxvw4x, regs);
809 break;
810 case 1: /* lxvh8x */
811 PPC_WARN_EMULATED(lxvh8x, regs);
812 break;
813 case 2: /* lxvd2x */
814 PPC_WARN_EMULATED(lxvd2x, regs);
815 break;
816 case 3: /* lxvb16x */
817 PPC_WARN_EMULATED(lxvb16x, regs);
818 break;
819 }
820
821#ifdef __LITTLE_ENDIAN__
822 /*
823 * An LE kernel stores the vector in the task struct as an LE
824 * byte array (effectively swapping both the components and
825 * the content of the components). Those instructions expect
826 * the components to remain in ascending address order, so we
827 * swap them back.
828 *
829 * If we are running a BE user space, the expectation is that
830 * of a simple memcpy, so forcing the emulation to look like
831 * a lxvb16x should do the trick.
832 */
833 if (swap)
834 sel = 3;
835
836 switch (sel) {
837 case 0: /* lxvw4x */
838 for (i = 0; i < 4; i++)
839 ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
840 break;
841 case 1: /* lxvh8x */
842 for (i = 0; i < 8; i++)
843 ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
844 break;
845 case 2: /* lxvd2x */
846 for (i = 0; i < 2; i++)
847 ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
848 break;
849 case 3: /* lxvb16x */
850 for (i = 0; i < 16; i++)
851 vdst[i] = vbuf[15-i];
852 break;
853 }
854#else /* __LITTLE_ENDIAN__ */
855 /* On a big endian kernel, a BE userspace only needs a memcpy */
856 if (!swap)
857 sel = 3;
858
859 /* Otherwise, we need to swap the content of the components */
860 switch (sel) {
861 case 0: /* lxvw4x */
862 for (i = 0; i < 4; i++)
863 ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
864 break;
865 case 1: /* lxvh8x */
866 for (i = 0; i < 8; i++)
867 ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
868 break;
869 case 2: /* lxvd2x */
870 for (i = 0; i < 2; i++)
871 ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
872 break;
873 case 3: /* lxvb16x */
874 memcpy(vdst, vbuf, 16);
875 break;
876 }
877#endif /* !__LITTLE_ENDIAN__ */
878
879 /* Go to next instruction */
880 regs->nip += 4;
881}
882#endif /* CONFIG_VSX */
883
702void handle_hmi_exception(struct pt_regs *regs) 884void handle_hmi_exception(struct pt_regs *regs)
703{ 885{
704 struct pt_regs *old_regs; 886 struct pt_regs *old_regs;
@@ -706,6 +888,21 @@ void handle_hmi_exception(struct pt_regs *regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
+#ifdef CONFIG_VSX
+	/* Real mode flagged P9 special emu is needed */
+	if (local_paca->hmi_p9_special_emu) {
+		local_paca->hmi_p9_special_emu = 0;
+
+		/*
+		 * We don't want to take page faults while doing the
+		 * emulation, we just replay the instruction if necessary.
+		 */
+		pagefault_disable();
+		p9_hmi_special_emu(regs);
+		pagefault_enable();
+	}
+#endif /* CONFIG_VSX */
+
 	if (ppc_md.handle_hmi_exception)
 		ppc_md.handle_hmi_exception(regs);
 
@@ -1140,13 +1337,8 @@ void program_check_exception(struct pt_regs *regs)
 	 * - A treclaim is attempted when non transactional.
 	 * - A tend is illegally attempted.
 	 * - writing a TM SPR when transactional.
-	 */
-	if (!user_mode(regs) &&
-	    report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
-		regs->nip += 4;
-		goto bail;
-	}
-	/* If usermode caused this, it's done something illegal and
+	 *
+	 * If usermode caused this, it's done something illegal and
 	 * gets a SIGILL slap on the wrist. We call it an illegal
 	 * operand to distinguish from the instruction just being bad
 	 * (e.g. executing a 'tend' on a CPU without TM!); it's an
@@ -1487,7 +1679,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
 	/* Reclaim didn't save out any FPRs to transact_fprs. */
 
 	/* Enable FP for the task: */
-	regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+	current->thread.load_fp = 1;
 
 	/* This loads and recheckpoints the FP registers from
 	 * thread.fpr[]. They will remain in registers after the
@@ -1495,15 +1687,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
 	 * If VMX is in use, the VRs now hold checkpointed values,
 	 * so we don't want to load the VRs from the thread_struct.
 	 */
-	tm_recheckpoint(&current->thread, MSR_FP);
-
-	/* If VMX is in use, get the transactional values back */
-	if (regs->msr & MSR_VEC) {
-		msr_check_and_set(MSR_VEC);
-		load_vr_state(&current->thread.vr_state);
-		/* At this point all the VSX state is loaded, so enable it */
-		regs->msr |= MSR_VSX;
-	}
+	tm_recheckpoint(&current->thread);
 }
 
 void altivec_unavailable_tm(struct pt_regs *regs)
@@ -1516,21 +1700,13 @@ void altivec_unavailable_tm(struct pt_regs *regs)
1516 "MSR=%lx\n", 1700 "MSR=%lx\n",
1517 regs->nip, regs->msr); 1701 regs->nip, regs->msr);
1518 tm_reclaim_current(TM_CAUSE_FAC_UNAV); 1702 tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1519 regs->msr |= MSR_VEC; 1703 current->thread.load_vec = 1;
1520 tm_recheckpoint(&current->thread, MSR_VEC); 1704 tm_recheckpoint(&current->thread);
1521 current->thread.used_vr = 1; 1705 current->thread.used_vr = 1;
1522
1523 if (regs->msr & MSR_FP) {
1524 msr_check_and_set(MSR_FP);
1525 load_fp_state(&current->thread.fp_state);
1526 regs->msr |= MSR_VSX;
1527 }
1528} 1706}
1529 1707
1530void vsx_unavailable_tm(struct pt_regs *regs) 1708void vsx_unavailable_tm(struct pt_regs *regs)
1531{ 1709{
1532 unsigned long orig_msr = regs->msr;
1533
1534 /* See the comments in fp_unavailable_tm(). This works similarly, 1710 /* See the comments in fp_unavailable_tm(). This works similarly,
1535 * though we're loading both FP and VEC registers in here. 1711 * though we're loading both FP and VEC registers in here.
1536 * 1712 *
@@ -1544,29 +1720,13 @@ void vsx_unavailable_tm(struct pt_regs *regs)
 
 	current->thread.used_vsr = 1;
 
-	/* If FP and VMX are already loaded, we have all the state we need */
-	if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) {
-		regs->msr |= MSR_VSX;
-		return;
-	}
-
 	/* This reclaims FP and/or VR regs if they're already enabled */
 	tm_reclaim_current(TM_CAUSE_FAC_UNAV);
 
-	regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
-		MSR_VSX;
-
-	/* This loads & recheckpoints FP and VRs; but we have
-	 * to be sure not to overwrite previously-valid state.
-	 */
-	tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
-
-	msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC));
+	current->thread.load_vec = 1;
+	current->thread.load_fp = 1;
 
-	if (orig_msr & MSR_FP)
-		load_fp_state(&current->thread.fp_state);
-	if (orig_msr & MSR_VEC)
-		load_vr_state(&current->thread.vr_state);
+	tm_recheckpoint(&current->thread);
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
@@ -1924,6 +2084,10 @@ struct ppc_emulated ppc_emulated = {
 	WARN_EMULATED_SETUP(mfdscr),
 	WARN_EMULATED_SETUP(mtdscr),
 	WARN_EMULATED_SETUP(lq_stq),
+	WARN_EMULATED_SETUP(lxvw4x),
+	WARN_EMULATED_SETUP(lxvh8x),
+	WARN_EMULATED_SETUP(lxvd2x),
+	WARN_EMULATED_SETUP(lxvb16x),
 #endif
 };
 
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 1d89163d67f2..87da80ccced1 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -98,8 +98,7 @@ static void wd_lockup_ipi(struct pt_regs *regs)
 	else
 		dump_stack();
 
-	if (hardlockup_panic)
-		nmi_panic(regs, "Hard LOCKUP");
+	/* Do not panic from here because that can recurse into NMI IPI layer */
 }
 
 static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
@@ -135,15 +134,18 @@ static void watchdog_smp_panic(int cpu, u64 tb)
135 pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n", 134 pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
136 cpu, cpumask_pr_args(&wd_smp_cpus_pending)); 135 cpu, cpumask_pr_args(&wd_smp_cpus_pending));
137 136
138 /* 137 if (!sysctl_hardlockup_all_cpu_backtrace) {
139 * Try to trigger the stuck CPUs. 138 /*
140 */ 139 * Try to trigger the stuck CPUs, unless we are going to
141 for_each_cpu(c, &wd_smp_cpus_pending) { 140 * get a backtrace on all of them anyway.
142 if (c == cpu) 141 */
143 continue; 142 for_each_cpu(c, &wd_smp_cpus_pending) {
144 smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); 143 if (c == cpu)
144 continue;
145 smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
146 }
147 smp_flush_nmi_ipi(1000000);
145 } 148 }
146 smp_flush_nmi_ipi(1000000);
147 149
148 /* Take the stuck CPUs out of the watch group */ 150 /* Take the stuck CPUs out of the watch group */
149 set_cpumask_stuck(&wd_smp_cpus_pending, tb); 151 set_cpumask_stuck(&wd_smp_cpus_pending, tb);
@@ -275,9 +277,12 @@ void arch_touch_nmi_watchdog(void)
 {
 	unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
 	int cpu = smp_processor_id();
+	u64 tb = get_tb();
 
-	if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
-		watchdog_timer_interrupt(cpu);
+	if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
+		per_cpu(wd_timer_tb, cpu) = tb;
+		wd_smp_clear_cpu_pending(cpu, tb);
+	}
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
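
With this change, touching the watchdog from a busy code path only refreshes the current CPU's timestamp and clears its pending bit, rather than running the full timer-interrupt path. The usual caller pattern is a long polled loop, roughly like this sketch (poll_hardware_done() is a made-up stand-in for a slow polled operation):

	#include <linux/nmi.h>

	while (!poll_hardware_done()) {
		/* Ends up in arch_touch_nmi_watchdog() on powerpc. */
		touch_nmi_watchdog();
		cpu_relax();
	}
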
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8d43cf205d34..40e5857c4b1c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -47,6 +47,7 @@
 
 #include <asm/reg.h>
 #include <asm/ppc-opcode.h>
+#include <asm/asm-prototypes.h>
 #include <asm/disassemble.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
@@ -1089,9 +1090,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
-	/* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
+	/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
 	case BOOK3S_INTERRUPT_HMI:
 	case BOOK3S_INTERRUPT_PERFMON:
+	case BOOK3S_INTERRUPT_SYSTEM_RESET:
 		r = RESUME_GUEST;
 		break;
 	case BOOK3S_INTERRUPT_MACHINE_CHECK:
@@ -2117,15 +2119,6 @@ static int kvmppc_grab_hwthread(int cpu)
 	struct paca_struct *tpaca;
 	long timeout = 10000;
 
-	/*
-	 * ISA v3.0 idle routines do not set hwthread_state or test
-	 * hwthread_req, so they can not grab idle threads.
-	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-		WARN(1, "KVM: can not control sibling threads\n");
-		return -EBUSY;
-	}
-
 	tpaca = &paca[cpu];
 
 	/* Ensure the thread won't go into the kernel if it wakes */
@@ -2160,12 +2153,10 @@ static void kvmppc_release_hwthread(int cpu)
 	struct paca_struct *tpaca;
 
 	tpaca = &paca[cpu];
+	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
 	tpaca->kvm_hstate.kvm_vcore = NULL;
 	tpaca->kvm_hstate.kvm_split_mode = NULL;
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
-		tpaca->kvm_hstate.hwthread_req = 0;
-
 }
 
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
@@ -2615,6 +2606,9 @@ static void set_irq_happened(int trap)
 	case BOOK3S_INTERRUPT_HMI:
 		local_paca->irq_happened |= PACA_IRQ_HMI;
 		break;
+	case BOOK3S_INTERRUPT_SYSTEM_RESET:
+		replay_system_reset();
+		break;
 	}
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 42639fba89e8..68bf0f14a962 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -149,11 +149,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	subf	r4, r4, r3
 	mtspr	SPRN_DEC, r4
 
-BEGIN_FTR_SECTION
 	/* hwthread_req may have got set by cede or no vcpu, so clear it */
 	li	r0, 0
 	stb	r0, HSTATE_HWTHREAD_REQ(r13)
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
 	/*
 	 * For external interrupts we need to call the Linux
@@ -316,7 +314,6 @@ kvm_novcpu_exit:
  * Relocation is off and most register values are lost.
  * r13 points to the PACA.
  * r3 contains the SRR1 wakeup value, SRR1 is trashed.
- * This is not used by ISAv3.0B processors.
  */
 	.globl	kvm_start_guest
 kvm_start_guest:
@@ -435,9 +432,6 @@ kvm_secondary_got_guest:
  * While waiting we also need to check if we get given a vcpu to run.
  */
 kvm_no_guest:
-BEGIN_FTR_SECTION
-	twi	31,0,0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
 	cmpwi	r3, 0
 	bne	53f
@@ -2546,10 +2540,8 @@ kvm_do_nap:
 	clrrdi	r0, r0, 1
 	mtspr	SPRN_CTRLT, r0
 
-BEGIN_FTR_SECTION
 	li	r0,1
 	stb	r0,HSTATE_HWTHREAD_REQ(r13)
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mfspr	r5,SPRN_LPCR
 	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
 BEGIN_FTR_SECTION
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ee279c7f4802..1abe6eb51335 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -644,7 +644,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 #endif
 	case KVM_CAP_PPC_HTM:
-		r = cpu_has_feature(CPU_FTR_TM_COMP) && hv_enabled;
+		r = hv_enabled &&
+		    (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
 		break;
 	default:
 		r = 0;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index c66c3626a216..3c29c9009bbf 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -24,7 +24,7 @@ endif
 
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
 	   copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
-	   memcpy_64.o memcmp_64.o
+	   memcpy_64.o memcmp_64.o pmem.o
 
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
new file mode 100644
index 000000000000..53c018762e1c
--- /dev/null
+++ b/arch/powerpc/lib/pmem.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright(c) 2017 IBM Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+
+/*
+ * CONFIG_ARCH_HAS_PMEM_API symbols
+ */
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+	unsigned long start = (unsigned long) addr;
+	flush_inval_dcache_range(start, start + size);
+}
+EXPORT_SYMBOL(arch_wb_cache_pmem);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+	unsigned long start = (unsigned long) addr;
+	flush_inval_dcache_range(start, start + size);
+}
+EXPORT_SYMBOL(arch_invalidate_pmem);
+
+/*
+ * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
+ */
+long __copy_from_user_flushcache(void *dest, const void __user *src,
+		unsigned size)
+{
+	unsigned long copied, start = (unsigned long) dest;
+
+	copied = __copy_from_user(dest, src, size);
+	flush_inval_dcache_range(start, start + size);
+
+	return copied;
+}
+
+void *memcpy_flushcache(void *dest, const void *src, size_t size)
+{
+	unsigned long start = (unsigned long) dest;
+
+	memcpy(dest, src, size);
+	flush_inval_dcache_range(start, start + size);
+
+	return dest;
+}
+EXPORT_SYMBOL(memcpy_flushcache);
+
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+	size_t len)
+{
+	memcpy_flushcache(to, page_to_virt(page) + offset, len);
+}
+EXPORT_SYMBOL(memcpy_page_flushcache);
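
These helpers back CONFIG_ARCH_HAS_PMEM_API and CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE on 64-bit: every copy variant finishes with flush_inval_dcache_range(), so the data is past the CPU caches when the call returns. An illustrative consumer (pmem_write_block is a made-up name; the real users are the generic pmem and DAX paths):

	static void pmem_write_block(void *pmem_addr, const void *src,
				     size_t len)
	{
		/* Copy, then push the destination range out of the dcache. */
		memcpy_flushcache(pmem_addr, src, len);
	}
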
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index f208f560aecd..70274b7b4773 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -31,6 +31,8 @@ extern char system_call_common[];
 #define XER_SO		0x80000000U
 #define XER_OV		0x40000000U
 #define XER_CA		0x20000000U
+#define XER_OV32	0x00080000U
+#define XER_CA32	0x00040000U
 
 #ifdef CONFIG_PPC_FPU
 /*
@@ -962,6 +964,16 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs,
 		op->ccval |= 0x20000000;
 }
 
+static nokprobe_inline void set_ca32(struct instruction_op *op, bool val)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (val)
+			op->xerval |= XER_CA32;
+		else
+			op->xerval &= ~XER_CA32;
+	}
+}
+
 static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
 				struct instruction_op *op, int rd,
 				unsigned long val1, unsigned long val2,
@@ -985,6 +997,9 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
 		op->xerval |= XER_CA;
 	else
 		op->xerval &= ~XER_CA;
+
+	set_ca32(op, (unsigned int)val < (unsigned int)val1 ||
+			(carry_in && (unsigned int)val == (unsigned int)val1));
 }
 
 static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
@@ -1791,6 +1806,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 				op->xerval |= XER_CA;
 			else
 				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
 			goto logical_done;
 
 		case 824:	/* srawi */
@@ -1803,6 +1819,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 				op->xerval |= XER_CA;
 			else
 				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
 			goto logical_done;
 
 #ifdef __powerpc64__
@@ -1832,6 +1849,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 				op->xerval |= XER_CA;
 			else
 				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
 			goto logical_done;
 
 		case 826:	/* sradi with sh_5 = 0 */
@@ -1845,6 +1863,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 				op->xerval |= XER_CA;
 			else
 				op->xerval &= ~XER_CA;
+			set_ca32(op, op->xerval & XER_CA);
 			goto logical_done;
 #endif /* __powerpc64__ */
 
@@ -2698,6 +2717,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
 	}
 	regs->nip = next_pc;
 }
+NOKPROBE_SYMBOL(emulate_update_regs);
 
 /*
  * Emulate a previously-analysed load or store instruction.
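
set_ca32() mirrors the 64-bit carry logic on the low 32 bits, and only on ISA v3.0 (Power9) where XER[CA32] exists. A worked example of the rule added to add_with_carry(), with no carry-in:

	unsigned long val1 = 0xffffffffUL;	/* first operand */
	unsigned long val  = val1 + 1;		/* 0x100000000 */

	bool ca   = val < val1;			/* false: no 64-bit carry */
	bool ca32 = (unsigned int)val < (unsigned int)val1;
						/* true: the low word wrapped */
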
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index a0c327d544d1..76a6b057d454 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -15,11 +15,11 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(BITS)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= pgtable-book3e.o
-obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
 obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o tlb-radix.o
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(BITS).o
-ifeq ($(CONFIG_PPC_STD_MMU_64),y)
+ifeq ($(CONFIG_PPC_BOOK3S_64),y)
 obj-$(CONFIG_PPC_4K_PAGES)	+= hash64_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)	+= hash64_64k.o
 endif
@@ -32,7 +32,7 @@ obj-$(CONFIG_PPC_SPLPAR) += vphn.o
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-y				+= hugetlbpage.o
 ifeq ($(CONFIG_HUGETLB_PAGE),y)
-obj-$(CONFIG_PPC_STD_MMU_64)	+= hugetlbpage-hash64.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= hugetlbpage-hash64.o
 obj-$(CONFIG_PPC_RADIX_MMU)	+= hugetlbpage-radix.o
 obj-$(CONFIG_PPC_BOOK3E_MMU)	+= hugetlbpage-book3e.o
 endif
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index 5c4c93dcff19..14cfb11b09d0 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
 	address_markers[6].start_address = PHB_IO_END;
 	address_markers[7].start_address = IOREMAP_BASE;
 	address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	address_markers[9].start_address = H_VMEMMAP_BASE;
 #else
 	address_markers[9].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index c9282d27b203..c2e7dea59490 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -112,7 +112,7 @@ struct flag_info {
 
 static const struct flag_info flag_array[] = {
 	{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 		.mask	= _PAGE_PRIVILEGED,
 		.val	= 0,
 #else
@@ -147,7 +147,7 @@ static const struct flag_info flag_array[] = {
147 .set = "present", 147 .set = "present",
148 .clear = " ", 148 .clear = " ",
149 }, { 149 }, {
150#ifdef CONFIG_PPC_STD_MMU_64 150#ifdef CONFIG_PPC_BOOK3S_64
151 .mask = H_PAGE_HASHPTE, 151 .mask = H_PAGE_HASHPTE,
152 .val = H_PAGE_HASHPTE, 152 .val = H_PAGE_HASHPTE,
153#else 153#else
@@ -157,7 +157,7 @@ static const struct flag_info flag_array[] = {
157 .set = "hpte", 157 .set = "hpte",
158 .clear = " ", 158 .clear = " ",
159 }, { 159 }, {
160#ifndef CONFIG_PPC_STD_MMU_64 160#ifndef CONFIG_PPC_BOOK3S_64
161 .mask = _PAGE_GUARDED, 161 .mask = _PAGE_GUARDED,
162 .val = _PAGE_GUARDED, 162 .val = _PAGE_GUARDED,
163 .set = "guarded", 163 .set = "guarded",
@@ -174,7 +174,7 @@ static const struct flag_info flag_array[] = {
174 .set = "accessed", 174 .set = "accessed",
175 .clear = " ", 175 .clear = " ",
176 }, { 176 }, {
177#ifndef CONFIG_PPC_STD_MMU_64 177#ifndef CONFIG_PPC_BOOK3S_64
178 .mask = _PAGE_WRITETHRU, 178 .mask = _PAGE_WRITETHRU,
179 .val = _PAGE_WRITETHRU, 179 .val = _PAGE_WRITETHRU,
180 .set = "write through", 180 .set = "write through",
@@ -450,7 +450,7 @@ static void populate_markers(void)
 	address_markers[i++].start_address = PHB_IO_END;
 	address_markers[i++].start_address = IOREMAP_BASE;
 	address_markers[i++].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	address_markers[i++].start_address = H_VMEMMAP_BASE;
 #else
 	address_markers[i++].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 67ec2e927253..655a5a9a183d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -21,6 +21,7 @@
 #undef DEBUG
 #undef DEBUG_LOW
 
+#define pr_fmt(fmt) "hash-mmu: " fmt
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/sched/mm.h>
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 558e9d3891bf..2486bee0f93e 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -49,17 +49,22 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct hstate *h = hstate_file(file);
+	int fixed = (flags & MAP_FIXED);
+	unsigned long high_limit;
 	struct vm_unmapped_area_info info;
 
-	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
-		mm->context.addr_limit = TASK_SIZE;
+	high_limit = DEFAULT_MAP_WINDOW;
+	if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+		high_limit = TASK_SIZE;
 
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (len > mm->task_size)
+	if (len > high_limit)
 		return -ENOMEM;
 
-	if (flags & MAP_FIXED) {
+	if (fixed) {
+		if (addr > high_limit - len)
+			return -ENOMEM;
 		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
 		return addr;
@@ -68,7 +73,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	if (addr) {
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
-		if (mm->task_size - len >= addr &&
+		if (high_limit - len >= addr &&
 		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
@@ -79,12 +84,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
 	info.low_limit = PAGE_SIZE;
-	info.high_limit = current->mm->mmap_base;
+	info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
 
-	if (addr > DEFAULT_MAP_WINDOW)
-		info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
-
 	return vm_unmapped_area(&info);
 }
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 588a521966ec..a07722531b32 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,11 +68,11 @@
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 #if H_PGTABLE_RANGE > USER_VSID_RANGE
 #warning Limited user VSID range means pagetable space is wasted
 #endif
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 phys_addr_t memstart_addr = ~0;
 EXPORT_SYMBOL_GPL(memstart_addr);
@@ -367,11 +367,20 @@ EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
 
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
-#ifdef CONFIG_PPC_STD_MMU_64
-static bool disable_radix;
+#ifdef CONFIG_PPC_BOOK3S_64
+static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+
 static int __init parse_disable_radix(char *p)
 {
-	disable_radix = true;
+	bool val;
+
+	if (strlen(p) == 0)
+		val = true;
+	else if (kstrtobool(p, &val))
+		return -EINVAL;
+
+	disable_radix = val;
+
 	return 0;
 }
 early_param("disable_radix", parse_disable_radix);
@@ -444,4 +453,4 @@ void __init mmu_early_init_devtree(void)
 	else
 		hash__early_init_devtree();
 }
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
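
disable_radix now takes an optional boolean argument, parsed with kstrtobool(), and its default follows the new CONFIG_PPC_RADIX_MMU_DEFAULT, so disable_radix=no can force radix back on where the config default disables it. A sketch of the accepted spellings (return-value checks elided):

	bool val;

	kstrtobool("y", &val);	/* val == true  -> radix disabled */
	kstrtobool("0", &val);	/* val == false -> radix stays enabled */
	/* A bare "disable_radix" reaches the handler as "" -> treated as true. */
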
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 5d78b193fec4..d503f344e476 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -106,22 +106,27 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
+	int fixed = (flags & MAP_FIXED);
+	unsigned long high_limit;
 	struct vm_unmapped_area_info info;
 
-	if (unlikely(addr > mm->context.addr_limit &&
-		     mm->context.addr_limit != TASK_SIZE))
-		mm->context.addr_limit = TASK_SIZE;
+	high_limit = DEFAULT_MAP_WINDOW;
+	if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+		high_limit = TASK_SIZE;
 
-	if (len > mm->task_size - mmap_min_addr)
+	if (len > high_limit)
 		return -ENOMEM;
 
-	if (flags & MAP_FIXED)
+	if (fixed) {
+		if (addr > high_limit - len)
+			return -ENOMEM;
 		return addr;
+	}
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
-		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
+		if (high_limit - len >= addr && addr >= mmap_min_addr &&
 		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
@@ -129,13 +134,9 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_base;
+	info.high_limit = high_limit;
 	info.align_mask = 0;
 
-	if (unlikely(addr > DEFAULT_MAP_WINDOW))
-		info.high_limit = mm->context.addr_limit;
-	else
-		info.high_limit = DEFAULT_MAP_WINDOW;
-
 	return vm_unmapped_area(&info);
 }
 
@@ -149,37 +150,37 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
+	int fixed = (flags & MAP_FIXED);
+	unsigned long high_limit;
 	struct vm_unmapped_area_info info;
 
-	if (unlikely(addr > mm->context.addr_limit &&
-		     mm->context.addr_limit != TASK_SIZE))
-		mm->context.addr_limit = TASK_SIZE;
+	high_limit = DEFAULT_MAP_WINDOW;
+	if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+		high_limit = TASK_SIZE;
 
-	/* requested length too big for entire address space */
-	if (len > mm->task_size - mmap_min_addr)
+	if (len > high_limit)
 		return -ENOMEM;
 
-	if (flags & MAP_FIXED)
+	if (fixed) {
+		if (addr > high_limit - len)
+			return -ENOMEM;
 		return addr;
+	}
 
-	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
-		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
+		if (high_limit - len >= addr && addr >= mmap_min_addr &&
 		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
 	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-	info.high_limit = mm->mmap_base;
+	info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
 	info.align_mask = 0;
 
-	if (addr > DEFAULT_MAP_WINDOW)
-		info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
-
 	addr = vm_unmapped_area(&info);
 	if (!(addr & ~PAGE_MASK))
 		return addr;
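
Both mmap paths now share one rule, which is what the 128TB/512TB boundary fixes in this merge are about: stay inside the 128TB default window unless the caller's hint (or a MAP_FIXED request) explicitly reaches beyond it. The rule in isolation:

	unsigned long high_limit = DEFAULT_MAP_WINDOW;	/* 128TB */

	/* Only an explicit high hint, or a MAP_FIXED request crossing the
	 * window, opens up the full address space.
	 */
	if (addr >= high_limit || (fixed && (addr + len > high_limit)))
		high_limit = TASK_SIZE;			/* up to 512TB */
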
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0f613bc63c50..d60a62bf4fc7 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
 					  struct mm_struct *mm) { }
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline void inc_mm_active_cpus(struct mm_struct *mm)
-{
-	atomic_inc(&mm->context.active_cpus);
-}
-#else
-static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
-#endif
-
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a7e998158f37..59c0766ae4e0 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -93,11 +93,11 @@ static int hash__init_new_context(struct mm_struct *mm)
 		return index;
 
 	/*
-	 * We do switch_slb() early in fork, even before we setup the
-	 * mm->context.addr_limit. Default to max task size so that we copy the
-	 * default values to paca which will help us to handle slb miss early.
+	 * In the case of exec, use the default limit,
+	 * otherwise inherit it from the mm we are duplicating.
 	 */
-	mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
+	if (!mm->context.slb_addr_limit)
+		mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
 
 	/*
 	 * The old code would re-promote on fork, we don't do that when using
@@ -216,19 +216,34 @@ void destroy_context(struct mm_struct *mm)
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
 #endif
+	if (radix_enabled())
+		WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+	else
+		subpage_prot_free(mm);
+	destroy_pagetable_page(mm);
+	__destroy_context(mm->context.id);
+	mm->context.id = MMU_NO_CONTEXT;
+}
+
+void arch_exit_mmap(struct mm_struct *mm)
+{
 	if (radix_enabled()) {
 		/*
 		 * Radix doesn't have a valid bit in the process table
 		 * entries. However we know that at least P9 implementation
 		 * will avoid caching an entry with an invalid RTS field,
 		 * and 0 is invalid. So this will do.
+		 *
+		 * This runs before the "fullmm" tlb flush in exit_mmap,
+		 * which does a RIC=2 tlbie to clear the process table
+		 * entry. See the "fullmm" comments in tlb-radix.c.
+		 *
+		 * No barrier required here after the store because
+		 * this process will do the invalidate, which starts with
+		 * ptesync.
 		 */
 		process_tb[mm->context.id].prtb0 = 0;
-	} else
-		subpage_prot_free(mm);
-	destroy_pagetable_page(mm);
-	__destroy_context(mm->context.id);
-	mm->context.id = MMU_NO_CONTEXT;
+	}
 }
 
 #ifdef CONFIG_PPC_RADIX_MMU
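
The teardown work is split so that the process-table clear happens from the new arch_exit_mmap() hook, before exit_mmap()'s final "fullmm" TLB flush, while freeing the context id stays in destroy_context(). A sketch of the resulting ordering (call sites simplified; this is the sequence the new comment relies on):

	arch_exit_mmap(mm);	/* radix: process_tb[id].prtb0 = 0 */
	/* ... exit_mmap() unmaps and does the RIC=2 "fullmm" tlbie ... */
	destroy_context(mm);	/* warn if prtb0 != 0, free the context id */
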
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 73016451f330..adb6364f4091 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1148,11 +1148,33 @@ struct topology_update_data {
 	int new_nid;
 };
 
+#define TOPOLOGY_DEF_TIMER_SECS	60
+
 static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
 static cpumask_t cpu_associativity_changes_mask;
 static int vphn_enabled;
 static int prrn_enabled;
 static void reset_topology_timer(void);
+static int topology_timer_secs = 1;
+static int topology_inited;
+static int topology_update_needed;
+
+/*
+ * Change polling interval for associativity changes.
+ */
+int timed_topology_update(int nsecs)
+{
+	if (vphn_enabled) {
+		if (nsecs > 0)
+			topology_timer_secs = nsecs;
+		else
+			topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+
+		reset_topology_timer();
+	}
+
+	return 0;
+}
 
 /*
  * Store the current values of the associativity change counters in the
@@ -1246,6 +1268,11 @@ static long vphn_get_associativity(unsigned long cpu,
1246 "hcall_vphn() experienced a hardware fault " 1268 "hcall_vphn() experienced a hardware fault "
1247 "preventing VPHN. Disabling polling...\n"); 1269 "preventing VPHN. Disabling polling...\n");
1248 stop_topology_update(); 1270 stop_topology_update();
1271 break;
1272 case H_SUCCESS:
1273 dbg("VPHN hcall succeeded. Reset polling...\n");
1274 timed_topology_update(0);
1275 break;
1249 } 1276 }
1250 1277
1251 return rc; 1278 return rc;
@@ -1323,8 +1350,11 @@ int numa_update_cpu_topology(bool cpus_locked)
 	struct device *dev;
 	int weight, new_nid, i = 0;
 
-	if (!prrn_enabled && !vphn_enabled)
+	if (!prrn_enabled && !vphn_enabled) {
+		if (!topology_inited)
+			topology_update_needed = 1;
 		return 0;
+	}
 
 	weight = cpumask_weight(&cpu_associativity_changes_mask);
 	if (!weight)
@@ -1363,22 +1393,30 @@ int numa_update_cpu_topology(bool cpus_locked)
 			cpumask_andnot(&cpu_associativity_changes_mask,
 					&cpu_associativity_changes_mask,
 					cpu_sibling_mask(cpu));
+			dbg("Assoc chg gives same node %d for cpu%d\n",
+					new_nid, cpu);
 			cpu = cpu_last_thread_sibling(cpu);
 			continue;
 		}
 
 		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
 			ud = &updates[i++];
+			ud->next = &updates[i];
 			ud->cpu = sibling;
 			ud->new_nid = new_nid;
 			ud->old_nid = numa_cpu_lookup_table[sibling];
 			cpumask_set_cpu(sibling, &updated_cpus);
-			if (i < weight)
-				ud->next = &updates[i];
 		}
 		cpu = cpu_last_thread_sibling(cpu);
 	}
 
+	/*
+	 * Prevent processing of 'updates' from overflowing array
+	 * where last entry filled in a 'next' pointer.
+	 */
+	if (i)
+		updates[i-1].next = NULL;
+
 	pr_debug("Topology update for the following CPUs:\n");
 	if (cpumask_weight(&updated_cpus)) {
 		for (ud = &updates[0]; ud; ud = ud->next) {
@@ -1433,6 +1471,7 @@ int numa_update_cpu_topology(bool cpus_locked)
 
 out:
 	kfree(updates);
+	topology_update_needed = 0;
 	return changed;
 }
 
@@ -1466,7 +1505,7 @@ static struct timer_list topology_timer;
 
 static void reset_topology_timer(void)
 {
-	mod_timer(&topology_timer, jiffies + 60 * HZ);
+	mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
 }
 
 #ifdef CONFIG_SMP
@@ -1515,15 +1554,14 @@ int start_topology_update(void)
 	if (firmware_has_feature(FW_FEATURE_PRRN)) {
 		if (!prrn_enabled) {
 			prrn_enabled = 1;
-			vphn_enabled = 0;
 #ifdef CONFIG_SMP
 			rc = of_reconfig_notifier_register(&dt_update_nb);
 #endif
 		}
-	} else if (firmware_has_feature(FW_FEATURE_VPHN) &&
+	}
+	if (firmware_has_feature(FW_FEATURE_VPHN) &&
 		   lppaca_shared_proc(get_lppaca())) {
 		if (!vphn_enabled) {
-			prrn_enabled = 0;
 			vphn_enabled = 1;
 			setup_cpu_associativity_change_counters();
 			timer_setup(&topology_timer, topology_timer_fn,
@@ -1547,7 +1585,8 @@ int stop_topology_update(void)
 #ifdef CONFIG_SMP
 		rc = of_reconfig_notifier_unregister(&dt_update_nb);
 #endif
-	} else if (vphn_enabled) {
+	}
+	if (vphn_enabled) {
 		vphn_enabled = 0;
 		rc = del_timer_sync(&topology_timer);
 	}
@@ -1610,9 +1649,17 @@ static int topology_update_init(void)
 	if (topology_updates_enabled)
 		start_topology_update();
 
+	if (vphn_enabled)
+		topology_schedule_update();
+
 	if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
 		return -ENOMEM;
 
+	topology_inited = 1;
+	if (topology_update_needed)
+		bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
+					nr_cpumask_bits);
+
 	return 0;
 }
 device_initcall(topology_update_init);
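
timed_topology_update() gives callers a way to poll VPHN faster while a topology change is expected, then drop back to the 60-second default. Illustrative usage:

	timed_topology_update(1);	/* poll every second for a while */
	/* ... the associativity change gets processed ... */
	timed_topology_update(0);	/* <= 0 restores TOPOLOGY_DEF_TIMER_SECS (60s) */
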
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 39c252b54d16..cfbbee941a76 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -169,6 +169,16 @@ void radix__mark_rodata_ro(void)
169{ 169{
170 unsigned long start, end; 170 unsigned long start, end;
171 171
172 /*
173 * mark_rodata_ro() will mark itself as !writable at some point.
 174 * Due to the DD1 workaround in radix__pte_update(), we'll end up with
 175 * an invalid pte and the system will crash quite severely.
176 */
177 if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
178 pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n");
179 return;
180 }
181
172 start = (unsigned long)_stext; 182 start = (unsigned long)_stext;
173 end = (unsigned long)__init_begin; 183 end = (unsigned long)__init_begin;
174 184
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 1ec3aee43624..813ea22c3e00 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,7 +57,7 @@
57 57
58#include "mmu_decl.h" 58#include "mmu_decl.h"
59 59
60#ifdef CONFIG_PPC_STD_MMU_64 60#ifdef CONFIG_PPC_BOOK3S_64
61#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) 61#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
62#error TASK_SIZE_USER64 exceeds user VSID range 62#error TASK_SIZE_USER64 exceeds user VSID range
63#endif 63#endif
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 906a86fe457b..2cf5ef3fc50d 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -167,7 +167,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
167 /* 167 /*
168 * user space make sure we are within the allowed limit 168 * user space make sure we are within the allowed limit
169 */ 169 */
170 ld r11,PACA_ADDR_LIMIT(r13) 170 ld r11,PACA_SLB_ADDR_LIMIT(r13)
171 cmpld r3,r11 171 cmpld r3,r11
172 bge- 8f 172 bge- 8f
173 173
@@ -309,10 +309,6 @@ slb_compare_rr_to_size:
309 srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ 309 srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
310 rldimi r10,r9,ESID_BITS_1T,0 310 rldimi r10,r9,ESID_BITS_1T,0
311 ASM_VSID_SCRAMBLE(r10,r9,r11,1T) 311 ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
312 /*
313 * bits above VSID_BITS_1T need to be ignored from r10
314 * also combine VSID and flags
315 */
316 312
317 li r10,MMU_SEGSIZE_1T 313 li r10,MMU_SEGSIZE_1T
318 rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ 314 rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 45f6740dd407..564fff06f5c1 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -96,7 +96,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
96{ 96{
97 struct vm_area_struct *vma; 97 struct vm_area_struct *vma;
98 98
99 if ((mm->task_size - len) < addr) 99 if ((mm->context.slb_addr_limit - len) < addr)
100 return 0; 100 return 0;
101 vma = find_vma(mm, addr); 101 vma = find_vma(mm, addr);
102 return (!vma || (addr + len) <= vm_start_gap(vma)); 102 return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -133,10 +133,10 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
133 if (!slice_low_has_vma(mm, i)) 133 if (!slice_low_has_vma(mm, i))
134 ret->low_slices |= 1u << i; 134 ret->low_slices |= 1u << i;
135 135
136 if (mm->task_size <= SLICE_LOW_TOP) 136 if (mm->context.slb_addr_limit <= SLICE_LOW_TOP)
137 return; 137 return;
138 138
139 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) 139 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++)
140 if (!slice_high_has_vma(mm, i)) 140 if (!slice_high_has_vma(mm, i))
141 __set_bit(i, ret->high_slices); 141 __set_bit(i, ret->high_slices);
142} 142}
@@ -157,7 +157,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
157 ret->low_slices |= 1u << i; 157 ret->low_slices |= 1u << i;
158 158
159 hpsizes = mm->context.high_slices_psize; 159 hpsizes = mm->context.high_slices_psize;
160 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) { 160 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
161 mask_index = i & 0x1; 161 mask_index = i & 0x1;
162 index = i >> 1; 162 index = i >> 1;
163 if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) 163 if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -169,7 +169,7 @@ static int slice_check_fit(struct mm_struct *mm,
169 struct slice_mask mask, struct slice_mask available) 169 struct slice_mask mask, struct slice_mask available)
170{ 170{
171 DECLARE_BITMAP(result, SLICE_NUM_HIGH); 171 DECLARE_BITMAP(result, SLICE_NUM_HIGH);
172 unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.addr_limit); 172 unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
173 173
174 bitmap_and(result, mask.high_slices, 174 bitmap_and(result, mask.high_slices,
175 available.high_slices, slice_count); 175 available.high_slices, slice_count);
@@ -219,7 +219,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
219 mm->context.low_slices_psize = lpsizes; 219 mm->context.low_slices_psize = lpsizes;
220 220
221 hpsizes = mm->context.high_slices_psize; 221 hpsizes = mm->context.high_slices_psize;
222 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) { 222 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
223 mask_index = i & 0x1; 223 mask_index = i & 0x1;
224 index = i >> 1; 224 index = i >> 1;
225 if (test_bit(i, mask.high_slices)) 225 if (test_bit(i, mask.high_slices))
@@ -329,8 +329,8 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 329 * Only for requests for which high_limit is above 329 * Only for requests for which high_limit is above
 330 * DEFAULT_MAP_WINDOW should we apply this. 330 * DEFAULT_MAP_WINDOW should we apply this.
331 */ 331 */
332 if (high_limit > DEFAULT_MAP_WINDOW) 332 if (high_limit > DEFAULT_MAP_WINDOW)
333 addr += mm->context.addr_limit - DEFAULT_MAP_WINDOW; 333 addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
334 334
335 while (addr > PAGE_SIZE) { 335 while (addr > PAGE_SIZE) {
336 info.high_limit = addr; 336 info.high_limit = addr;
@@ -412,25 +412,31 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
412 struct slice_mask compat_mask; 412 struct slice_mask compat_mask;
413 int fixed = (flags & MAP_FIXED); 413 int fixed = (flags & MAP_FIXED);
414 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); 414 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
415 unsigned long page_size = 1UL << pshift;
415 struct mm_struct *mm = current->mm; 416 struct mm_struct *mm = current->mm;
416 unsigned long newaddr; 417 unsigned long newaddr;
417 unsigned long high_limit; 418 unsigned long high_limit;
418 419
419 /* 420 high_limit = DEFAULT_MAP_WINDOW;
 420 * Check if we need to expand slice area. 421 if (addr >= high_limit || (fixed && (addr + len > high_limit)))
421 */ 422 high_limit = TASK_SIZE;
422 if (unlikely(addr > mm->context.addr_limit && 423
423 mm->context.addr_limit != TASK_SIZE)) { 424 if (len > high_limit)
424 mm->context.addr_limit = TASK_SIZE; 425 return -ENOMEM;
426 if (len & (page_size - 1))
427 return -EINVAL;
428 if (fixed) {
429 if (addr & (page_size - 1))
430 return -EINVAL;
431 if (addr > high_limit - len)
432 return -ENOMEM;
433 }
434
435 if (high_limit > mm->context.slb_addr_limit) {
436 mm->context.slb_addr_limit = high_limit;
425 on_each_cpu(slice_flush_segments, mm, 1); 437 on_each_cpu(slice_flush_segments, mm, 1);
426 } 438 }
427 /* 439
 428 * This mmap request can allocate up to 512TB
429 */
430 if (addr > DEFAULT_MAP_WINDOW)
431 high_limit = mm->context.addr_limit;
432 else
433 high_limit = DEFAULT_MAP_WINDOW;
434 /* 440 /*
435 * init different masks 441 * init different masks
436 */ 442 */
@@ -446,27 +452,19 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
446 452
447 /* Sanity checks */ 453 /* Sanity checks */
448 BUG_ON(mm->task_size == 0); 454 BUG_ON(mm->task_size == 0);
455 BUG_ON(mm->context.slb_addr_limit == 0);
449 VM_BUG_ON(radix_enabled()); 456 VM_BUG_ON(radix_enabled());
450 457
451 slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); 458 slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
452 slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n", 459 slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
453 addr, len, flags, topdown); 460 addr, len, flags, topdown);
454 461
455 if (len > mm->task_size)
456 return -ENOMEM;
457 if (len & ((1ul << pshift) - 1))
458 return -EINVAL;
459 if (fixed && (addr & ((1ul << pshift) - 1)))
460 return -EINVAL;
461 if (fixed && addr > (mm->task_size - len))
462 return -ENOMEM;
463
464 /* If hint, make sure it matches our alignment restrictions */ 462 /* If hint, make sure it matches our alignment restrictions */
465 if (!fixed && addr) { 463 if (!fixed && addr) {
466 addr = _ALIGN_UP(addr, 1ul << pshift); 464 addr = _ALIGN_UP(addr, page_size);
467 slice_dbg(" aligned addr=%lx\n", addr); 465 slice_dbg(" aligned addr=%lx\n", addr);
468 /* Ignore hint if it's too large or overlaps a VMA */ 466 /* Ignore hint if it's too large or overlaps a VMA */
469 if (addr > mm->task_size - len || 467 if (addr > high_limit - len ||
470 !slice_area_is_free(mm, addr, len)) 468 !slice_area_is_free(mm, addr, len))
471 addr = 0; 469 addr = 0;
472 } 470 }
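
The reworked slice_get_unmapped_area() now derives high_limit up front: requests stay inside the 128TB DEFAULT_MAP_WINDOW unless the hint address, or a MAP_FIXED range, reaches beyond it, and the length/alignment sanity checks run against that limit rather than mm->task_size. A self-contained sketch of just those up-front checks, with illustrative constants standing in for the real ppc64 values:

#include <errno.h>
#include <stdio.h>

#define PAGE_SHIFT 16UL				/* 64K pages, illustrative */
#define DEFAULT_MAP_WINDOW (128UL << 40)	/* 128T, illustrative */
#define TASK_SIZE (512UL << 40)			/* 512T, illustrative */

/* Model of the up-front checks in slice_get_unmapped_area(). */
static long check_mmap_hint(unsigned long addr, unsigned long len, int fixed)
{
	unsigned long page_size = 1UL << PAGE_SHIFT;
	unsigned long high_limit = DEFAULT_MAP_WINDOW;

	/* Only requests reaching past 128T unlock the full 512T space. */
	if (addr >= high_limit || (fixed && (addr + len > high_limit)))
		high_limit = TASK_SIZE;

	if (len > high_limit)
		return -ENOMEM;
	if (len & (page_size - 1))
		return -EINVAL;
	if (fixed) {
		if (addr & (page_size - 1))
			return -EINVAL;
		if (addr > high_limit - len)
			return -ENOMEM;
	}
	return 0;
}

int main(void)
{
	/* A MAP_FIXED request crossing 128T is now allowed to expand. */
	printf("%ld\n", check_mmap_hint(127UL << 40, 2UL << 40, 1));
	return 0;
}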
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index d304028641a2..884f4b705b57 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -39,6 +39,20 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
39 trace_tlbie(0, 1, rb, rs, ric, prs, r); 39 trace_tlbie(0, 1, rb, rs, ric, prs, r);
40} 40}
41 41
42static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
43{
44 unsigned long rb,rs,prs,r;
45
46 rb = PPC_BIT(53); /* IS = 1 */
47 rs = pid << PPC_BITLSHIFT(31);
48 prs = 1; /* process scoped */
 49 r = 1; /* radix format */
50
51 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
52 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
53 trace_tlbie(0, 0, rb, rs, ric, prs, r);
54}
55
42/* 56/*
43 * We use 128 set in radix mode and 256 set in hpt mode. 57 * We use 128 set in radix mode and 256 set in hpt mode.
44 */ 58 */
@@ -70,22 +84,13 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
70 84
71static inline void _tlbie_pid(unsigned long pid, unsigned long ric) 85static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
72{ 86{
73 unsigned long rb,rs,prs,r;
74
75 rb = PPC_BIT(53); /* IS = 1 */
76 rs = pid << PPC_BITLSHIFT(31);
77 prs = 1; /* process scoped */
 78 r = 1; /* radix format */
79
80 asm volatile("ptesync": : :"memory"); 87 asm volatile("ptesync": : :"memory");
81 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 88 __tlbie_pid(pid, ric);
82 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
83 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 89 asm volatile("eieio; tlbsync; ptesync": : :"memory");
84 trace_tlbie(0, 0, rb, rs, ric, prs, r);
85} 90}
86 91
87static inline void _tlbiel_va(unsigned long va, unsigned long pid, 92static inline void __tlbiel_va(unsigned long va, unsigned long pid,
88 unsigned long ap, unsigned long ric) 93 unsigned long ap, unsigned long ric)
89{ 94{
90 unsigned long rb,rs,prs,r; 95 unsigned long rb,rs,prs,r;
91 96
@@ -95,14 +100,44 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
95 prs = 1; /* process scoped */ 100 prs = 1; /* process scoped */
 96 r = 1; /* radix format */ 101 r = 1; /* radix format */
97 102
98 asm volatile("ptesync": : :"memory");
99 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 103 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
100 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 104 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
101 asm volatile("ptesync": : :"memory");
102 trace_tlbie(0, 1, rb, rs, ric, prs, r); 105 trace_tlbie(0, 1, rb, rs, ric, prs, r);
103} 106}
104 107
105static inline void _tlbie_va(unsigned long va, unsigned long pid, 108static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
109 unsigned long pid, unsigned long page_size,
110 unsigned long psize)
111{
112 unsigned long addr;
113 unsigned long ap = mmu_get_ap(psize);
114
115 for (addr = start; addr < end; addr += page_size)
116 __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
117}
118
119static inline void _tlbiel_va(unsigned long va, unsigned long pid,
120 unsigned long psize, unsigned long ric)
121{
122 unsigned long ap = mmu_get_ap(psize);
123
124 asm volatile("ptesync": : :"memory");
125 __tlbiel_va(va, pid, ap, ric);
126 asm volatile("ptesync": : :"memory");
127}
128
129static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
130 unsigned long pid, unsigned long page_size,
131 unsigned long psize, bool also_pwc)
132{
133 asm volatile("ptesync": : :"memory");
134 if (also_pwc)
135 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
136 __tlbiel_va_range(start, end, pid, page_size, psize);
137 asm volatile("ptesync": : :"memory");
138}
139
140static inline void __tlbie_va(unsigned long va, unsigned long pid,
106 unsigned long ap, unsigned long ric) 141 unsigned long ap, unsigned long ric)
107{ 142{
108 unsigned long rb,rs,prs,r; 143 unsigned long rb,rs,prs,r;
@@ -113,13 +148,43 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
113 prs = 1; /* process scoped */ 148 prs = 1; /* process scoped */
 114 r = 1; /* radix format */ 149 r = 1; /* radix format */
115 150
116 asm volatile("ptesync": : :"memory");
117 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 151 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
118 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 152 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
119 asm volatile("eieio; tlbsync; ptesync": : :"memory");
120 trace_tlbie(0, 0, rb, rs, ric, prs, r); 153 trace_tlbie(0, 0, rb, rs, ric, prs, r);
121} 154}
122 155
156static inline void __tlbie_va_range(unsigned long start, unsigned long end,
157 unsigned long pid, unsigned long page_size,
158 unsigned long psize)
159{
160 unsigned long addr;
161 unsigned long ap = mmu_get_ap(psize);
162
163 for (addr = start; addr < end; addr += page_size)
164 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
165}
166
167static inline void _tlbie_va(unsigned long va, unsigned long pid,
168 unsigned long psize, unsigned long ric)
169{
170 unsigned long ap = mmu_get_ap(psize);
171
172 asm volatile("ptesync": : :"memory");
173 __tlbie_va(va, pid, ap, ric);
174 asm volatile("eieio; tlbsync; ptesync": : :"memory");
175}
176
177static inline void _tlbie_va_range(unsigned long start, unsigned long end,
178 unsigned long pid, unsigned long page_size,
179 unsigned long psize, bool also_pwc)
180{
181 asm volatile("ptesync": : :"memory");
182 if (also_pwc)
183 __tlbie_pid(pid, RIC_FLUSH_PWC);
184 __tlbie_va_range(start, end, pid, page_size, psize);
185 asm volatile("eieio; tlbsync; ptesync": : :"memory");
186}
187
123/* 188/*
124 * Base TLB flushing operations: 189 * Base TLB flushing operations:
125 * 190 *
@@ -144,7 +209,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
144EXPORT_SYMBOL(radix__local_flush_tlb_mm); 209EXPORT_SYMBOL(radix__local_flush_tlb_mm);
145 210
146#ifndef CONFIG_SMP 211#ifndef CONFIG_SMP
147static void radix__local_flush_all_mm(struct mm_struct *mm) 212void radix__local_flush_all_mm(struct mm_struct *mm)
148{ 213{
149 unsigned long pid; 214 unsigned long pid;
150 215
@@ -154,18 +219,18 @@ static void radix__local_flush_all_mm(struct mm_struct *mm)
154 _tlbiel_pid(pid, RIC_FLUSH_ALL); 219 _tlbiel_pid(pid, RIC_FLUSH_ALL);
155 preempt_enable(); 220 preempt_enable();
156} 221}
222EXPORT_SYMBOL(radix__local_flush_all_mm);
157#endif /* CONFIG_SMP */ 223#endif /* CONFIG_SMP */
158 224
159void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 225void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
160 int psize) 226 int psize)
161{ 227{
162 unsigned long pid; 228 unsigned long pid;
163 unsigned long ap = mmu_get_ap(psize);
164 229
165 preempt_disable(); 230 preempt_disable();
166 pid = mm ? mm->context.id : 0; 231 pid = mm->context.id;
167 if (pid != MMU_NO_CONTEXT) 232 if (pid != MMU_NO_CONTEXT)
168 _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); 233 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
169 preempt_enable(); 234 preempt_enable();
170} 235}
171 236
@@ -173,11 +238,10 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
173{ 238{
174#ifdef CONFIG_HUGETLB_PAGE 239#ifdef CONFIG_HUGETLB_PAGE
175 /* need the return fix for nohash.c */ 240 /* need the return fix for nohash.c */
176 if (vma && is_vm_hugetlb_page(vma)) 241 if (is_vm_hugetlb_page(vma))
177 return __local_flush_hugetlb_page(vma, vmaddr); 242 return radix__local_flush_hugetlb_page(vma, vmaddr);
178#endif 243#endif
179 radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr, 244 radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
180 mmu_virtual_psize);
181} 245}
182EXPORT_SYMBOL(radix__local_flush_tlb_page); 246EXPORT_SYMBOL(radix__local_flush_tlb_page);
183 247
@@ -186,36 +250,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
186{ 250{
187 unsigned long pid; 251 unsigned long pid;
188 252
189 preempt_disable();
190 pid = mm->context.id; 253 pid = mm->context.id;
191 if (unlikely(pid == MMU_NO_CONTEXT)) 254 if (unlikely(pid == MMU_NO_CONTEXT))
192 goto no_context; 255 return;
193 256
257 preempt_disable();
194 if (!mm_is_thread_local(mm)) 258 if (!mm_is_thread_local(mm))
195 _tlbie_pid(pid, RIC_FLUSH_TLB); 259 _tlbie_pid(pid, RIC_FLUSH_TLB);
196 else 260 else
197 _tlbiel_pid(pid, RIC_FLUSH_TLB); 261 _tlbiel_pid(pid, RIC_FLUSH_TLB);
198no_context:
199 preempt_enable(); 262 preempt_enable();
200} 263}
201EXPORT_SYMBOL(radix__flush_tlb_mm); 264EXPORT_SYMBOL(radix__flush_tlb_mm);
202 265
203static void radix__flush_all_mm(struct mm_struct *mm) 266void radix__flush_all_mm(struct mm_struct *mm)
204{ 267{
205 unsigned long pid; 268 unsigned long pid;
206 269
207 preempt_disable();
208 pid = mm->context.id; 270 pid = mm->context.id;
209 if (unlikely(pid == MMU_NO_CONTEXT)) 271 if (unlikely(pid == MMU_NO_CONTEXT))
210 goto no_context; 272 return;
211 273
274 preempt_disable();
212 if (!mm_is_thread_local(mm)) 275 if (!mm_is_thread_local(mm))
213 _tlbie_pid(pid, RIC_FLUSH_ALL); 276 _tlbie_pid(pid, RIC_FLUSH_ALL);
214 else 277 else
215 _tlbiel_pid(pid, RIC_FLUSH_ALL); 278 _tlbiel_pid(pid, RIC_FLUSH_ALL);
216no_context:
217 preempt_enable(); 279 preempt_enable();
218} 280}
281EXPORT_SYMBOL(radix__flush_all_mm);
219 282
220void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) 283void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
221{ 284{
@@ -227,28 +290,26 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
227 int psize) 290 int psize)
228{ 291{
229 unsigned long pid; 292 unsigned long pid;
230 unsigned long ap = mmu_get_ap(psize);
231 293
232 preempt_disable(); 294 pid = mm->context.id;
233 pid = mm ? mm->context.id : 0;
234 if (unlikely(pid == MMU_NO_CONTEXT)) 295 if (unlikely(pid == MMU_NO_CONTEXT))
235 goto bail; 296 return;
297
298 preempt_disable();
236 if (!mm_is_thread_local(mm)) 299 if (!mm_is_thread_local(mm))
237 _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); 300 _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
238 else 301 else
239 _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); 302 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
240bail:
241 preempt_enable(); 303 preempt_enable();
242} 304}
243 305
244void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 306void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
245{ 307{
246#ifdef CONFIG_HUGETLB_PAGE 308#ifdef CONFIG_HUGETLB_PAGE
247 if (vma && is_vm_hugetlb_page(vma)) 309 if (is_vm_hugetlb_page(vma))
248 return flush_hugetlb_page(vma, vmaddr); 310 return radix__flush_hugetlb_page(vma, vmaddr);
249#endif 311#endif
250 radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr, 312 radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
251 mmu_virtual_psize);
252} 313}
253EXPORT_SYMBOL(radix__flush_tlb_page); 314EXPORT_SYMBOL(radix__flush_tlb_page);
254 315
@@ -262,17 +323,86 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
262} 323}
263EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 324EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
264 325
326#define TLB_FLUSH_ALL -1UL
327
265/* 328/*
266 * Currently, for range flushing, we just do a full mm flush. Because 329 * Number of pages above which we invalidate the entire PID rather than
 267 * we use this in code paths where we don't track the page size. 330 * flush individual pages, for local and global flushes respectively.
331 *
332 * tlbie goes out to the interconnect and individual ops are more costly.
333 * It also does not iterate over sets like the local tlbiel variant when
334 * invalidating a full PID, so it has a far lower threshold to change from
335 * individual page flushes to full-pid flushes.
268 */ 336 */
337static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
338static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
339
269void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 340void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
270 unsigned long end) 341 unsigned long end)
271 342
272{ 343{
273 struct mm_struct *mm = vma->vm_mm; 344 struct mm_struct *mm = vma->vm_mm;
345 unsigned long pid;
346 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
347 unsigned long page_size = 1UL << page_shift;
348 unsigned long nr_pages = (end - start) >> page_shift;
349 bool local, full;
350
351#ifdef CONFIG_HUGETLB_PAGE
352 if (is_vm_hugetlb_page(vma))
353 return radix__flush_hugetlb_tlb_range(vma, start, end);
354#endif
355
356 pid = mm->context.id;
357 if (unlikely(pid == MMU_NO_CONTEXT))
358 return;
274 359
275 radix__flush_tlb_mm(mm); 360 preempt_disable();
361 if (mm_is_thread_local(mm)) {
362 local = true;
363 full = (end == TLB_FLUSH_ALL ||
364 nr_pages > tlb_local_single_page_flush_ceiling);
365 } else {
366 local = false;
367 full = (end == TLB_FLUSH_ALL ||
368 nr_pages > tlb_single_page_flush_ceiling);
369 }
370
371 if (full) {
372 if (local)
373 _tlbiel_pid(pid, RIC_FLUSH_TLB);
374 else
375 _tlbie_pid(pid, RIC_FLUSH_TLB);
376 } else {
377 bool hflush = false;
378 unsigned long hstart, hend;
379
380#ifdef CONFIG_TRANSPARENT_HUGEPAGE
381 hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
382 hend = end >> HPAGE_PMD_SHIFT;
383 if (hstart < hend) {
384 hstart <<= HPAGE_PMD_SHIFT;
385 hend <<= HPAGE_PMD_SHIFT;
386 hflush = true;
387 }
388#endif
389
390 asm volatile("ptesync": : :"memory");
391 if (local) {
392 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
393 if (hflush)
394 __tlbiel_va_range(hstart, hend, pid,
395 HPAGE_PMD_SIZE, MMU_PAGE_2M);
396 asm volatile("ptesync": : :"memory");
397 } else {
398 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
399 if (hflush)
400 __tlbie_va_range(hstart, hend, pid,
401 HPAGE_PMD_SIZE, MMU_PAGE_2M);
402 asm volatile("eieio; tlbsync; ptesync": : :"memory");
403 }
404 }
405 preempt_enable();
276} 406}
277EXPORT_SYMBOL(radix__flush_tlb_range); 407EXPORT_SYMBOL(radix__flush_tlb_range);
278 408
@@ -291,101 +421,118 @@ static int radix_get_mmu_psize(int page_size)
291 return psize; 421 return psize;
292} 422}
293 423
424static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
425 unsigned long end, int psize);
426
294void radix__tlb_flush(struct mmu_gather *tlb) 427void radix__tlb_flush(struct mmu_gather *tlb)
295{ 428{
296 int psize = 0; 429 int psize = 0;
297 struct mm_struct *mm = tlb->mm; 430 struct mm_struct *mm = tlb->mm;
298 int page_size = tlb->page_size; 431 int page_size = tlb->page_size;
299 432
300 psize = radix_get_mmu_psize(page_size);
301 /* 433 /*
302 * if page size is not something we understand, do a full mm flush 434 * if page size is not something we understand, do a full mm flush
435 *
436 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
437 * that flushes the process table entry cache upon process teardown.
438 * See the comment for radix in arch_exit_mmap().
303 */ 439 */
304 if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all) 440 if (tlb->fullmm) {
305 radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
306 else if (tlb->need_flush_all) {
307 tlb->need_flush_all = 0;
308 radix__flush_all_mm(mm); 441 radix__flush_all_mm(mm);
 309 } else 442 } else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
310 radix__flush_tlb_mm(mm); 443 if (!tlb->need_flush_all)
311} 444 radix__flush_tlb_mm(mm);
445 else
446 radix__flush_all_mm(mm);
447 } else {
448 unsigned long start = tlb->start;
449 unsigned long end = tlb->end;
312 450
313#define TLB_FLUSH_ALL -1UL 451 if (!tlb->need_flush_all)
314/* 452 radix__flush_tlb_range_psize(mm, start, end, psize);
315 * Number of pages above which we will do a bcast tlbie. Just a 453 else
316 * number at this point copied from x86 454 radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
317 */ 455 }
318static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; 456 tlb->need_flush_all = 0;
457}
319 458
320void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 459static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
321 unsigned long end, int psize) 460 unsigned long start, unsigned long end,
461 int psize, bool also_pwc)
322{ 462{
323 unsigned long pid; 463 unsigned long pid;
324 unsigned long addr; 464 unsigned int page_shift = mmu_psize_defs[psize].shift;
325 int local = mm_is_thread_local(mm); 465 unsigned long page_size = 1UL << page_shift;
326 unsigned long ap = mmu_get_ap(psize); 466 unsigned long nr_pages = (end - start) >> page_shift;
327 unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; 467 bool local, full;
328 468
469 pid = mm->context.id;
470 if (unlikely(pid == MMU_NO_CONTEXT))
471 return;
329 472
330 preempt_disable(); 473 preempt_disable();
331 pid = mm ? mm->context.id : 0; 474 if (mm_is_thread_local(mm)) {
332 if (unlikely(pid == MMU_NO_CONTEXT)) 475 local = true;
333 goto err_out; 476 full = (end == TLB_FLUSH_ALL ||
477 nr_pages > tlb_local_single_page_flush_ceiling);
478 } else {
479 local = false;
480 full = (end == TLB_FLUSH_ALL ||
481 nr_pages > tlb_single_page_flush_ceiling);
482 }
334 483
335 if (end == TLB_FLUSH_ALL || 484 if (full) {
336 (end - start) > tlb_single_page_flush_ceiling * page_size) {
337 if (local) 485 if (local)
338 _tlbiel_pid(pid, RIC_FLUSH_TLB); 486 _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
339 else 487 else
 340 _tlbie_pid(pid, RIC_FLUSH_TLB); 488 _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
341 goto err_out; 489 } else {
342 }
343 for (addr = start; addr < end; addr += page_size) {
344
345 if (local) 490 if (local)
346 _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 491 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
347 else 492 else
348 _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 493 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
349 } 494 }
350err_out:
351 preempt_enable(); 495 preempt_enable();
352} 496}
353 497
498void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
499 unsigned long end, int psize)
500{
501 return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
502}
503
504static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
505 unsigned long end, int psize)
506{
507 __radix__flush_tlb_range_psize(mm, start, end, psize, true);
508}
509
354#ifdef CONFIG_TRANSPARENT_HUGEPAGE 510#ifdef CONFIG_TRANSPARENT_HUGEPAGE
355void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 511void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
356{ 512{
357 int local = mm_is_thread_local(mm);
358 unsigned long ap = mmu_get_ap(mmu_virtual_psize);
359 unsigned long pid, end; 513 unsigned long pid, end;
360 514
361 515 pid = mm->context.id;
362 pid = mm ? mm->context.id : 0;
363 preempt_disable();
364 if (unlikely(pid == MMU_NO_CONTEXT)) 516 if (unlikely(pid == MMU_NO_CONTEXT))
365 goto no_context; 517 return;
366 518
367 /* 4k page size, just blow the world */ 519 /* 4k page size, just blow the world */
368 if (PAGE_SIZE == 0x1000) { 520 if (PAGE_SIZE == 0x1000) {
369 radix__flush_all_mm(mm); 521 radix__flush_all_mm(mm);
370 preempt_enable();
371 return; 522 return;
372 } 523 }
373 524
374 /* Otherwise first do the PWC */
375 if (local)
376 _tlbiel_pid(pid, RIC_FLUSH_PWC);
377 else
378 _tlbie_pid(pid, RIC_FLUSH_PWC);
379
380 /* Then iterate the pages */
381 end = addr + HPAGE_PMD_SIZE; 525 end = addr + HPAGE_PMD_SIZE;
382 for (; addr < end; addr += PAGE_SIZE) { 526
383 if (local) 527 /* Otherwise first do the PWC, then iterate the pages. */
384 _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 528 preempt_disable();
385 else 529
386 _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 530 if (mm_is_thread_local(mm)) {
531 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
532 } else {
533 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
387 } 534 }
388no_context: 535
389 preempt_enable(); 536 preempt_enable();
390} 537}
391#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 538#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
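
The heart of the new range-flush path is the ceiling heuristic quoted above: past a certain number of pages it is cheaper to invalidate the whole PID than to issue per-page invalidations, and because tlbie goes out over the interconnect its ceiling (33 pages, a number originally copied from x86) is far lower than the local tlbiel one (twice the radix set count). A compilable userspace model of just the decision:

#include <stdbool.h>
#include <stdio.h>

#define TLB_FLUSH_ALL (-1UL)
#define POWER9_TLB_SETS_RADIX 128	/* matches the kernel constant */

static unsigned long tlb_single_page_flush_ceiling = 33;
static unsigned long tlb_local_single_page_flush_ceiling =
					POWER9_TLB_SETS_RADIX * 2;

/* Decide whether to flush page-by-page or invalidate the whole PID. */
static bool want_full_flush(unsigned long nr_pages, unsigned long end,
			    bool local)
{
	unsigned long ceiling = local ? tlb_local_single_page_flush_ceiling
				      : tlb_single_page_flush_ceiling;

	return end == TLB_FLUSH_ALL || nr_pages > ceiling;
}

int main(void)
{
	/* 100 pages: per-page locally, full PID if it must go global. */
	printf("local:  %s\n", want_full_flush(100, 0, true) ? "full" : "per-page");
	printf("global: %s\n", want_full_flush(100, 0, false) ? "full" : "per-page");
	return 0;
}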
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 62fa7589db2b..8bdef7ed28a8 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -23,7 +23,7 @@
23 * [ nv gpr save area ] 8*8 | 23 * [ nv gpr save area ] 8*8 |
24 * [ tail_call_cnt ] 8 | 24 * [ tail_call_cnt ] 8 |
25 * [ local_tmp_var ] 8 | 25 * [ local_tmp_var ] 8 |
 26 * fp (r31) --> [ ebpf stack space ] 512 | 26 * fp (r31) --> [ ebpf stack space ] up to 512 |
27 * [ frame header ] 32/112 | 27 * [ frame header ] 32/112 |
28 * sp (r1) ---> [ stack pointer ] -------------- 28 * sp (r1) ---> [ stack pointer ] --------------
29 */ 29 */
@@ -32,8 +32,8 @@
32#define BPF_PPC_STACK_SAVE (8*8) 32#define BPF_PPC_STACK_SAVE (8*8)
33/* for bpf JIT code internal usage */ 33/* for bpf JIT code internal usage */
34#define BPF_PPC_STACK_LOCALS 16 34#define BPF_PPC_STACK_LOCALS 16
35/* Ensure this is quadword aligned */ 35/* stack frame excluding BPF stack, ensure this is quadword aligned */
36#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + MAX_BPF_STACK + \ 36#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
37 BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) 37 BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
38 38
39#ifndef __ASSEMBLY__ 39#ifndef __ASSEMBLY__
@@ -103,6 +103,7 @@ struct codegen_context {
103 */ 103 */
104 unsigned int seen; 104 unsigned int seen;
105 unsigned int idx; 105 unsigned int idx;
106 unsigned int stack_size;
106}; 107};
107 108
108#endif /* !__ASSEMBLY__ */ 109#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index a66e64b0b251..46d74e81aff1 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -69,7 +69,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
69static int bpf_jit_stack_local(struct codegen_context *ctx) 69static int bpf_jit_stack_local(struct codegen_context *ctx)
70{ 70{
71 if (bpf_has_stack_frame(ctx)) 71 if (bpf_has_stack_frame(ctx))
72 return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK; 72 return STACK_FRAME_MIN_SIZE + ctx->stack_size;
73 else 73 else
74 return -(BPF_PPC_STACK_SAVE + 16); 74 return -(BPF_PPC_STACK_SAVE + 16);
75} 75}
@@ -82,8 +82,9 @@ static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
82static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) 82static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
83{ 83{
84 if (reg >= BPF_PPC_NVR_MIN && reg < 32) 84 if (reg >= BPF_PPC_NVR_MIN && reg < 32)
85 return (bpf_has_stack_frame(ctx) ? BPF_PPC_STACKFRAME : 0) 85 return (bpf_has_stack_frame(ctx) ?
86 - (8 * (32 - reg)); 86 (BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
87 - (8 * (32 - reg));
87 88
88 pr_err("BPF JIT is asking about unknown registers"); 89 pr_err("BPF JIT is asking about unknown registers");
89 BUG(); 90 BUG();
@@ -134,7 +135,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
134 PPC_BPF_STL(0, 1, PPC_LR_STKOFF); 135 PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
135 } 136 }
136 137
137 PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); 138 PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
138 } 139 }
139 140
140 /* 141 /*
@@ -161,7 +162,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
161 /* Setup frame pointer to point to the bpf stack area */ 162 /* Setup frame pointer to point to the bpf stack area */
162 if (bpf_is_seen_register(ctx, BPF_REG_FP)) 163 if (bpf_is_seen_register(ctx, BPF_REG_FP))
163 PPC_ADDI(b2p[BPF_REG_FP], 1, 164 PPC_ADDI(b2p[BPF_REG_FP], 1,
164 STACK_FRAME_MIN_SIZE + MAX_BPF_STACK); 165 STACK_FRAME_MIN_SIZE + ctx->stack_size);
165} 166}
166 167
167static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) 168static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -183,7 +184,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
183 184
184 /* Tear down our stack frame */ 185 /* Tear down our stack frame */
185 if (bpf_has_stack_frame(ctx)) { 186 if (bpf_has_stack_frame(ctx)) {
186 PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); 187 PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
187 if (ctx->seen & SEEN_FUNC) { 188 if (ctx->seen & SEEN_FUNC) {
188 PPC_BPF_LL(0, 1, PPC_LR_STKOFF); 189 PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
189 PPC_MTLR(0); 190 PPC_MTLR(0);
@@ -1013,6 +1014,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1013 1014
1014 memset(&cgctx, 0, sizeof(struct codegen_context)); 1015 memset(&cgctx, 0, sizeof(struct codegen_context));
1015 1016
1017 /* Make sure that the stack is quadword aligned. */
1018 cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
1019
1016 /* Scouting faux-generate pass 0 */ 1020 /* Scouting faux-generate pass 0 */
1017 if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { 1021 if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
1018 /* We hit something illegal or unsupported. */ 1022 /* We hit something illegal or unsupported. */
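
With stack_depth plumbed into codegen_context, the JIT now reserves only the stack the eBPF program actually declares, rounded up to the 16-byte quadword alignment the ppc64 ABI requires, instead of unconditionally carving out MAX_BPF_STACK (512 bytes). A small model of the sizing arithmetic (the frame constants are illustrative, not authoritative ABI values):

#include <stdio.h>

#define STACK_FRAME_MIN_SIZE 112	/* illustrative; the real value is ABI-dependent */
#define BPF_PPC_STACK_SAVE (8 * 8)
#define BPF_PPC_STACK_LOCALS 16
#define BPF_PPC_STACKFRAME \
	(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)

/* round_up() as in the kernel, valid for power-of-two alignment */
static unsigned int round_up(unsigned int x, unsigned int align)
{
	return (x + align - 1) & ~(align - 1);
}

int main(void)
{
	unsigned int stack_depth = 40;	/* what the BPF program declares */
	unsigned int stack_size = round_up(stack_depth, 16);

	/* Total frame: fixed overhead plus only the stack actually used. */
	printf("frame = %u bytes (was %u with a fixed 512-byte area)\n",
	       BPF_PPC_STACKFRAME + stack_size, BPF_PPC_STACKFRAME + 512);
	return 0;
}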
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index c82497a31c54..264b6ab11978 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -555,9 +555,7 @@ static void cell_virtual_cntr(unsigned long data)
555 555
556static void start_virt_cntrs(void) 556static void start_virt_cntrs(void)
557{ 557{
558 init_timer(&timer_virt_cntr); 558 setup_timer(&timer_virt_cntr, cell_virtual_cntr, 0UL);
559 timer_virt_cntr.function = cell_virtual_cntr;
560 timer_virt_cntr.data = 0UL;
561 timer_virt_cntr.expires = jiffies + HZ / 10; 559 timer_virt_cntr.expires = jiffies + HZ / 10;
562 add_timer(&timer_virt_cntr); 560 add_timer(&timer_virt_cntr);
563} 561}
@@ -679,9 +677,7 @@ static void spu_evnt_swap(unsigned long data)
679 677
680static void start_spu_event_swap(void) 678static void start_spu_event_swap(void)
681{ 679{
682 init_timer(&timer_spu_event_swap); 680 setup_timer(&timer_spu_event_swap, spu_evnt_swap, 0UL);
683 timer_spu_event_swap.function = spu_evnt_swap;
684 timer_spu_event_swap.data = 0UL;
685 timer_spu_event_swap.expires = jiffies + HZ / 25; 681 timer_spu_event_swap.expires = jiffies + HZ / 25;
686 add_timer(&timer_spu_event_swap); 682 add_timer(&timer_spu_event_swap);
687} 683}
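
Both conversions above collapse the open-coded init_timer() plus function/data assignments into a single setup_timer() call; behaviour is unchanged. A userspace model with a stub struct timer_list, just to show the shape of the transformation (the real kernel API of this era takes the same three arguments):

#include <stdio.h>

/* Stub of the era's struct timer_list, for illustration only. */
struct timer_list {
	void (*function)(unsigned long);
	unsigned long data;
	unsigned long expires;
};

/* One call replaces init_timer() plus two field assignments. */
static void setup_timer(struct timer_list *t,
			void (*fn)(unsigned long), unsigned long data)
{
	t->function = fn;
	t->data = data;
}

static void cell_virtual_cntr(unsigned long data)
{
	printf("timer fired, data=%lu\n", data);
}

int main(void)
{
	struct timer_list timer_virt_cntr;

	setup_timer(&timer_virt_cntr, cell_virtual_cntr, 0UL);
	timer_virt_cntr.function(timer_virt_cntr.data);
	return 0;
}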
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9c88b82f6229..72238eedc360 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -540,7 +540,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
540{ 540{
541 if (s1 < s2) 541 if (s1 < s2)
542 return 1; 542 return 1;
543 if (s2 > s1) 543 if (s1 > s2)
544 return -1; 544 return -1;
545 545
546 return memcmp(d1, d2, s1); 546 return memcmp(d1, d2, s1);
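
The memord() fix above matters: the old second test (s2 > s1) duplicated the first and could never fire, so for s1 > s2 the function fell through to memcmp() over s1 bytes, reading past the end of the shorter buffer. The corrected comparator, runnable stand-alone:

#include <stdio.h>
#include <string.h>

/* Fixed size-then-content ordering, as in the patched memord(). */
static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
	if (s1 < s2)
		return 1;
	if (s1 > s2)	/* the old code tested s2 > s1, which never fires */
		return -1;

	return memcmp(d1, d2, s1);
}

int main(void)
{
	/* Before the fix, this pair fell through to memcmp() with s1 > s2
	 * and read past the end of the shorter buffer. */
	printf("%d\n", memord("abcd", 4, "ab", 2));
	return 0;
}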
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a78f255111f2..ae07470fde3c 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -295,10 +295,6 @@ config PPC_STD_MMU_32
295 def_bool y 295 def_bool y
296 depends on PPC_STD_MMU && PPC32 296 depends on PPC_STD_MMU && PPC32
297 297
298config PPC_STD_MMU_64
299 def_bool y
300 depends on PPC_STD_MMU && PPC64
301
302config PPC_RADIX_MMU 298config PPC_RADIX_MMU
303 bool "Radix MMU Support" 299 bool "Radix MMU Support"
304 depends on PPC_BOOK3S_64 300 depends on PPC_BOOK3S_64
@@ -309,6 +305,19 @@ config PPC_RADIX_MMU
309 is only implemented by IBM Power9 CPUs, if you don't have one of them 305 is only implemented by IBM Power9 CPUs, if you don't have one of them
310 you can probably disable this. 306 you can probably disable this.
311 307
308config PPC_RADIX_MMU_DEFAULT
309 bool "Default to using the Radix MMU when possible"
310 depends on PPC_RADIX_MMU
311 default y
312 help
313 When the hardware supports the Radix MMU, default to using it unless
314 "disable_radix[=yes]" is specified on the kernel command line.
315
316 If this option is disabled, the Hash MMU will be used by default,
317 unless "disable_radix=no" is specified on the kernel command line.
318
319 If you're unsure, say Y.
320
312config ARCH_ENABLE_HUGEPAGE_MIGRATION 321config ARCH_ENABLE_HUGEPAGE_MIGRATION
313 def_bool y 322 def_bool y
314 depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION 323 depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -324,7 +333,7 @@ config PPC_BOOK3E_MMU
324 333
325config PPC_MM_SLICES 334config PPC_MM_SLICES
326 bool 335 bool
327 default y if PPC_STD_MMU_64 336 default y if PPC_BOOK3S_64
328 default n 337 default n
329 338
330config PPC_HAVE_PMU_SUPPORT 339config PPC_HAVE_PMU_SUPPORT
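
The new PPC_RADIX_MMU_DEFAULT option only changes which way the disable_radix command-line switch defaults, as its help text describes. A toy model of how the Kconfig default and the option combine (the option spellings come from the help text; parsing is simplified for illustration):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Model of PPC_RADIX_MMU_DEFAULT combined with disable_radix=. */
static bool radix_enabled(bool config_default, const char *cmdline)
{
	bool disable = !config_default;		/* Kconfig picks the default */

	if (cmdline) {
		if (!strcmp(cmdline, "disable_radix") ||
		    !strcmp(cmdline, "disable_radix=yes"))
			disable = true;
		else if (!strcmp(cmdline, "disable_radix=no"))
			disable = false;
	}
	return !disable;
}

int main(void)
{
	printf("default=y, no option: %s\n",
	       radix_enabled(true, NULL) ? "radix" : "hash");
	printf("default=n, disable_radix=no: %s\n",
	       radix_enabled(false, "disable_radix=no") ? "radix" : "hash");
	return 0;
}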
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 70183eb3d5c8..39a1d4225e0f 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -513,9 +513,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
513 mutex_init(&host->mutex); 513 mutex_init(&host->mutex);
514 init_completion(&host->complete); 514 init_completion(&host->complete);
515 spin_lock_init(&host->lock); 515 spin_lock_init(&host->lock);
516 init_timer(&host->timeout_timer); 516 setup_timer(&host->timeout_timer, kw_i2c_timeout, (unsigned long)host);
517 host->timeout_timer.function = kw_i2c_timeout;
518 host->timeout_timer.data = (unsigned long)host;
519 517
520 psteps = of_get_property(np, "AAPL,address-step", NULL); 518 psteps = of_get_property(np, "AAPL,address-step", NULL);
521 steps = psteps ? (*psteps) : 0x10; 519 steps = psteps ? (*psteps) : 0x10;
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 7a31c26500e6..3732118a0482 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -15,4 +15,5 @@ obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
15obj-$(CONFIG_OPAL_PRD) += opal-prd.o 15obj-$(CONFIG_OPAL_PRD) += opal-prd.o
16obj-$(CONFIG_PERF_EVENTS) += opal-imc.o 16obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
17obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o 17obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
18obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o 18obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
19obj-$(CONFIG_PPC_FTW) += nx-ftw.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 8864065eba22..4650fb294e7a 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -41,7 +41,6 @@
41#include "powernv.h" 41#include "powernv.h"
42#include "pci.h" 42#include "pci.h"
43 43
44static bool pnv_eeh_nb_init = false;
45static int eeh_event_irq = -EINVAL; 44static int eeh_event_irq = -EINVAL;
46 45
47static int pnv_eeh_init(void) 46static int pnv_eeh_init(void)
@@ -197,31 +196,31 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
 197 * been built. If the I/O cache stuff has been built, EEH is 196 * been built. If the I/O cache stuff has been built, EEH is
198 * ready to supply service. 197 * ready to supply service.
199 */ 198 */
200static int pnv_eeh_post_init(void) 199int pnv_eeh_post_init(void)
201{ 200{
202 struct pci_controller *hose; 201 struct pci_controller *hose;
203 struct pnv_phb *phb; 202 struct pnv_phb *phb;
204 int ret = 0; 203 int ret = 0;
205 204
206 /* Register OPAL event notifier */ 205 /* Probe devices & build address cache */
207 if (!pnv_eeh_nb_init) { 206 eeh_probe_devices();
208 eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); 207 eeh_addr_cache_build();
209 if (eeh_event_irq < 0) {
210 pr_err("%s: Can't register OPAL event interrupt (%d)\n",
211 __func__, eeh_event_irq);
212 return eeh_event_irq;
213 }
214 208
215 ret = request_irq(eeh_event_irq, pnv_eeh_event, 209 /* Register OPAL event notifier */
216 IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); 210 eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
217 if (ret < 0) { 211 if (eeh_event_irq < 0) {
218 irq_dispose_mapping(eeh_event_irq); 212 pr_err("%s: Can't register OPAL event interrupt (%d)\n",
219 pr_err("%s: Can't request OPAL event interrupt (%d)\n", 213 __func__, eeh_event_irq);
220 __func__, eeh_event_irq); 214 return eeh_event_irq;
221 return ret; 215 }
222 }
223 216
224 pnv_eeh_nb_init = true; 217 ret = request_irq(eeh_event_irq, pnv_eeh_event,
218 IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
219 if (ret < 0) {
220 irq_dispose_mapping(eeh_event_irq);
221 pr_err("%s: Can't request OPAL event interrupt (%d)\n",
222 __func__, eeh_event_irq);
223 return ret;
225 } 224 }
226 225
227 if (!eeh_enabled()) 226 if (!eeh_enabled())
@@ -367,6 +366,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
367 if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) 366 if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
368 return NULL; 367 return NULL;
369 368
369 /* Skip if we haven't probed yet */
370 if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE)
371 return NULL;
372
370 /* Initialize eeh device */ 373 /* Initialize eeh device */
371 edev->class_code = pdn->class_code; 374 edev->class_code = pdn->class_code;
372 edev->mode &= 0xFFFFFF00; 375 edev->mode &= 0xFFFFFF00;
@@ -1731,7 +1734,6 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
1731static struct eeh_ops pnv_eeh_ops = { 1734static struct eeh_ops pnv_eeh_ops = {
1732 .name = "powernv", 1735 .name = "powernv",
1733 .init = pnv_eeh_init, 1736 .init = pnv_eeh_init,
1734 .post_init = pnv_eeh_post_init,
1735 .probe = pnv_eeh_probe, 1737 .probe = pnv_eeh_probe,
1736 .set_option = pnv_eeh_set_option, 1738 .set_option = pnv_eeh_set_option,
1737 .get_pe_addr = pnv_eeh_get_pe_addr, 1739 .get_pe_addr = pnv_eeh_get_pe_addr,
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 2cb6cbea4b3b..f6cbc1a71472 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -395,6 +395,7 @@ struct npu_context {
395 struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; 395 struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
396 struct mmu_notifier mn; 396 struct mmu_notifier mn;
397 struct kref kref; 397 struct kref kref;
398 bool nmmu_flush;
398 399
399 /* Callback to stop translation requests on a given GPU */ 400 /* Callback to stop translation requests on a given GPU */
400 struct npu_context *(*release_cb)(struct npu_context *, void *); 401 struct npu_context *(*release_cb)(struct npu_context *, void *);
@@ -545,11 +546,13 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
545 struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; 546 struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
546 unsigned long pid = npu_context->mm->context.id; 547 unsigned long pid = npu_context->mm->context.id;
547 548
548 /* 549 if (npu_context->nmmu_flush)
549 * Unfortunately the nest mmu does not support flushing specific 550 /*
550 * addresses so we have to flush the whole mm. 551 * Unfortunately the nest mmu does not support flushing specific
551 */ 552 * addresses so we have to flush the whole mm once before
552 flush_tlb_mm(npu_context->mm); 553 * shooting down the GPU translation.
554 */
555 flush_all_mm(npu_context->mm);
553 556
554 /* 557 /*
555 * Loop over all the NPUs this process is active on and launch 558 * Loop over all the NPUs this process is active on and launch
@@ -722,6 +725,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
722 return ERR_PTR(-ENODEV); 725 return ERR_PTR(-ENODEV);
723 npu_context->npdev[npu->index][nvlink_index] = npdev; 726 npu_context->npdev[npu->index][nvlink_index] = npdev;
724 727
728 if (!nphb->npu.nmmu_flush) {
729 /*
730 * If we're not explicitly flushing ourselves we need to mark
731 * the thread for global flushes
732 */
733 npu_context->nmmu_flush = false;
734 mm_context_add_copro(mm);
735 } else
736 npu_context->nmmu_flush = true;
737
725 return npu_context; 738 return npu_context;
726} 739}
727EXPORT_SYMBOL(pnv_npu2_init_context); 740EXPORT_SYMBOL(pnv_npu2_init_context);
@@ -731,6 +744,9 @@ static void pnv_npu2_release_context(struct kref *kref)
731 struct npu_context *npu_context = 744 struct npu_context *npu_context =
732 container_of(kref, struct npu_context, kref); 745 container_of(kref, struct npu_context, kref);
733 746
747 if (!npu_context->nmmu_flush)
748 mm_context_remove_copro(npu_context->mm);
749
734 npu_context->mm->context.npu_context = NULL; 750 npu_context->mm->context.npu_context = NULL;
735 mmu_notifier_unregister(&npu_context->mn, 751 mmu_notifier_unregister(&npu_context->mn,
736 npu_context->mm); 752 npu_context->mm);
@@ -819,6 +835,8 @@ int pnv_npu2_init(struct pnv_phb *phb)
819 static int npu_index; 835 static int npu_index;
820 uint64_t rc = 0; 836 uint64_t rc = 0;
821 837
838 phb->npu.nmmu_flush =
839 of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
822 for_each_child_of_node(phb->hose->dn, dn) { 840 for_each_child_of_node(phb->hose->dn, dn) {
823 gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn)); 841 gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
824 if (gpdev) { 842 if (gpdev) {
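
Whether the NPU driver flushes the nest MMU itself, or instead registers the mm as having a coprocessor user so that ordinary TLB flushes go global, now hinges on the single ibm,nmmu-flush device-tree property read above. A userspace model of that gating, with a stub standing in for of_property_read_bool():

#include <stdbool.h>
#include <stdio.h>

/* Model of the nmmu_flush gating in pnv_npu2_init_context(): firmware
 * that advertises "ibm,nmmu-flush" wants the driver to flush the nest
 * MMU explicitly; otherwise the mm is marked as having a coprocessor
 * user so normal TLB invalidations are broadcast. */
struct npu_context { bool nmmu_flush; };

static bool of_property_read_bool_stub(const char *prop)
{
	(void)prop;	/* pretend firmware advertises the property */
	return true;
}

static void init_context(struct npu_context *ctx)
{
	ctx->nmmu_flush = of_property_read_bool_stub("ibm,nmmu-flush");
	if (!ctx->nmmu_flush)
		puts("mm_context_add_copro(): rely on global flushes");
	else
		puts("driver will flush_all_mm() before ATSD shootdowns");
}

int main(void)
{
	struct npu_context ctx;

	init_context(&ctx);
	return 0;
}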
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index cf33769a7b72..18a355fa15e8 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * PowerNV OPAL asynchronous completion interfaces 2 * PowerNV OPAL asynchronous completion interfaces
3 * 3 *
4 * Copyright 2013 IBM Corp. 4 * Copyright 2013-2017 IBM Corp.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
@@ -23,40 +23,50 @@
23#include <asm/machdep.h> 23#include <asm/machdep.h>
24#include <asm/opal.h> 24#include <asm/opal.h>
25 25
26#define N_ASYNC_COMPLETIONS 64 26enum opal_async_token_state {
27 ASYNC_TOKEN_UNALLOCATED = 0,
28 ASYNC_TOKEN_ALLOCATED,
29 ASYNC_TOKEN_DISPATCHED,
30 ASYNC_TOKEN_ABANDONED,
31 ASYNC_TOKEN_COMPLETED
32};
33
34struct opal_async_token {
35 enum opal_async_token_state state;
36 struct opal_msg response;
37};
27 38
28static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL};
29static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS);
30static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait); 39static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
31static DEFINE_SPINLOCK(opal_async_comp_lock); 40static DEFINE_SPINLOCK(opal_async_comp_lock);
32static struct semaphore opal_async_sem; 41static struct semaphore opal_async_sem;
33static struct opal_msg *opal_async_responses;
34static unsigned int opal_max_async_tokens; 42static unsigned int opal_max_async_tokens;
43static struct opal_async_token *opal_async_tokens;
35 44
36int __opal_async_get_token(void) 45static int __opal_async_get_token(void)
37{ 46{
38 unsigned long flags; 47 unsigned long flags;
39 int token; 48 int i, token = -EBUSY;
40 49
41 spin_lock_irqsave(&opal_async_comp_lock, flags); 50 spin_lock_irqsave(&opal_async_comp_lock, flags);
42 token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
43 if (token >= opal_max_async_tokens) {
44 token = -EBUSY;
45 goto out;
46 }
47 51
48 if (__test_and_set_bit(token, opal_async_token_map)) { 52 for (i = 0; i < opal_max_async_tokens; i++) {
49 token = -EBUSY; 53 if (opal_async_tokens[i].state == ASYNC_TOKEN_UNALLOCATED) {
50 goto out; 54 opal_async_tokens[i].state = ASYNC_TOKEN_ALLOCATED;
55 token = i;
56 break;
57 }
51 } 58 }
52 59
53 __clear_bit(token, opal_async_complete_map);
54
55out:
56 spin_unlock_irqrestore(&opal_async_comp_lock, flags); 60 spin_unlock_irqrestore(&opal_async_comp_lock, flags);
57 return token; 61 return token;
58} 62}
59 63
64/*
65 * Note: If the returned token is used in an opal call and opal returns
66 * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or
67 * opal_async_wait_response_interruptible() at least once before calling another
68 * opal_async_* function
69 */
60int opal_async_get_token_interruptible(void) 70int opal_async_get_token_interruptible(void)
61{ 71{
62 int token; 72 int token;
@@ -73,9 +83,10 @@ int opal_async_get_token_interruptible(void)
73} 83}
74EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); 84EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
75 85
76int __opal_async_release_token(int token) 86static int __opal_async_release_token(int token)
77{ 87{
78 unsigned long flags; 88 unsigned long flags;
89 int rc;
79 90
80 if (token < 0 || token >= opal_max_async_tokens) { 91 if (token < 0 || token >= opal_max_async_tokens) {
81 pr_err("%s: Passed token is out of range, token %d\n", 92 pr_err("%s: Passed token is out of range, token %d\n",
@@ -84,11 +95,26 @@ int __opal_async_release_token(int token)
84 } 95 }
85 96
86 spin_lock_irqsave(&opal_async_comp_lock, flags); 97 spin_lock_irqsave(&opal_async_comp_lock, flags);
87 __set_bit(token, opal_async_complete_map); 98 switch (opal_async_tokens[token].state) {
88 __clear_bit(token, opal_async_token_map); 99 case ASYNC_TOKEN_COMPLETED:
100 case ASYNC_TOKEN_ALLOCATED:
101 opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED;
102 rc = 0;
103 break;
104 /*
105 * DISPATCHED and ABANDONED tokens must wait for OPAL to respond.
106 * Mark a DISPATCHED token as ABANDONED so that the response handling
107 * code knows no one cares and that it can free it then.
108 */
109 case ASYNC_TOKEN_DISPATCHED:
110 opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED;
111 /* Fall through */
112 default:
113 rc = 1;
114 }
89 spin_unlock_irqrestore(&opal_async_comp_lock, flags); 115 spin_unlock_irqrestore(&opal_async_comp_lock, flags);
90 116
91 return 0; 117 return rc;
92} 118}
93 119
94int opal_async_release_token(int token) 120int opal_async_release_token(int token)
@@ -96,12 +122,10 @@ int opal_async_release_token(int token)
96 int ret; 122 int ret;
97 123
98 ret = __opal_async_release_token(token); 124 ret = __opal_async_release_token(token);
99 if (ret) 125 if (!ret)
100 return ret; 126 up(&opal_async_sem);
101
102 up(&opal_async_sem);
103 127
104 return 0; 128 return ret;
105} 129}
106EXPORT_SYMBOL_GPL(opal_async_release_token); 130EXPORT_SYMBOL_GPL(opal_async_release_token);
107 131
@@ -117,22 +141,83 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
117 return -EINVAL; 141 return -EINVAL;
118 } 142 }
119 143
120 /* Wakeup the poller before we wait for events to speed things 144 /*
145 * There is no need to mark the token as dispatched, wait_event()
146 * will block until the token completes.
147 *
148 * Wakeup the poller before we wait for events to speed things
121 * up on platforms or simulators where the interrupts aren't 149 * up on platforms or simulators where the interrupts aren't
122 * functional. 150 * functional.
123 */ 151 */
124 opal_wake_poller(); 152 opal_wake_poller();
125 wait_event(opal_async_wait, test_bit(token, opal_async_complete_map)); 153 wait_event(opal_async_wait, opal_async_tokens[token].state
126 memcpy(msg, &opal_async_responses[token], sizeof(*msg)); 154 == ASYNC_TOKEN_COMPLETED);
155 memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
127 156
128 return 0; 157 return 0;
129} 158}
130EXPORT_SYMBOL_GPL(opal_async_wait_response); 159EXPORT_SYMBOL_GPL(opal_async_wait_response);
131 160
161int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg)
162{
163 unsigned long flags;
164 int ret;
165
166 if (token >= opal_max_async_tokens) {
167 pr_err("%s: Invalid token passed\n", __func__);
168 return -EINVAL;
169 }
170
171 if (!msg) {
172 pr_err("%s: Invalid message pointer passed\n", __func__);
173 return -EINVAL;
174 }
175
176 /*
177 * The first time this gets called we mark the token as DISPATCHED
 178 * so that if wait_event_interruptible() returns non-zero and the
179 * caller frees the token, we know not to actually free the token
180 * until the response comes.
181 *
182 * Only change if the token is ALLOCATED - it may have been
183 * completed even before the caller gets around to calling this
184 * the first time.
185 *
186 * There is also a dirty great comment at the token allocation
187 * function that if the opal call returns OPAL_ASYNC_COMPLETION to
 188 * the caller, then the caller *must* call this or the
 189 * non-interruptible version before doing anything else with the
190 * token.
191 */
192 if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) {
193 spin_lock_irqsave(&opal_async_comp_lock, flags);
194 if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED)
195 opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED;
196 spin_unlock_irqrestore(&opal_async_comp_lock, flags);
197 }
198
199 /*
200 * Wakeup the poller before we wait for events to speed things
201 * up on platforms or simulators where the interrupts aren't
202 * functional.
203 */
204 opal_wake_poller();
205 ret = wait_event_interruptible(opal_async_wait,
206 opal_async_tokens[token].state ==
207 ASYNC_TOKEN_COMPLETED);
208 if (!ret)
209 memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
210
211 return ret;
212}
213EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible);
214
215/* Called from interrupt context */
132static int opal_async_comp_event(struct notifier_block *nb, 216static int opal_async_comp_event(struct notifier_block *nb,
133 unsigned long msg_type, void *msg) 217 unsigned long msg_type, void *msg)
134{ 218{
135 struct opal_msg *comp_msg = msg; 219 struct opal_msg *comp_msg = msg;
220 enum opal_async_token_state state;
136 unsigned long flags; 221 unsigned long flags;
137 uint64_t token; 222 uint64_t token;
138 223
@@ -140,11 +225,17 @@ static int opal_async_comp_event(struct notifier_block *nb,
140 return 0; 225 return 0;
141 226
142 token = be64_to_cpu(comp_msg->params[0]); 227 token = be64_to_cpu(comp_msg->params[0]);
143 memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg));
144 spin_lock_irqsave(&opal_async_comp_lock, flags); 228 spin_lock_irqsave(&opal_async_comp_lock, flags);
145 __set_bit(token, opal_async_complete_map); 229 state = opal_async_tokens[token].state;
230 opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED;
146 spin_unlock_irqrestore(&opal_async_comp_lock, flags); 231 spin_unlock_irqrestore(&opal_async_comp_lock, flags);
147 232
233 if (state == ASYNC_TOKEN_ABANDONED) {
234 /* Free the token, no one else will */
235 opal_async_release_token(token);
236 return 0;
237 }
238 memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg));
148 wake_up(&opal_async_wait); 239 wake_up(&opal_async_wait);
149 240
150 return 0; 241 return 0;
@@ -178,32 +269,23 @@ int __init opal_async_comp_init(void)
178 } 269 }
179 270
180 opal_max_async_tokens = be32_to_cpup(async); 271 opal_max_async_tokens = be32_to_cpup(async);
181 if (opal_max_async_tokens > N_ASYNC_COMPLETIONS) 272 opal_async_tokens = kcalloc(opal_max_async_tokens,
182 opal_max_async_tokens = N_ASYNC_COMPLETIONS; 273 sizeof(*opal_async_tokens), GFP_KERNEL);
274 if (!opal_async_tokens) {
275 err = -ENOMEM;
276 goto out_opal_node;
277 }
183 278
184 err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP, 279 err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
185 &opal_async_comp_nb); 280 &opal_async_comp_nb);
186 if (err) { 281 if (err) {
187 pr_err("%s: Can't register OPAL event notifier (%d)\n", 282 pr_err("%s: Can't register OPAL event notifier (%d)\n",
188 __func__, err); 283 __func__, err);
284 kfree(opal_async_tokens);
189 goto out_opal_node; 285 goto out_opal_node;
190 } 286 }
191 287
192 opal_async_responses = kzalloc( 288 sema_init(&opal_async_sem, opal_max_async_tokens);
193 sizeof(*opal_async_responses) * opal_max_async_tokens,
194 GFP_KERNEL);
195 if (!opal_async_responses) {
196 pr_err("%s: Out of memory, failed to do asynchronous "
197 "completion init\n", __func__);
198 err = -ENOMEM;
199 goto out_opal_node;
200 }
201
202 /* Initialize to 1 less than the maximum tokens available, as we may
203 * require to pop one during emergency through synchronous call to
204 * __opal_async_get_token()
205 */
206 sema_init(&opal_async_sem, opal_max_async_tokens - 1);
207 289
208out_opal_node: 290out_opal_node:
209 of_node_put(opal_node); 291 of_node_put(opal_node);
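
Taken together, the opal-async rework above replaces the completion bitmap and shared response array with a per-token state machine (ALLOCATED -> DISPATCHED -> COMPLETED, with ABANDONED covering a wait that was interrupted). A minimal caller sketch of the resulting pattern - function name hypothetical, opal_sensor_read() standing in for any token-taking OPAL call; the opal-sensor.c conversion below is the in-tree version:

static int opal_async_call_sketch(u32 handle, __be32 *data)
{
	struct opal_msg msg;
	int token, ret;

	token = opal_async_get_token_interruptible();
	if (token < 0)
		return token;

	ret = opal_sensor_read(handle, token, data);
	if (ret == OPAL_ASYNC_COMPLETION) {
		/* Marks the token DISPATCHED, then sleeps until the
		 * notifier above marks it COMPLETED and copies in the
		 * response.
		 */
		ret = opal_async_wait_response_interruptible(token, &msg);
		if (!ret)
			ret = opal_error_code(opal_get_async_rc(msg));
	} else {
		ret = opal_error_code(ret);
	}

	/* Always release. If the interruptible wait was cut short, the
	 * token is left ABANDONED and the completion handler above
	 * frees it once the response finally arrives.
	 */
	opal_async_release_token(token);
	return ret;
}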
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index d78fed728cdf..c9e1a4ff295c 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * OPAL hypervisor Maintenance interrupt handling support in PowreNV. 2 * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index ecdcba9d1220..9d1b8c0aaf93 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -174,8 +174,14 @@ void opal_event_shutdown(void)
174 174
175 /* First free interrupts, which will also mask them */ 175 /* First free interrupts, which will also mask them */
176 for (i = 0; i < opal_irq_count; i++) { 176 for (i = 0; i < opal_irq_count; i++) {
177 if (opal_irqs[i]) 177 if (!opal_irqs[i])
178 continue;
179
180 if (in_interrupt())
181 disable_irq_nosync(opal_irqs[i]);
182 else
178 free_irq(opal_irqs[i], NULL); 183 free_irq(opal_irqs[i], NULL);
184
179 opal_irqs[i] = 0; 185 opal_irqs[i] = 0;
180 } 186 }
181} 187}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 4495f428b500..d9916ea62305 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -1,5 +1,5 @@
1/* 1/*
 2 * OPAL asynchronous Memory error handling support in PowreNV. 2 * OPAL asynchronous Memory error handling support in PowerNV.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index aa267f120033..0a7074bb91dc 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -19,13 +19,10 @@
19 */ 19 */
20 20
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/mutex.h>
23#include <linux/of_platform.h> 22#include <linux/of_platform.h>
24#include <asm/opal.h> 23#include <asm/opal.h>
25#include <asm/machdep.h> 24#include <asm/machdep.h>
26 25
27static DEFINE_MUTEX(opal_sensor_mutex);
28
29/* 26/*
30 * This will return sensor information to driver based on the requested sensor 27 * This will return sensor information to driver based on the requested sensor
31 * handle. A handle is an opaque id for the powernv, read by the driver from the 28 * handle. A handle is an opaque id for the powernv, read by the driver from the
@@ -38,13 +35,9 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
38 __be32 data; 35 __be32 data;
39 36
40 token = opal_async_get_token_interruptible(); 37 token = opal_async_get_token_interruptible();
41 if (token < 0) { 38 if (token < 0)
42 pr_err("%s: Couldn't get the token, returning\n", __func__); 39 return token;
43 ret = token;
44 goto out;
45 }
46 40
47 mutex_lock(&opal_sensor_mutex);
48 ret = opal_sensor_read(sensor_hndl, token, &data); 41 ret = opal_sensor_read(sensor_hndl, token, &data);
49 switch (ret) { 42 switch (ret) {
50 case OPAL_ASYNC_COMPLETION: 43 case OPAL_ASYNC_COMPLETION:
@@ -52,7 +45,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
52 if (ret) { 45 if (ret) {
53 pr_err("%s: Failed to wait for the async response, %d\n", 46 pr_err("%s: Failed to wait for the async response, %d\n",
54 __func__, ret); 47 __func__, ret);
55 goto out_token; 48 goto out;
56 } 49 }
57 50
58 ret = opal_error_code(opal_get_async_rc(msg)); 51 ret = opal_error_code(opal_get_async_rc(msg));
@@ -73,10 +66,8 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
73 break; 66 break;
74 } 67 }
75 68
76out_token:
77 mutex_unlock(&opal_sensor_mutex);
78 opal_async_release_token(token);
79out: 69out:
70 opal_async_release_token(token);
80 return ret; 71 return ret;
81} 72}
82EXPORT_SYMBOL_GPL(opal_get_sensor_data); 73EXPORT_SYMBOL_GPL(opal_get_sensor_data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 8c1ede2d3f7e..6f4b00a2ac46 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -94,7 +94,7 @@ opal_return:
94 * bytes (always BE) since MSR:LE will end up fixed up as a side 94 * bytes (always BE) since MSR:LE will end up fixed up as a side
95 * effect of the rfid. 95 * effect of the rfid.
96 */ 96 */
97 FIXUP_ENDIAN 97 FIXUP_ENDIAN_HV
98 ld r2,PACATOC(r13); 98 ld r2,PACATOC(r13);
99 lwz r4,8(r1); 99 lwz r4,8(r1);
100 ld r5,PPC_LR_STKOFF(r1); 100 ld r5,PPC_LR_STKOFF(r1);
@@ -120,7 +120,7 @@ opal_real_call:
120 hrfid 120 hrfid
121 121
122opal_return_realmode: 122opal_return_realmode:
123 FIXUP_ENDIAN 123 FIXUP_ENDIAN_HV
124 ld r2,PACATOC(r13); 124 ld r2,PACATOC(r13);
125 lwz r11,8(r1); 125 lwz r11,8(r1);
126 ld r12,PPC_LR_STKOFF(r1) 126 ld r12,PPC_LR_STKOFF(r1)
@@ -307,6 +307,7 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
307OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); 307OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
308OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); 308OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
309OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); 309OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
310OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
310OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); 311OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
311OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); 312OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
312OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); 313OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 65c79ecf5a4d..041ddbd1fc57 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -998,6 +998,7 @@ int opal_error_code(int rc)
998 998
999 case OPAL_PARAMETER: return -EINVAL; 999 case OPAL_PARAMETER: return -EINVAL;
1000 case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; 1000 case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
1001 case OPAL_BUSY:
1001 case OPAL_BUSY_EVENT: return -EBUSY; 1002 case OPAL_BUSY_EVENT: return -EBUSY;
1002 case OPAL_NO_MEM: return -ENOMEM; 1003 case OPAL_NO_MEM: return -ENOMEM;
1003 case OPAL_PERMISSION: return -EPERM; 1004 case OPAL_PERMISSION: return -EPERM;
@@ -1037,3 +1038,4 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
1037/* Export this for KVM */ 1038/* Export this for KVM */
1038EXPORT_SYMBOL_GPL(opal_int_set_mfrr); 1039EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
1039EXPORT_SYMBOL_GPL(opal_int_eoi); 1040EXPORT_SYMBOL_GPL(opal_int_eoi);
1041EXPORT_SYMBOL_GPL(opal_error_code);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57f9e55f4352..749055553064 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1002,9 +1002,12 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
1002 } 1002 }
1003 1003
1004 /* 1004 /*
1005 * After doing so, there would be a "hole" in the /proc/iomem when 1005 * Since M64 BAR shares segments among all possible 256 PEs,
1006 * offset is a positive value. It looks like the device return some 1006 * we have to shift the beginning of PF IOV BAR to make it start from
1007 * mmio back to the system, which actually no one could use it. 1007 * the segment which belongs to the PE number assigned to the first VF.
1008 * This creates a "hole" in the /proc/iomem which could be used for
1009 * allocating other resources so we reserve this area below and
1010 * release when IOV is released.
1008 */ 1011 */
1009 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 1012 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
1010 res = &dev->resource[i + PCI_IOV_RESOURCES]; 1013 res = &dev->resource[i + PCI_IOV_RESOURCES];
@@ -1018,7 +1021,22 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
1018 dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", 1021 dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
1019 i, &res2, res, (offset > 0) ? "En" : "Dis", 1022 i, &res2, res, (offset > 0) ? "En" : "Dis",
1020 num_vfs, offset); 1023 num_vfs, offset);
1024
1025 if (offset < 0) {
1026 devm_release_resource(&dev->dev, &pdn->holes[i]);
1027 memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
1028 }
1029
1021 pci_update_resource(dev, i + PCI_IOV_RESOURCES); 1030 pci_update_resource(dev, i + PCI_IOV_RESOURCES);
1031
1032 if (offset > 0) {
1033 pdn->holes[i].start = res2.start;
1034 pdn->holes[i].end = res2.start + size * offset - 1;
1035 pdn->holes[i].flags = IORESOURCE_BUS;
1036 pdn->holes[i].name = "pnv_iov_reserved";
1037 devm_request_resource(&dev->dev, res->parent,
1038 &pdn->holes[i]);
1039 }
1022 } 1040 }
1023 return 0; 1041 return 0;
1024} 1042}
@@ -2779,7 +2797,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
2779 if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) 2797 if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
2780 return -EINVAL; 2798 return -EINVAL;
2781 2799
2782 if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) 2800 if (!is_power_of_2(window_size))
2783 return -EINVAL; 2801 return -EINVAL;
2784 2802
2785 /* Adjust direct table size from window_size and levels */ 2803 /* Adjust direct table size from window_size and levels */
@@ -3293,8 +3311,7 @@ static void pnv_pci_ioda_fixup(void)
3293 pnv_pci_ioda_create_dbgfs(); 3311 pnv_pci_ioda_create_dbgfs();
3294 3312
3295#ifdef CONFIG_EEH 3313#ifdef CONFIG_EEH
3296 eeh_init(); 3314 pnv_eeh_post_init();
3297 eeh_addr_cache_build();
3298#endif 3315#endif
3299} 3316}
3300 3317
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b47f9406d97e..b772d7473896 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -188,6 +188,9 @@ struct pnv_phb {
188 188
189 /* Bitmask for MMIO register usage */ 189 /* Bitmask for MMIO register usage */
190 unsigned long mmio_atsd_usage; 190 unsigned long mmio_atsd_usage;
191
192 /* Do we need to explicitly flush the nest mmu? */
193 bool nmmu_flush;
191 } npu; 194 } npu;
192 195
193#ifdef CONFIG_CXL_BASE 196#ifdef CONFIG_CXL_BASE
@@ -235,6 +238,7 @@ extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
235extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); 238extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
236extern bool pnv_pci_enable_device_hook(struct pci_dev *dev); 239extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
237extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); 240extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
241extern int pnv_eeh_post_init(void);
238 242
239extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, 243extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
240 const char *fmt, ...); 244 const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index bbb73aa0eb8f..1edfbc1e40f4 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,6 +36,7 @@
36#include <asm/opal.h> 36#include <asm/opal.h>
37#include <asm/kexec.h> 37#include <asm/kexec.h>
38#include <asm/smp.h> 38#include <asm/smp.h>
39#include <asm/tm.h>
39 40
40#include "powernv.h" 41#include "powernv.h"
41 42
@@ -290,6 +291,7 @@ static void __init pnv_setup_machdep_opal(void)
290 ppc_md.restart = pnv_restart; 291 ppc_md.restart = pnv_restart;
291 pm_power_off = pnv_power_off; 292 pm_power_off = pnv_power_off;
292 ppc_md.halt = pnv_halt; 293 ppc_md.halt = pnv_halt;
294 /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
293 ppc_md.machine_check_exception = opal_machine_check; 295 ppc_md.machine_check_exception = opal_machine_check;
294 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; 296 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
295 ppc_md.hmi_exception_early = opal_hmi_exception_early; 297 ppc_md.hmi_exception_early = opal_hmi_exception_early;
@@ -311,6 +313,28 @@ static int __init pnv_probe(void)
311 return 1; 313 return 1;
312} 314}
313 315
316#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
317void __init pnv_tm_init(void)
318{
319 if (!firmware_has_feature(FW_FEATURE_OPAL) ||
320 !pvr_version_is(PVR_POWER9) ||
321 early_cpu_has_feature(CPU_FTR_TM))
322 return;
323
324 if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
325 return;
326
327 pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
328 cur_cpu_spec->cpu_features |= CPU_FTR_TM;
329 /* Make sure "normal" HTM is off (it should be) */
330 cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
331 /* Turn on no suspend mode, and HTM no SC */
332 cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
333 PPC_FEATURE2_HTM_NOSC;
334 tm_suspend_disabled = true;
335}
336#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
337
314/* 338/*
315 * Returns the cpu frequency for 'cpu' in Hz. This is used by 339 * Returns the cpu frequency for 'cpu' in Hz. This is used by
316 * /proc/cpuinfo 340 * /proc/cpuinfo
@@ -319,7 +343,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
319{ 343{
320 unsigned long ret_freq; 344 unsigned long ret_freq;
321 345
322 ret_freq = cpufreq_quick_get(cpu) * 1000ul; 346 ret_freq = cpufreq_get(cpu) * 1000ul;
323 347
324 /* 348 /*
325 * If the backend cpufreq driver does not exist, 349 * If the backend cpufreq driver does not exist,
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index c17f81e433f7..ba030669eca1 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -49,6 +49,13 @@
49 49
50static void pnv_smp_setup_cpu(int cpu) 50static void pnv_smp_setup_cpu(int cpu)
51{ 51{
52 /*
53 * P9 workaround for CI vector load (see traps.c),
54 * enable the corresponding HMI interrupt
55 */
56 if (pvr_version_is(PVR_POWER9))
57 mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
58
52 if (xive_enabled()) 59 if (xive_enabled())
53 xive_smp_setup_cpu(); 60 xive_smp_setup_cpu();
54 else if (cpu != boot_cpuid) 61 else if (cpu != boot_cpuid)
@@ -290,6 +297,54 @@ static void __init pnv_smp_probe(void)
290 } 297 }
291} 298}
292 299
300static int pnv_system_reset_exception(struct pt_regs *regs)
301{
302 if (smp_handle_nmi_ipi(regs))
303 return 1;
304 return 0;
305}
306
307static int pnv_cause_nmi_ipi(int cpu)
308{
309 int64_t rc;
310
311 if (cpu >= 0) {
312 rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu));
313 if (rc != OPAL_SUCCESS)
314 return 0;
315 return 1;
316
317 } else if (cpu == NMI_IPI_ALL_OTHERS) {
318 bool success = true;
319 int c;
320
321
322 /*
323 * We do not use broadcasts (yet), because it's not clear
324 * exactly what semantics Linux wants or the firmware should
325 * provide.
326 */
327 for_each_online_cpu(c) {
328 if (c == smp_processor_id())
329 continue;
330
331 rc = opal_signal_system_reset(
332 get_hard_smp_processor_id(c));
333 if (rc != OPAL_SUCCESS)
334 success = false;
335 }
336 if (success)
337 return 1;
338
339 /*
340 * Caller will fall back to doorbells, which may pick
341 * up the remainders.
342 */
343 }
344
345 return 0;
346}
347
293static struct smp_ops_t pnv_smp_ops = { 348static struct smp_ops_t pnv_smp_ops = {
294 .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ 349 .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
295 .cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */ 350 .cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */
@@ -308,6 +363,10 @@ static struct smp_ops_t pnv_smp_ops = {
308/* This is called very early during platform setup_arch */ 363/* This is called very early during platform setup_arch */
309void __init pnv_smp_init(void) 364void __init pnv_smp_init(void)
310{ 365{
366 if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
367 ppc_md.system_reset_exception = pnv_system_reset_exception;
368 pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
369 }
311 smp_ops = &pnv_smp_ops; 370 smp_ops = &pnv_smp_ops;
312 371
313#ifdef CONFIG_HOTPLUG_CPU 372#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
new file mode 100644
index 000000000000..ca22f1eae050
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -0,0 +1,209 @@
1/*
2 * Copyright 2016-17 IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#define pr_fmt(fmt) "vas: " fmt
11
12#include <linux/types.h>
13#include <linux/slab.h>
14#include <linux/debugfs.h>
15#include <linux/seq_file.h>
16#include "vas.h"
17
18static struct dentry *vas_debugfs;
19
20static char *cop_to_str(int cop)
21{
22 switch (cop) {
23 case VAS_COP_TYPE_FAULT: return "Fault";
24 case VAS_COP_TYPE_842: return "NX-842 Normal Priority";
25 case VAS_COP_TYPE_842_HIPRI: return "NX-842 High Priority";
26 case VAS_COP_TYPE_GZIP: return "NX-GZIP Normal Priority";
27 case VAS_COP_TYPE_GZIP_HIPRI: return "NX-GZIP High Priority";
28 case VAS_COP_TYPE_FTW: return "Fast Thread-wakeup";
29 default: return "Unknown";
30 }
31}
32
33static int info_dbg_show(struct seq_file *s, void *private)
34{
35 struct vas_window *window = s->private;
36
37 mutex_lock(&vas_mutex);
38
39 /* ensure window is not unmapped */
40 if (!window->hvwc_map)
41 goto unlock;
42
43 seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
44 window->tx_win ? "Send" : "Receive");
45 seq_printf(s, "Pid : %d\n", window->pid);
46
47unlock:
48 mutex_unlock(&vas_mutex);
49 return 0;
50}
51
52static int info_dbg_open(struct inode *inode, struct file *file)
53{
54 return single_open(file, info_dbg_show, inode->i_private);
55}
56
57static const struct file_operations info_fops = {
58 .open = info_dbg_open,
59 .read = seq_read,
60 .llseek = seq_lseek,
61 .release = single_release,
62};
63
64static inline void print_reg(struct seq_file *s, struct vas_window *win,
65 char *name, u32 reg)
66{
67 seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
68}
69
70static int hvwc_dbg_show(struct seq_file *s, void *private)
71{
72 struct vas_window *window = s->private;
73
74 mutex_lock(&vas_mutex);
75
76 /* ensure window is not unmapped */
77 if (!window->hvwc_map)
78 goto unlock;
79
80 print_reg(s, window, VREG(LPID));
81 print_reg(s, window, VREG(PID));
82 print_reg(s, window, VREG(XLATE_MSR));
83 print_reg(s, window, VREG(XLATE_LPCR));
84 print_reg(s, window, VREG(XLATE_CTL));
85 print_reg(s, window, VREG(AMR));
86 print_reg(s, window, VREG(SEIDR));
87 print_reg(s, window, VREG(FAULT_TX_WIN));
88 print_reg(s, window, VREG(OSU_INTR_SRC_RA));
89 print_reg(s, window, VREG(HV_INTR_SRC_RA));
90 print_reg(s, window, VREG(PSWID));
91 print_reg(s, window, VREG(LFIFO_BAR));
92 print_reg(s, window, VREG(LDATA_STAMP_CTL));
93 print_reg(s, window, VREG(LDMA_CACHE_CTL));
94 print_reg(s, window, VREG(LRFIFO_PUSH));
95 print_reg(s, window, VREG(CURR_MSG_COUNT));
96 print_reg(s, window, VREG(LNOTIFY_AFTER_COUNT));
97 print_reg(s, window, VREG(LRX_WCRED));
98 print_reg(s, window, VREG(LRX_WCRED_ADDER));
99 print_reg(s, window, VREG(TX_WCRED));
100 print_reg(s, window, VREG(TX_WCRED_ADDER));
101 print_reg(s, window, VREG(LFIFO_SIZE));
102 print_reg(s, window, VREG(WINCTL));
103 print_reg(s, window, VREG(WIN_STATUS));
104 print_reg(s, window, VREG(WIN_CTX_CACHING_CTL));
105 print_reg(s, window, VREG(TX_RSVD_BUF_COUNT));
106 print_reg(s, window, VREG(LRFIFO_WIN_PTR));
107 print_reg(s, window, VREG(LNOTIFY_CTL));
108 print_reg(s, window, VREG(LNOTIFY_PID));
109 print_reg(s, window, VREG(LNOTIFY_LPID));
110 print_reg(s, window, VREG(LNOTIFY_TID));
111 print_reg(s, window, VREG(LNOTIFY_SCOPE));
112 print_reg(s, window, VREG(NX_UTIL_ADDER));
113unlock:
114 mutex_unlock(&vas_mutex);
115 return 0;
116}
117
118static int hvwc_dbg_open(struct inode *inode, struct file *file)
119{
120 return single_open(file, hvwc_dbg_show, inode->i_private);
121}
122
123static const struct file_operations hvwc_fops = {
124 .open = hvwc_dbg_open,
125 .read = seq_read,
126 .llseek = seq_lseek,
127 .release = single_release,
128};
129
130void vas_window_free_dbgdir(struct vas_window *window)
131{
132 if (window->dbgdir) {
133 debugfs_remove_recursive(window->dbgdir);
134 kfree(window->dbgname);
135 window->dbgdir = NULL;
136 window->dbgname = NULL;
137 }
138}
139
140void vas_window_init_dbgdir(struct vas_window *window)
141{
142 struct dentry *f, *d;
143
144 if (!window->vinst->dbgdir)
145 return;
146
147 window->dbgname = kzalloc(16, GFP_KERNEL);
148 if (!window->dbgname)
149 return;
150
151 snprintf(window->dbgname, 16, "w%d", window->winid);
152
153 d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir);
154 if (IS_ERR(d))
155 goto free_name;
156
157 window->dbgdir = d;
158
159 f = debugfs_create_file("info", 0444, d, window, &info_fops);
160 if (IS_ERR(f))
161 goto remove_dir;
162
163 f = debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops);
164 if (IS_ERR(f))
165 goto remove_dir;
166
167 return;
168
169free_name:
170 kfree(window->dbgname);
171 window->dbgname = NULL;
172
173remove_dir:
174 debugfs_remove_recursive(window->dbgdir);
175 window->dbgdir = NULL;
176}
177
178void vas_instance_init_dbgdir(struct vas_instance *vinst)
179{
180 struct dentry *d;
181
182 if (!vas_debugfs)
183 return;
184
185 vinst->dbgname = kzalloc(16, GFP_KERNEL);
186 if (!vinst->dbgname)
187 return;
188
189 snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id);
190
191 d = debugfs_create_dir(vinst->dbgname, vas_debugfs);
192 if (IS_ERR(d))
193 goto free_name;
194
195 vinst->dbgdir = d;
196 return;
197
198free_name:
199 kfree(vinst->dbgname);
200 vinst->dbgname = NULL;
201 vinst->dbgdir = NULL;
202}
203
204void vas_init_dbgdir(void)
205{
206 vas_debugfs = debugfs_create_dir("vas", NULL);
207 if (IS_ERR(vas_debugfs))
208 vas_debugfs = NULL;
209}
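
The debugfs layout this new file creates is /sys/kernel/debug/vas/v<vasid>/w<winid>/ with an "info" and an "hvwc" file per window. Reading a window might look like this (IDs and pid are hypothetical; assumes debugfs is mounted in the usual place):

	# cat /sys/kernel/debug/vas/v0/w3/info
	Type: Fast Thread-wakeup, Send
	Pid : 1234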
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 5aae845b8cd9..2b3eb01ab110 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -16,7 +16,8 @@
16#include <linux/log2.h> 16#include <linux/log2.h>
17#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
18#include <linux/cred.h> 18#include <linux/cred.h>
19 19#include <asm/switch_to.h>
20#include <asm/ppc-opcode.h>
20#include "vas.h" 21#include "vas.h"
21#include "copy-paste.h" 22#include "copy-paste.h"
22 23
@@ -40,6 +41,16 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len
40 pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); 41 pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
41} 42}
42 43
44u64 vas_win_paste_addr(struct vas_window *win)
45{
46 u64 addr;
47
48 compute_paste_address(win, &addr, NULL);
49
50 return addr;
51}
52EXPORT_SYMBOL(vas_win_paste_addr);
53
43static inline void get_hvwc_mmio_bar(struct vas_window *window, 54static inline void get_hvwc_mmio_bar(struct vas_window *window,
44 u64 *start, int *len) 55 u64 *start, int *len)
45{ 56{
@@ -145,23 +156,37 @@ static void unmap_paste_region(struct vas_window *window)
145} 156}
146 157
147/* 158/*
148 * Unmap the MMIO regions for a window. 159 * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't
160 * unmap when the window's debugfs dir is in use. This serializes close
 161 * of a window even on another VAS instance, but since it's not a critical
162 * path, just minimize the time we hold the mutex for now. We can add
163 * a per-instance mutex later if necessary.
149 */ 164 */
150static void unmap_winctx_mmio_bars(struct vas_window *window) 165static void unmap_winctx_mmio_bars(struct vas_window *window)
151{ 166{
152 int len; 167 int len;
168 void *uwc_map;
169 void *hvwc_map;
153 u64 busaddr_start; 170 u64 busaddr_start;
154 171
155 if (window->hvwc_map) { 172 mutex_lock(&vas_mutex);
173
174 hvwc_map = window->hvwc_map;
175 window->hvwc_map = NULL;
176
177 uwc_map = window->uwc_map;
178 window->uwc_map = NULL;
179
180 mutex_unlock(&vas_mutex);
181
182 if (hvwc_map) {
156 get_hvwc_mmio_bar(window, &busaddr_start, &len); 183 get_hvwc_mmio_bar(window, &busaddr_start, &len);
157 unmap_region(window->hvwc_map, busaddr_start, len); 184 unmap_region(hvwc_map, busaddr_start, len);
158 window->hvwc_map = NULL;
159 } 185 }
160 186
161 if (window->uwc_map) { 187 if (uwc_map) {
162 get_uwc_mmio_bar(window, &busaddr_start, &len); 188 get_uwc_mmio_bar(window, &busaddr_start, &len);
163 unmap_region(window->uwc_map, busaddr_start, len); 189 unmap_region(uwc_map, busaddr_start, len);
164 window->uwc_map = NULL;
165 } 190 }
166} 191}
167 192
@@ -528,6 +553,9 @@ static void vas_window_free(struct vas_window *window)
528 struct vas_instance *vinst = window->vinst; 553 struct vas_instance *vinst = window->vinst;
529 554
530 unmap_winctx_mmio_bars(window); 555 unmap_winctx_mmio_bars(window);
556
557 vas_window_free_dbgdir(window);
558
531 kfree(window); 559 kfree(window);
532 560
533 vas_release_window_id(&vinst->ida, winid); 561 vas_release_window_id(&vinst->ida, winid);
@@ -552,6 +580,8 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
552 if (map_winctx_mmio_bars(window)) 580 if (map_winctx_mmio_bars(window))
553 goto out_free; 581 goto out_free;
554 582
583 vas_window_init_dbgdir(window);
584
555 return window; 585 return window;
556 586
557out_free: 587out_free:
@@ -569,6 +599,32 @@ static void put_rx_win(struct vas_window *rxwin)
569} 599}
570 600
571/* 601/*
602 * Find the user space receive window given the @pswid.
603 * - We must have a valid vasid and it must belong to this instance.
604 * (so both send and receive windows are on the same VAS instance)
605 * - The window must refer to an OPEN, FTW, RECEIVE window.
606 *
607 * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
608 */
609static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
610{
611 int vasid, winid;
612 struct vas_window *rxwin;
613
614 decode_pswid(pswid, &vasid, &winid);
615
616 if (vinst->vas_id != vasid)
617 return ERR_PTR(-EINVAL);
618
619 rxwin = vinst->windows[winid];
620
621 if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
622 return ERR_PTR(-EINVAL);
623
624 return rxwin;
625}
626
627/*
572 * Get the VAS receive window associated with NX engine identified 628 * Get the VAS receive window associated with NX engine identified
573 * by @cop and if applicable, @pswid. 629 * by @cop and if applicable, @pswid.
574 * 630 *
@@ -581,10 +637,10 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
581 637
582 mutex_lock(&vinst->mutex); 638 mutex_lock(&vinst->mutex);
583 639
584 if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) 640 if (cop == VAS_COP_TYPE_FTW)
585 rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL); 641 rxwin = get_user_rxwin(vinst, pswid);
586 else 642 else
587 rxwin = ERR_PTR(-EINVAL); 643 rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
588 644
589 if (!IS_ERR(rxwin)) 645 if (!IS_ERR(rxwin))
590 atomic_inc(&rxwin->num_txwins); 646 atomic_inc(&rxwin->num_txwins);
@@ -674,15 +730,18 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
674 730
675 winctx->rx_fifo = rxattr->rx_fifo; 731 winctx->rx_fifo = rxattr->rx_fifo;
676 winctx->rx_fifo_size = rxattr->rx_fifo_size; 732 winctx->rx_fifo_size = rxattr->rx_fifo_size;
677 winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; 733 winctx->wcreds_max = rxwin->wcreds_max;
678 winctx->pin_win = rxattr->pin_win; 734 winctx->pin_win = rxattr->pin_win;
679 735
680 winctx->nx_win = rxattr->nx_win; 736 winctx->nx_win = rxattr->nx_win;
681 winctx->fault_win = rxattr->fault_win; 737 winctx->fault_win = rxattr->fault_win;
738 winctx->user_win = rxattr->user_win;
739 winctx->rej_no_credit = rxattr->rej_no_credit;
682 winctx->rx_word_mode = rxattr->rx_win_ord_mode; 740 winctx->rx_word_mode = rxattr->rx_win_ord_mode;
683 winctx->tx_word_mode = rxattr->tx_win_ord_mode; 741 winctx->tx_word_mode = rxattr->tx_win_ord_mode;
684 winctx->rx_wcred_mode = rxattr->rx_wcred_mode; 742 winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
685 winctx->tx_wcred_mode = rxattr->tx_wcred_mode; 743 winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
744 winctx->notify_early = rxattr->notify_early;
686 745
687 if (winctx->nx_win) { 746 if (winctx->nx_win) {
688 winctx->data_stamp = true; 747 winctx->data_stamp = true;
@@ -723,7 +782,10 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
723static bool rx_win_args_valid(enum vas_cop_type cop, 782static bool rx_win_args_valid(enum vas_cop_type cop,
724 struct vas_rx_win_attr *attr) 783 struct vas_rx_win_attr *attr)
725{ 784{
726 dump_rx_win_attr(attr); 785 pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
786 attr->fault_win, attr->notify_disable,
787 attr->intr_disable, attr->notify_early,
788 attr->rx_fifo_size);
727 789
728 if (cop >= VAS_COP_TYPE_MAX) 790 if (cop >= VAS_COP_TYPE_MAX)
729 return false; 791 return false;
@@ -735,6 +797,9 @@ static bool rx_win_args_valid(enum vas_cop_type cop,
735 if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) 797 if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
736 return false; 798 return false;
737 799
800 if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
801 return false;
802
738 if (attr->nx_win) { 803 if (attr->nx_win) {
739 /* cannot be fault or user window if it is nx */ 804 /* cannot be fault or user window if it is nx */
740 if (attr->fault_win || attr->user_win) 805 if (attr->fault_win || attr->user_win)
@@ -835,6 +900,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
835 rxwin->nx_win = rxattr->nx_win; 900 rxwin->nx_win = rxattr->nx_win;
836 rxwin->user_win = rxattr->user_win; 901 rxwin->user_win = rxattr->user_win;
837 rxwin->cop = cop; 902 rxwin->cop = cop;
903 rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
838 if (rxattr->user_win) 904 if (rxattr->user_win)
839 rxwin->pid = task_pid_vnr(current); 905 rxwin->pid = task_pid_vnr(current);
840 906
@@ -884,21 +950,23 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
884 */ 950 */
885 memset(winctx, 0, sizeof(struct vas_winctx)); 951 memset(winctx, 0, sizeof(struct vas_winctx));
886 952
887 winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT; 953 winctx->wcreds_max = txwin->wcreds_max;
888 954
889 winctx->user_win = txattr->user_win; 955 winctx->user_win = txattr->user_win;
890 winctx->nx_win = txwin->rxwin->nx_win; 956 winctx->nx_win = txwin->rxwin->nx_win;
891 winctx->pin_win = txattr->pin_win; 957 winctx->pin_win = txattr->pin_win;
958 winctx->rej_no_credit = txattr->rej_no_credit;
959 winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;
892 960
893 winctx->rx_wcred_mode = txattr->rx_wcred_mode; 961 winctx->rx_wcred_mode = txattr->rx_wcred_mode;
894 winctx->tx_wcred_mode = txattr->tx_wcred_mode; 962 winctx->tx_wcred_mode = txattr->tx_wcred_mode;
895 winctx->rx_word_mode = txattr->rx_win_ord_mode; 963 winctx->rx_word_mode = txattr->rx_win_ord_mode;
896 winctx->tx_word_mode = txattr->tx_win_ord_mode; 964 winctx->tx_word_mode = txattr->tx_win_ord_mode;
965 winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;
897 966
898 if (winctx->nx_win) { 967 winctx->intr_disable = true;
968 if (winctx->nx_win)
899 winctx->data_stamp = true; 969 winctx->data_stamp = true;
900 winctx->intr_disable = true;
901 }
902 970
903 winctx->lpid = txattr->lpid; 971 winctx->lpid = txattr->lpid;
904 winctx->pidr = txattr->pidr; 972 winctx->pidr = txattr->pidr;
@@ -921,6 +989,9 @@ static bool tx_win_args_valid(enum vas_cop_type cop,
921 if (cop > VAS_COP_TYPE_MAX) 989 if (cop > VAS_COP_TYPE_MAX)
922 return false; 990 return false;
923 991
992 if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
993 return false;
994
924 if (attr->user_win && 995 if (attr->user_win &&
925 (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count)) 996 (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
926 return false; 997 return false;
@@ -940,6 +1011,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
940 if (!tx_win_args_valid(cop, attr)) 1011 if (!tx_win_args_valid(cop, attr))
941 return ERR_PTR(-EINVAL); 1012 return ERR_PTR(-EINVAL);
942 1013
1014 /*
 1015 * If the caller did not specify a vasid but specified the PSWID of a
1016 * receive window (applicable only to FTW windows), use the vasid
1017 * from that receive window.
1018 */
1019 if (vasid == -1 && attr->pswid)
1020 decode_pswid(attr->pswid, &vasid, NULL);
1021
943 vinst = find_vas_instance(vasid); 1022 vinst = find_vas_instance(vasid);
944 if (!vinst) { 1023 if (!vinst) {
945 pr_devel("vasid %d not found!\n", vasid); 1024 pr_devel("vasid %d not found!\n", vasid);
@@ -958,11 +1037,13 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
958 goto put_rxwin; 1037 goto put_rxwin;
959 } 1038 }
960 1039
1040 txwin->cop = cop;
961 txwin->tx_win = 1; 1041 txwin->tx_win = 1;
962 txwin->rxwin = rxwin; 1042 txwin->rxwin = rxwin;
963 txwin->nx_win = txwin->rxwin->nx_win; 1043 txwin->nx_win = txwin->rxwin->nx_win;
964 txwin->pid = attr->pid; 1044 txwin->pid = attr->pid;
965 txwin->user_win = attr->user_win; 1045 txwin->user_win = attr->user_win;
1046 txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
966 1047
967 init_winctx_for_txwin(txwin, attr, &winctx); 1048 init_winctx_for_txwin(txwin, attr, &winctx);
968 1049
@@ -984,6 +1065,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
984 } 1065 }
985 } 1066 }
986 1067
1068 /*
1069 * Now that we have a send window, ensure context switch issues
1070 * CP_ABORT for this thread.
1071 */
1072 rc = -EINVAL;
1073 if (set_thread_uses_vas() < 0)
1074 goto free_window;
1075
987 set_vinst_win(vinst, txwin); 1076 set_vinst_win(vinst, txwin);
988 1077
989 return txwin; 1078 return txwin;
@@ -1038,50 +1127,110 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
1038 else 1127 else
1039 rc = -EINVAL; 1128 rc = -EINVAL;
1040 1129
1041 print_fifo_msg_count(txwin); 1130 pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
1131 read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
1042 1132
1043 return rc; 1133 return rc;
1044} 1134}
1045EXPORT_SYMBOL_GPL(vas_paste_crb); 1135EXPORT_SYMBOL_GPL(vas_paste_crb);
1046 1136
1137/*
1138 * If credit checking is enabled for this window, poll for the return
 1139 * of window credits (i.e. for NX engines to process any outstanding CRBs).
1140 * Since NX-842 waits for the CRBs to be processed before closing the
1141 * window, we should not have to wait for too long.
1142 *
1143 * TODO: We retry in 10ms intervals now. We could/should probably peek at
1144 * the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
1145 * CRBs on the FIFO and compute the delay dynamically on each retry.
1146 * But that is not really needed until we support NX-GZIP access from
1147 * user space. (NX-842 driver waits for CSB and Fast thread-wakeup
1148 * doesn't use credit checking).
1149 */
1150static void poll_window_credits(struct vas_window *window)
1151{
1152 u64 val;
1153 int creds, mode;
1154
1155 val = read_hvwc_reg(window, VREG(WINCTL));
1156 if (window->tx_win)
1157 mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val);
1158 else
1159 mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val);
1160
1161 if (!mode)
1162 return;
1163retry:
1164 if (window->tx_win) {
1165 val = read_hvwc_reg(window, VREG(TX_WCRED));
1166 creds = GET_FIELD(VAS_TX_WCRED, val);
1167 } else {
1168 val = read_hvwc_reg(window, VREG(LRX_WCRED));
1169 creds = GET_FIELD(VAS_LRX_WCRED, val);
1170 }
1171
1172 if (creds < window->wcreds_max) {
1173 val = 0;
1174 set_current_state(TASK_UNINTERRUPTIBLE);
1175 schedule_timeout(msecs_to_jiffies(10));
1176 goto retry;
1177 }
1178}
1179
1180/*
1181 * Wait for the window to go to "not-busy" state. It should only take a
 1182 * short time to queue a CRB, so the window should not be busy for too
1183 * Trying 5ms intervals.
1184 */
1047static void poll_window_busy_state(struct vas_window *window) 1185static void poll_window_busy_state(struct vas_window *window)
1048{ 1186{
1049 int busy; 1187 int busy;
1050 u64 val; 1188 u64 val;
1051 1189
1052retry: 1190retry:
1053 /*
1054 * Poll Window Busy flag
1055 */
1056 val = read_hvwc_reg(window, VREG(WIN_STATUS)); 1191 val = read_hvwc_reg(window, VREG(WIN_STATUS));
1057 busy = GET_FIELD(VAS_WIN_BUSY, val); 1192 busy = GET_FIELD(VAS_WIN_BUSY, val);
1058 if (busy) { 1193 if (busy) {
1059 val = 0; 1194 val = 0;
1060 set_current_state(TASK_UNINTERRUPTIBLE); 1195 set_current_state(TASK_UNINTERRUPTIBLE);
1061 schedule_timeout(HZ); 1196 schedule_timeout(msecs_to_jiffies(5));
1062 goto retry; 1197 goto retry;
1063 } 1198 }
1064} 1199}
1065 1200
1201/*
1202 * Have the hardware cast a window out of cache and wait for it to
1203 * be completed.
1204 *
1205 * NOTE: It can take a relatively long time to cast the window context
1206 * out of the cache. It is not strictly necessary to cast out if:
1207 *
1208 * - we clear the "Pin Window" bit (so hardware is free to evict)
1209 *
1210 * - we re-initialize the window context when it is reassigned.
1211 *
 1212 * We do the former in vas_win_close() and the latter in vas_win_open().
1213 * So, ignoring the cast-out for now. We can add it as needed. If
1214 * casting out becomes necessary we should consider offloading the
1215 * job to a worker thread, so the window close can proceed quickly.
1216 */
1066static void poll_window_castout(struct vas_window *window) 1217static void poll_window_castout(struct vas_window *window)
1067{ 1218{
1068 int cached; 1219 /* stub for now */
1069 u64 val; 1220}
1070 1221
1071 /* Cast window context out of the cache */ 1222/*
1072retry: 1223 * Unpin and close a window so no new requests are accepted and the
1073 val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL)); 1224 * hardware can evict this window from cache if necessary.
1074 cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val); 1225 */
1075 if (cached) { 1226static void unpin_close_window(struct vas_window *window)
1076 val = 0ULL; 1227{
1077 val = SET_FIELD(VAS_CASTOUT_REQ, val, 1); 1228 u64 val;
1078 val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
1079 write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
1080 1229
1081 set_current_state(TASK_UNINTERRUPTIBLE); 1230 val = read_hvwc_reg(window, VREG(WINCTL));
1082 schedule_timeout(HZ); 1231 val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
1083 goto retry; 1232 val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
1084 } 1233 write_hvwc_reg(window, VREG(WINCTL), val);
1085} 1234}
1086 1235
1087/* 1236/*
@@ -1098,8 +1247,6 @@ retry:
1098 */ 1247 */
1099int vas_win_close(struct vas_window *window) 1248int vas_win_close(struct vas_window *window)
1100{ 1249{
1101 u64 val;
1102
1103 if (!window) 1250 if (!window)
1104 return 0; 1251 return 0;
1105 1252
@@ -1115,11 +1262,9 @@ int vas_win_close(struct vas_window *window)
1115 1262
1116 poll_window_busy_state(window); 1263 poll_window_busy_state(window);
1117 1264
1118 /* Unpin window from cache and close it */ 1265 unpin_close_window(window);
1119 val = read_hvwc_reg(window, VREG(WINCTL)); 1266
1120 val = SET_FIELD(VAS_WINCTL_PIN, val, 0); 1267 poll_window_credits(window);
1121 val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
1122 write_hvwc_reg(window, VREG(WINCTL), val);
1123 1268
1124 poll_window_castout(window); 1269 poll_window_castout(window);
1125 1270
@@ -1132,3 +1277,12 @@ int vas_win_close(struct vas_window *window)
1132 return 0; 1277 return 0;
1133} 1278}
1134EXPORT_SYMBOL_GPL(vas_win_close); 1279EXPORT_SYMBOL_GPL(vas_win_close);
1280
1281/*
1282 * Return a system-wide unique window id for the window @win.
1283 */
1284u32 vas_win_id(struct vas_window *win)
1285{
1286 return encode_pswid(win->vinst->vas_id, win->winid);
1287}
1288EXPORT_SYMBOL_GPL(vas_win_id);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index 565a4878fefa..c488621dbec3 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -18,15 +18,18 @@
18#include <linux/of_platform.h> 18#include <linux/of_platform.h>
19#include <linux/of_address.h> 19#include <linux/of_address.h>
20#include <linux/of.h> 20#include <linux/of.h>
21#include <asm/prom.h>
21 22
22#include "vas.h" 23#include "vas.h"
23 24
24static DEFINE_MUTEX(vas_mutex); 25DEFINE_MUTEX(vas_mutex);
25static LIST_HEAD(vas_instances); 26static LIST_HEAD(vas_instances);
26 27
28static DEFINE_PER_CPU(int, cpu_vas_id);
29
27static int init_vas_instance(struct platform_device *pdev) 30static int init_vas_instance(struct platform_device *pdev)
28{ 31{
29 int rc, vasid; 32 int rc, cpu, vasid;
30 struct resource *res; 33 struct resource *res;
31 struct vas_instance *vinst; 34 struct vas_instance *vinst;
32 struct device_node *dn = pdev->dev.of_node; 35 struct device_node *dn = pdev->dev.of_node;
@@ -74,10 +77,17 @@ static int init_vas_instance(struct platform_device *pdev)
74 "paste_win_id_shift 0x%llx\n", pdev->name, vasid, 77 "paste_win_id_shift 0x%llx\n", pdev->name, vasid,
75 vinst->paste_base_addr, vinst->paste_win_id_shift); 78 vinst->paste_base_addr, vinst->paste_win_id_shift);
76 79
80 for_each_possible_cpu(cpu) {
81 if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
82 per_cpu(cpu_vas_id, cpu) = vasid;
83 }
84
77 mutex_lock(&vas_mutex); 85 mutex_lock(&vas_mutex);
78 list_add(&vinst->node, &vas_instances); 86 list_add(&vinst->node, &vas_instances);
79 mutex_unlock(&vas_mutex); 87 mutex_unlock(&vas_mutex);
80 88
89 vas_instance_init_dbgdir(vinst);
90
81 dev_set_drvdata(&pdev->dev, vinst); 91 dev_set_drvdata(&pdev->dev, vinst);
82 92
83 return 0; 93 return 0;
@@ -98,6 +108,10 @@ struct vas_instance *find_vas_instance(int vasid)
98 struct vas_instance *vinst; 108 struct vas_instance *vinst;
99 109
100 mutex_lock(&vas_mutex); 110 mutex_lock(&vas_mutex);
111
112 if (vasid == -1)
113 vasid = per_cpu(cpu_vas_id, smp_processor_id());
114
101 list_for_each(ent, &vas_instances) { 115 list_for_each(ent, &vas_instances) {
102 vinst = list_entry(ent, struct vas_instance, node); 116 vinst = list_entry(ent, struct vas_instance, node);
103 if (vinst->vas_id == vasid) { 117 if (vinst->vas_id == vasid) {
@@ -111,6 +125,17 @@ struct vas_instance *find_vas_instance(int vasid)
111 return NULL; 125 return NULL;
112} 126}
113 127
128int chip_to_vas_id(int chipid)
129{
130 int cpu;
131
132 for_each_possible_cpu(cpu) {
133 if (cpu_to_chip_id(cpu) == chipid)
134 return per_cpu(cpu_vas_id, cpu);
135 }
136 return -1;
137}
138
114static int vas_probe(struct platform_device *pdev) 139static int vas_probe(struct platform_device *pdev)
115{ 140{
116 return init_vas_instance(pdev); 141 return init_vas_instance(pdev);
@@ -134,6 +159,8 @@ static int __init vas_init(void)
134 int found = 0; 159 int found = 0;
135 struct device_node *dn; 160 struct device_node *dn;
136 161
162 vas_init_dbgdir();
163
137 platform_driver_register(&vas_driver); 164 platform_driver_register(&vas_driver);
138 165
139 for_each_compatible_node(dn, NULL, "ibm,vas") { 166 for_each_compatible_node(dn, NULL, "ibm,vas") {
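
The cpu_vas_id percpu variable and chip_to_vas_id() added above let a caller resolve the VAS instance on its own chip instead of hard-coding a vasid. A minimal sketch of a prospective user (function name hypothetical; attributes left at the vas_init_rx_win_attr() defaults):

static struct vas_window *open_local_rxwin(void)
{
	struct vas_rx_win_attr rxattr;
	int vasid;

	/* VAS instance on the same chip as the current CPU */
	vasid = chip_to_vas_id(cpu_to_chip_id(smp_processor_id()));
	if (vasid < 0)
		return ERR_PTR(-ENODEV);

	vas_init_rx_win_attr(&rxattr, VAS_COP_TYPE_842);
	return vas_rx_win_open(vasid, VAS_COP_TYPE_842, &rxattr);
}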
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index 38dee5d50f31..ae0100fd35bb 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -13,6 +13,8 @@
13#include <linux/idr.h> 13#include <linux/idr.h>
14#include <asm/vas.h> 14#include <asm/vas.h>
15#include <linux/io.h> 15#include <linux/io.h>
16#include <linux/dcache.h>
17#include <linux/mutex.h>
16 18
17/* 19/*
18 * Overview of Virtual Accelerator Switchboard (VAS). 20 * Overview of Virtual Accelerator Switchboard (VAS).
@@ -106,8 +108,8 @@
106 * 108 *
107 * TODO: Needs tuning for per-process credits 109 * TODO: Needs tuning for per-process credits
108 */ 110 */
109#define VAS_WCREDS_MIN 16 111#define VAS_RX_WCREDS_MAX ((64 << 10) - 1)
110#define VAS_WCREDS_MAX ((64 << 10) - 1) 112#define VAS_TX_WCREDS_MAX ((4 << 10) - 1)
111#define VAS_WCREDS_DEFAULT (1 << 10) 113#define VAS_WCREDS_DEFAULT (1 << 10)
112 114
113/* 115/*
@@ -259,6 +261,16 @@
259#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63) 261#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63)
260 262
261/* 263/*
264 * VREG(x):
265 * Expand a register's short name (eg: LPID) into two parameters:
266 * - the register's short name in string form ("LPID"), and
267 * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
268 * register's offset in the window context
269 */
270#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
271#define VREG(r) VREG_SFX(r, _OFFSET)
272
273/*
262 * Local Notify Scope Control Register. (Receive windows only). 274 * Local Notify Scope Control Register. (Receive windows only).
263 */ 275 */
264enum vas_notify_scope { 276enum vas_notify_scope {
@@ -307,6 +319,9 @@ struct vas_instance {
307 struct mutex mutex; 319 struct mutex mutex;
308 struct vas_window *rxwin[VAS_COP_TYPE_MAX]; 320 struct vas_window *rxwin[VAS_COP_TYPE_MAX];
309 struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; 321 struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
322
323 char *dbgname;
324 struct dentry *dbgdir;
310}; 325};
311 326
312/* 327/*
@@ -322,6 +337,10 @@ struct vas_window {
322 void *hvwc_map; /* HV window context */ 337 void *hvwc_map; /* HV window context */
323 void *uwc_map; /* OS/User window context */ 338 void *uwc_map; /* OS/User window context */
324 pid_t pid; /* Linux process id of owner */ 339 pid_t pid; /* Linux process id of owner */
340 int wcreds_max; /* Window credits */
341
342 char *dbgname;
343 struct dentry *dbgdir;
325 344
326 /* Fields applicable only to send windows */ 345 /* Fields applicable only to send windows */
327 void *paste_kaddr; 346 void *paste_kaddr;
@@ -383,45 +402,23 @@ struct vas_winctx {
383 enum vas_notify_after_count notify_after_count; 402 enum vas_notify_after_count notify_after_count;
384}; 403};
385 404
386extern struct vas_instance *find_vas_instance(int vasid); 405extern struct mutex vas_mutex;
387 406
388/* 407extern struct vas_instance *find_vas_instance(int vasid);
389 * VREG(x): 408extern void vas_init_dbgdir(void);
390 * Expand a register's short name (eg: LPID) into two parameters: 409extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
391 * - the register's short name in string form ("LPID"), and 410extern void vas_window_init_dbgdir(struct vas_window *win);
392 * - the name of the macro (eg: VAS_LPID_OFFSET), defining the 411extern void vas_window_free_dbgdir(struct vas_window *win);
393 * register's offset in the window context
394 */
395#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
396#define VREG(r) VREG_SFX(r, _OFFSET)
397
398#ifdef vas_debug
399static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
400{
401 pr_err("fault %d, notify %d, intr %d early %d\n",
402 attr->fault_win, attr->notify_disable,
403 attr->intr_disable, attr->notify_early);
404
405 pr_err("rx_fifo_size %d, max value %d\n",
406 attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX);
407}
408 412
409static inline void vas_log_write(struct vas_window *win, char *name, 413static inline void vas_log_write(struct vas_window *win, char *name,
410 void *regptr, u64 val) 414 void *regptr, u64 val)
411{ 415{
412 if (val) 416 if (val)
413 pr_err("%swin #%d: %s reg %p, val 0x%016llx\n", 417 pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
414 win->tx_win ? "Tx" : "Rx", win->winid, name, 418 win->tx_win ? "Tx" : "Rx", win->winid, name,
415 regptr, val); 419 regptr, val);
416} 420}
417 421
418#else /* vas_debug */
419
420#define vas_log_write(win, name, reg, val)
421#define dump_rx_win_attr(attr)
422
423#endif /* vas_debug */
424
425static inline void write_uwc_reg(struct vas_window *win, char *name, 422static inline void write_uwc_reg(struct vas_window *win, char *name,
426 s32 reg, u64 val) 423 s32 reg, u64 val)
427{ 424{
@@ -450,18 +447,32 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
450 return in_be64(win->hvwc_map+reg); 447 return in_be64(win->hvwc_map+reg);
451} 448}
452 449
453#ifdef vas_debug 450/*
454 451 * Encode/decode the Partition Send Window ID (PSWID) for a window in
455static void print_fifo_msg_count(struct vas_window *txwin) 452 * a way that we can uniquely identify any window in the system. i.e.
453 * we should be able to locate the 'struct vas_window' given the PSWID.
454 *
455 * Bits Usage
456 * 0:7 VAS id (8 bits)
 457 * 8:15 Unused, 0 (8 bits)
458 * 16:31 Window id (16 bits)
459 */
460static inline u32 encode_pswid(int vasid, int winid)
456{ 461{
457 uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o); 462 u32 pswid = 0;
458 pr_devel("Winid %d, Msg count %llu\n", txwin->winid,
459 (uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
460}
461#else /* vas_debug */
462 463
463#define print_fifo_msg_count(window) 464 pswid |= vasid << (31 - 7);
465 pswid |= winid;
464 466
465#endif /* vas_debug */ 467 return pswid;
468}
469
470static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
471{
472 if (vasid)
473 *vasid = pswid >> (31 - 7) & 0xFF;
466 474
475 if (winid)
476 *winid = pswid & 0xFFFF;
477}
467#endif /* _VAS_H */ 478#endif /* _VAS_H */
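
A quick sanity check of the encode/decode round trip (values hypothetical):

	u32 pswid = encode_pswid(3, 5);	/* 3 << 24 | 5 == 0x03000005 */
	int vasid, winid;

	decode_pswid(pswid, &vasid, &winid);
	/* vasid == 3 (top byte), winid == 5 (low 16 bits) */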
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fadb95efbb9e..a7d14aa7bb7c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -363,6 +363,7 @@ static int dlpar_online_cpu(struct device_node *dn)
363 BUG_ON(get_cpu_current_state(cpu) 363 BUG_ON(get_cpu_current_state(cpu)
364 != CPU_STATE_OFFLINE); 364 != CPU_STATE_OFFLINE);
365 cpu_maps_update_done(); 365 cpu_maps_update_done();
366 timed_topology_update(1);
366 rc = device_online(get_cpu_device(cpu)); 367 rc = device_online(get_cpu_device(cpu));
367 if (rc) 368 if (rc)
368 goto out; 369 goto out;
@@ -533,6 +534,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
533 set_preferred_offline_state(cpu, 534 set_preferred_offline_state(cpu,
534 CPU_STATE_OFFLINE); 535 CPU_STATE_OFFLINE);
535 cpu_maps_update_done(); 536 cpu_maps_update_done();
537 timed_topology_update(1);
536 rc = device_offline(get_cpu_device(cpu)); 538 rc = device_offline(get_cpu_device(cpu));
537 if (rc) 539 if (rc)
538 goto out; 540 goto out;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 7c181467d0ad..69921f72e2da 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -55,23 +55,23 @@
55 55
56static struct iommu_table_group *iommu_pseries_alloc_group(int node) 56static struct iommu_table_group *iommu_pseries_alloc_group(int node)
57{ 57{
58 struct iommu_table_group *table_group = NULL; 58 struct iommu_table_group *table_group;
59 struct iommu_table *tbl = NULL; 59 struct iommu_table *tbl;
60 struct iommu_table_group_link *tgl = NULL; 60 struct iommu_table_group_link *tgl;
61 61
62 table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, 62 table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
63 node); 63 node);
64 if (!table_group) 64 if (!table_group)
65 goto fail_exit; 65 return NULL;
66 66
67 tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); 67 tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
68 if (!tbl) 68 if (!tbl)
69 goto fail_exit; 69 goto free_group;
70 70
71 tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, 71 tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
72 node); 72 node);
73 if (!tgl) 73 if (!tgl)
74 goto fail_exit; 74 goto free_table;
75 75
76 INIT_LIST_HEAD_RCU(&tbl->it_group_list); 76 INIT_LIST_HEAD_RCU(&tbl->it_group_list);
77 kref_init(&tbl->it_kref); 77 kref_init(&tbl->it_kref);
@@ -82,11 +82,10 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
82 82
83 return table_group; 83 return table_group;
84 84
85fail_exit: 85free_table:
86 kfree(tgl);
87 kfree(table_group);
88 kfree(tbl); 86 kfree(tbl);
89 87free_group:
88 kfree(table_group);
90 return NULL; 89 return NULL;
91} 90}
92 91
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 495ba4e7336d..0ee4a469a4ae 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -93,7 +93,7 @@ void vpa_init(int cpu)
93 return; 93 return;
94 } 94 }
95 95
96#ifdef CONFIG_PPC_STD_MMU_64 96#ifdef CONFIG_PPC_BOOK3S_64
97 /* 97 /*
98 * PAPR says this feature is SLB-Buffer but firmware never 98 * PAPR says this feature is SLB-Buffer but firmware never
99 * reports that. All SPLPAR support SLB shadow buffer. 99 * reports that. All SPLPAR support SLB shadow buffer.
@@ -106,7 +106,7 @@ void vpa_init(int cpu)
106 "cpu %d (hw %d) of area %lx failed with %ld\n", 106 "cpu %d (hw %d) of area %lx failed with %ld\n",
107 cpu, hwcpu, addr, ret); 107 cpu, hwcpu, addr, ret);
108 } 108 }
109#endif /* CONFIG_PPC_STD_MMU_64 */ 109#endif /* CONFIG_PPC_BOOK3S_64 */
110 110
111 /* 111 /*
112 * Register dispatch trace log, if one has been allocated. 112 * Register dispatch trace log, if one has been allocated.
@@ -129,7 +129,7 @@ void vpa_init(int cpu)
129 } 129 }
130} 130}
131 131
132#ifdef CONFIG_PPC_STD_MMU_64 132#ifdef CONFIG_PPC_BOOK3S_64
133 133
134static long pSeries_lpar_hpte_insert(unsigned long hpte_group, 134static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
135 unsigned long vpn, unsigned long pa, 135 unsigned long vpn, unsigned long pa,
@@ -824,7 +824,7 @@ void arch_free_page(struct page *page, int order)
824EXPORT_SYMBOL(arch_free_page); 824EXPORT_SYMBOL(arch_free_page);
825 825
826#endif /* CONFIG_PPC_SMLPAR */ 826#endif /* CONFIG_PPC_SMLPAR */
827#endif /* CONFIG_PPC_STD_MMU_64 */ 827#endif /* CONFIG_PPC_BOOK3S_64 */
828 828
829#ifdef CONFIG_TRACEPOINTS 829#ifdef CONFIG_TRACEPOINTS
830#ifdef HAVE_JUMP_LABEL 830#ifdef HAVE_JUMP_LABEL
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 779fc2a1c8f7..b2706c483067 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -485,7 +485,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
485 seq_printf(m, "shared_processor_mode=%d\n", 485 seq_printf(m, "shared_processor_mode=%d\n",
486 lppaca_shared_proc(get_lppaca())); 486 lppaca_shared_proc(get_lppaca()));
487 487
488#ifdef CONFIG_PPC_STD_MMU_64 488#ifdef CONFIG_PPC_BOOK3S_64
489 seq_printf(m, "slb_size=%d\n", mmu_slb_size); 489 seq_printf(m, "slb_size=%d\n", mmu_slb_size);
490#endif 490#endif
491 parse_em_data(m); 491 parse_em_data(m);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 12277bc9fd9e..d86938260a86 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1592,6 +1592,8 @@ ATTRIBUTE_GROUPS(vio_dev);
1592void vio_unregister_device(struct vio_dev *viodev) 1592void vio_unregister_device(struct vio_dev *viodev)
1593{ 1593{
1594 device_unregister(&viodev->dev); 1594 device_unregister(&viodev->dev);
1595 if (viodev->family == VDEVICE)
1596 irq_dispose_mapping(viodev->irq);
1595} 1597}
1596EXPORT_SYMBOL(vio_unregister_device); 1598EXPORT_SYMBOL(vio_unregister_device);
1597 1599
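
vio_register_device() creates an irq mapping for VDEVICE-family devices, but until now nothing disposed of it, so repeated DLPAR remove/add cycles leaked virq descriptors. The change balances the pair; a sketch of the now-symmetric lifecycle (the registration side is paraphrased, not quoted from this file):

    viodev->irq = irq_create_mapping(NULL, intr);   /* at registration   */
    ...
    device_unregister(&viodev->dev);                /* at unregistration */
    if (viodev->family == VDEVICE)
            irq_dispose_mapping(viodev->irq);
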
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index c60e84e4558d..1b307c80b401 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -184,7 +184,7 @@ static int axon_ram_probe(struct platform_device *device)
184 static int axon_ram_bank_id = -1; 184 static int axon_ram_bank_id = -1;
185 struct axon_ram_bank *bank; 185 struct axon_ram_bank *bank;
186 struct resource resource; 186 struct resource resource;
187 int rc = 0; 187 int rc;
188 188
189 axon_ram_bank_id++; 189 axon_ram_bank_id++;
190 190
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 16f1edd78c40..535cf1f6941c 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -846,12 +846,12 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
846 846
847u32 ipic_get_mcp_status(void) 847u32 ipic_get_mcp_status(void)
848{ 848{
849 return ipic_read(primary_ipic->regs, IPIC_SERMR); 849 return ipic_read(primary_ipic->regs, IPIC_SERSR);
850} 850}
851 851
852void ipic_clear_mcp_status(u32 mask) 852void ipic_clear_mcp_status(u32 mask)
853{ 853{
854 ipic_write(primary_ipic->regs, IPIC_SERMR, mask); 854 ipic_write(primary_ipic->regs, IPIC_SERSR, mask);
855} 855}
856 856
857/* Return an interrupt vector or 0 if no interrupt is pending. */ 857/* Return an interrupt vector or 0 if no interrupt is pending. */
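
This one is a register mix-up fix: on the MPC83xx IPIC, SERMR is the system error mask register and SERSR is the system error status register. Reading MCP status from SERMR reported which sources were enabled rather than which were pending, and writing the ack mask to SERMR corrupted the mask instead of clearing anything. Assuming SERSR follows the usual write-one-to-clear convention for status registers, correct usage reduces to:

    u32 pending = ipic_get_mcp_status();    /* now reads IPIC_SERSR       */
    if (pending)
            ipic_clear_mcp_status(pending); /* ack pending bits via SERSR */
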
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 33351c6704b1..1b2d8cb49abb 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -28,6 +28,7 @@
28#include <linux/bug.h> 28#include <linux/bug.h>
29#include <linux/nmi.h> 29#include <linux/nmi.h>
30#include <linux/ctype.h> 30#include <linux/ctype.h>
31#include <linux/highmem.h>
31 32
32#include <asm/debugfs.h> 33#include <asm/debugfs.h>
33#include <asm/ptrace.h> 34#include <asm/ptrace.h>
@@ -127,6 +128,7 @@ static void byterev(unsigned char *, int);
127static void memex(void); 128static void memex(void);
128static int bsesc(void); 129static int bsesc(void);
129static void dump(void); 130static void dump(void);
131static void show_pte(unsigned long);
130static void prdump(unsigned long, long); 132static void prdump(unsigned long, long);
131static int ppc_inst_dump(unsigned long, long, int); 133static int ppc_inst_dump(unsigned long, long, int);
132static void dump_log_buf(void); 134static void dump_log_buf(void);
@@ -234,6 +236,7 @@ Commands:\n\
234#endif 236#endif
235 "\ 237 "\
236 dr dump stream of raw bytes\n\ 238 dr dump stream of raw bytes\n\
239 dv dump virtual address translation \n\
237 dt dump the tracing buffers (uses printk)\n\ 240 dt dump the tracing buffers (uses printk)\n\
238 dtc dump the tracing buffers for current CPU (uses printk)\n\ 241 dtc dump the tracing buffers for current CPU (uses printk)\n\
239" 242"
@@ -278,6 +281,7 @@ Commands:\n\
278#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E) 281#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E)
279" u dump TLB\n" 282" u dump TLB\n"
280#endif 283#endif
284" U show uptime information\n"
281" ? help\n" 285" ? help\n"
282" # n limit output to n lines per page (for dp, dpa, dl)\n" 286" # n limit output to n lines per page (for dp, dpa, dl)\n"
283" zr reboot\n\ 287" zr reboot\n\
@@ -530,14 +534,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
530 534
531 waiting: 535 waiting:
532 secondary = 1; 536 secondary = 1;
537 spin_begin();
533 while (secondary && !xmon_gate) { 538 while (secondary && !xmon_gate) {
534 if (in_xmon == 0) { 539 if (in_xmon == 0) {
535 if (fromipi) 540 if (fromipi) {
541 spin_end();
536 goto leave; 542 goto leave;
543 }
537 secondary = test_and_set_bit(0, &in_xmon); 544 secondary = test_and_set_bit(0, &in_xmon);
538 } 545 }
539 barrier(); 546 spin_cpu_relax();
547 touch_nmi_watchdog();
540 } 548 }
549 spin_end();
541 550
542 if (!secondary && !xmon_gate) { 551 if (!secondary && !xmon_gate) {
543 /* we are the first cpu to come in */ 552 /* we are the first cpu to come in */
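
spin_begin(), spin_cpu_relax() and spin_end() (from asm/processor.h) drop the hardware thread to low SMT priority for the duration of a busy-wait and restore it afterwards, so CPUs parked in xmon stop starving their SMT siblings; touch_nmi_watchdog() keeps the hard-lockup detector quiet while they deliberately spin. The generic shape of the idiom, as a sketch (cond is a placeholder):

    spin_begin();                   /* lower SMT priority       */
    while (!cond) {
            spin_cpu_relax();       /* low-priority poll        */
            touch_nmi_watchdog();   /* this wait is intentional */
    }
    spin_end();                     /* restore normal priority  */
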
@@ -568,21 +577,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
568 mb(); 577 mb();
569 xmon_gate = 1; 578 xmon_gate = 1;
570 barrier(); 579 barrier();
580 touch_nmi_watchdog();
571 } 581 }
572 582
573 cmdloop: 583 cmdloop:
574 while (in_xmon) { 584 while (in_xmon) {
575 if (secondary) { 585 if (secondary) {
586 spin_begin();
576 if (cpu == xmon_owner) { 587 if (cpu == xmon_owner) {
577 if (!test_and_set_bit(0, &xmon_taken)) { 588 if (!test_and_set_bit(0, &xmon_taken)) {
578 secondary = 0; 589 secondary = 0;
590 spin_end();
579 continue; 591 continue;
580 } 592 }
581 /* missed it */ 593 /* missed it */
582 while (cpu == xmon_owner) 594 while (cpu == xmon_owner)
583 barrier(); 595 spin_cpu_relax();
584 } 596 }
585 barrier(); 597 spin_cpu_relax();
598 touch_nmi_watchdog();
586 } else { 599 } else {
587 cmd = cmds(regs); 600 cmd = cmds(regs);
588 if (cmd != 0) { 601 if (cmd != 0) {
@@ -896,6 +909,26 @@ static void remove_cpu_bpts(void)
896 write_ciabr(0); 909 write_ciabr(0);
897} 910}
898 911
912/* Based on uptime_proc_show(). */
913static void
914show_uptime(void)
915{
916 struct timespec uptime;
917
918 if (setjmp(bus_error_jmp) == 0) {
919 catch_memory_errors = 1;
920 sync();
921
922 get_monotonic_boottime(&uptime);
923 printf("Uptime: %lu.%.2lu seconds\n", (unsigned long)uptime.tv_sec,
924 ((unsigned long)uptime.tv_nsec / (NSEC_PER_SEC/100)));
925
926 sync();
 927 	sync();
 928 	__delay(200);
928 }
929 catch_memory_errors = 0;
930}
931
899static void set_lpp_cmd(void) 932static void set_lpp_cmd(void)
900{ 933{
901 unsigned long lpp; 934 unsigned long lpp;
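
show_uptime() uses xmon's standard setjmp(bus_error_jmp)/catch_memory_errors bracket so a machine check taken while reading memory returns control to the debugger instead of killing it. The fractional part prints centiseconds: tv_nsec / (NSEC_PER_SEC/100) divides by 10^7, so tv_sec = 3600 and tv_nsec = 250000000 come out as "Uptime: 3600.25 seconds".
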
@@ -1031,6 +1064,9 @@ cmds(struct pt_regs *excp)
1031 dump_tlb_book3e(); 1064 dump_tlb_book3e();
1032 break; 1065 break;
1033#endif 1066#endif
1067 case 'U':
1068 show_uptime();
1069 break;
1034 default: 1070 default:
1035 printf("Unrecognized command: "); 1071 printf("Unrecognized command: ");
1036 do { 1072 do {
@@ -2279,7 +2315,7 @@ static void dump_tracing(void)
2279static void dump_one_paca(int cpu) 2315static void dump_one_paca(int cpu)
2280{ 2316{
2281 struct paca_struct *p; 2317 struct paca_struct *p;
2282#ifdef CONFIG_PPC_STD_MMU_64 2318#ifdef CONFIG_PPC_BOOK3S_64
2283 int i = 0; 2319 int i = 0;
2284#endif 2320#endif
2285 2321
@@ -2320,7 +2356,7 @@ static void dump_one_paca(int cpu)
2320 DUMP(p, hw_cpu_id, "x"); 2356 DUMP(p, hw_cpu_id, "x");
2321 DUMP(p, cpu_start, "x"); 2357 DUMP(p, cpu_start, "x");
2322 DUMP(p, kexec_state, "x"); 2358 DUMP(p, kexec_state, "x");
2323#ifdef CONFIG_PPC_STD_MMU_64 2359#ifdef CONFIG_PPC_BOOK3S_64
2324 for (i = 0; i < SLB_NUM_BOLTED; i++) { 2360 for (i = 0; i < SLB_NUM_BOLTED; i++) {
2325 u64 esid, vsid; 2361 u64 esid, vsid;
2326 2362
@@ -2351,6 +2387,7 @@ static void dump_one_paca(int cpu)
2351#endif 2387#endif
2352 DUMP(p, __current, "p"); 2388 DUMP(p, __current, "p");
2353 DUMP(p, kstack, "lx"); 2389 DUMP(p, kstack, "lx");
2390 printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
2354 DUMP(p, stab_rr, "lx"); 2391 DUMP(p, stab_rr, "lx");
2355 DUMP(p, saved_r1, "lx"); 2392 DUMP(p, saved_r1, "lx");
2356 DUMP(p, trap_save, "x"); 2393 DUMP(p, trap_save, "x");
@@ -2475,6 +2512,11 @@ static void dump_xives(void)
2475 unsigned long num; 2512 unsigned long num;
2476 int c; 2513 int c;
2477 2514
2515 if (!xive_enabled()) {
2516 printf("Xive disabled on this system\n");
2517 return;
2518 }
2519
2478 c = inchar(); 2520 c = inchar();
2479 if (c == 'a') { 2521 if (c == 'a') {
2480 dump_all_xives(); 2522 dump_all_xives();
@@ -2574,6 +2616,9 @@ dump(void)
2574 dump_log_buf(); 2616 dump_log_buf();
2575 } else if (c == 'o') { 2617 } else if (c == 'o') {
2576 dump_opal_msglog(); 2618 dump_opal_msglog();
2619 } else if (c == 'v') {
2620 /* dump virtual to physical translation */
2621 show_pte(adrs);
2577 } else if (c == 'r') { 2622 } else if (c == 'r') {
2578 scanhex(&ndump); 2623 scanhex(&ndump);
2579 if (ndump == 0) 2624 if (ndump == 0)
@@ -2907,6 +2952,116 @@ static void show_task(struct task_struct *tsk)
2907 tsk->comm); 2952 tsk->comm);
2908} 2953}
2909 2954
2955#ifdef CONFIG_PPC_BOOK3S_64
2956void format_pte(void *ptep, unsigned long pte)
2957{
2958 printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte);
2959 printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK);
2960
2961 printf("Flags = %s%s%s%s%s\n",
2962 (pte & _PAGE_ACCESSED) ? "Accessed " : "",
2963 (pte & _PAGE_DIRTY) ? "Dirty " : "",
2964 (pte & _PAGE_READ) ? "Read " : "",
2965 (pte & _PAGE_WRITE) ? "Write " : "",
2966 (pte & _PAGE_EXEC) ? "Exec " : "");
2967}
2968
2969static void show_pte(unsigned long addr)
2970{
2971 unsigned long tskv = 0;
2972 struct task_struct *tsk = NULL;
2973 struct mm_struct *mm;
2974 pgd_t *pgdp, *pgdir;
2975 pud_t *pudp;
2976 pmd_t *pmdp;
2977 pte_t *ptep;
2978
2979 if (!scanhex(&tskv))
2980 mm = &init_mm;
2981 else
2982 tsk = (struct task_struct *)tskv;
2983
2984 if (tsk == NULL)
2985 mm = &init_mm;
2986 else
2987 mm = tsk->active_mm;
2988
2989 if (setjmp(bus_error_jmp) != 0) {
2990 catch_memory_errors = 0;
2991 printf("*** Error dumping pte for task %p\n", tsk);
2992 return;
2993 }
2994
2995 catch_memory_errors = 1;
2996 sync();
2997
2998 if (mm == &init_mm) {
2999 pgdp = pgd_offset_k(addr);
3000 pgdir = pgd_offset_k(0);
3001 } else {
3002 pgdp = pgd_offset(mm, addr);
3003 pgdir = pgd_offset(mm, 0);
3004 }
3005
3006 if (pgd_none(*pgdp)) {
3007 printf("no linux page table for address\n");
3008 return;
3009 }
3010
3011 printf("pgd @ 0x%016lx\n", pgdir);
3012
3013 if (pgd_huge(*pgdp)) {
3014 format_pte(pgdp, pgd_val(*pgdp));
3015 return;
3016 }
3017 printf("pgdp @ 0x%016lx = 0x%016lx\n", pgdp, pgd_val(*pgdp));
3018
3019 pudp = pud_offset(pgdp, addr);
3020
3021 if (pud_none(*pudp)) {
3022 printf("No valid PUD\n");
3023 return;
3024 }
3025
3026 if (pud_huge(*pudp)) {
3027 format_pte(pudp, pud_val(*pudp));
3028 return;
3029 }
3030
3031 printf("pudp @ 0x%016lx = 0x%016lx\n", pudp, pud_val(*pudp));
3032
3033 pmdp = pmd_offset(pudp, addr);
3034
3035 if (pmd_none(*pmdp)) {
3036 printf("No valid PMD\n");
3037 return;
3038 }
3039
3040 if (pmd_huge(*pmdp)) {
3041 format_pte(pmdp, pmd_val(*pmdp));
3042 return;
3043 }
3044 printf("pmdp @ 0x%016lx = 0x%016lx\n", pmdp, pmd_val(*pmdp));
3045
3046 ptep = pte_offset_map(pmdp, addr);
3047 if (pte_none(*ptep)) {
3048 printf("no valid PTE\n");
3049 return;
3050 }
3051
3052 format_pte(ptep, pte_val(*ptep));
3053
3054 sync();
3055 __delay(200);
3056 catch_memory_errors = 0;
3057}
3058#else
3059static void show_pte(unsigned long addr)
3060{
3061 printf("show_pte not yet implemented\n");
3062}
3063#endif /* CONFIG_PPC_BOOK3S_64 */
3064
2910static void show_tasks(void) 3065static void show_tasks(void)
2911{ 3066{
2912 unsigned long tskv; 3067 unsigned long tskv;
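
The new 'dv' command walks the Linux page tables top-down and treats a huge entry at any level as a leaf. Stripped of the printing and validity checks, the descent is the standard 4-level walk in the 4.14-era API used above (no p4d level here):

    pgd_t *pgd = pgd_offset(mm, addr);  /* pgd_offset_k() for init_mm */
    pud_t *pud = pud_offset(pgd, addr);
    pmd_t *pmd = pmd_offset(pud, addr);
    pte_t *pte = pte_offset_map(pmd, addr);
    /* at each level: *_none() => unmapped, *_huge() => leaf mapping */
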
@@ -3224,7 +3379,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
3224 printf("%s", after); 3379 printf("%s", after);
3225} 3380}
3226 3381
3227#ifdef CONFIG_PPC_STD_MMU_64 3382#ifdef CONFIG_PPC_BOOK3S_64
3228void dump_segments(void) 3383void dump_segments(void)
3229{ 3384{
3230 int i; 3385 int i;
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index ed6531f075c6..e06605b21841 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -384,9 +384,9 @@ static int powernv_add_idle_states(void)
384 * Firmware passes residency and latency values in ns. 384 * Firmware passes residency and latency values in ns.
385 * cpuidle expects it in us. 385 * cpuidle expects it in us.
386 */ 386 */
387 exit_latency = latency_ns[i] / 1000; 387 exit_latency = DIV_ROUND_UP(latency_ns[i], 1000);
388 if (!rc) 388 if (!rc)
389 target_residency = residency_ns[i] / 1000; 389 target_residency = DIV_ROUND_UP(residency_ns[i], 1000);
390 else 390 else
391 target_residency = 0; 391 target_residency = 0;
392 392
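
Plain integer division truncated the firmware-supplied nanosecond values, so a state with latency or residency under 1000ns was reported to cpuidle as 0us, i.e. apparently free to enter. DIV_ROUND_UP(n, d) from include/linux/kernel.h is (((n) + (d) - 1) / (d)), which rounds up instead:

    DIV_ROUND_UP(500, 1000)  == 1   /* old: 500/1000  == 0 */
    DIV_ROUND_UP(1000, 1000) == 1   /* exact: unchanged    */
    DIV_ROUND_UP(1001, 1000) == 2   /* old: 1001/1000 == 1 */
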
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index 0f20f5ec9617..f2246a5abcf6 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -46,7 +46,6 @@ struct nx842_workmem {
46 46
47 ktime_t start; 47 ktime_t start;
48 48
49 struct vas_window *txwin; /* Used with VAS function */
50 char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */ 49 char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
51} __packed __aligned(WORKMEM_ALIGN); 50} __packed __aligned(WORKMEM_ALIGN);
52 51
@@ -65,7 +64,7 @@ struct nx842_coproc {
65 * Send the request to NX engine on the chip for the corresponding CPU 64 * Send the request to NX engine on the chip for the corresponding CPU
66 * where the process is executing. Use with VAS function. 65 * where the process is executing. Use with VAS function.
67 */ 66 */
68static DEFINE_PER_CPU(struct nx842_coproc *, coproc_inst); 67static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
69 68
70/* no cpu hotplug on powernv, so this list never changes after init */ 69/* no cpu hotplug on powernv, so this list never changes after init */
71static LIST_HEAD(nx842_coprocs); 70static LIST_HEAD(nx842_coprocs);
@@ -586,16 +585,11 @@ static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
586 ccw = SET_FIELD(CCW_FC_842, ccw, fc); 585 ccw = SET_FIELD(CCW_FC_842, ccw, fc);
587 crb->ccw = cpu_to_be32(ccw); 586 crb->ccw = cpu_to_be32(ccw);
588 587
589 txwin = wmem->txwin;
590 /* shoudn't happen, we don't load without a coproc */
591 if (!txwin) {
592 pr_err_ratelimited("NX-842 coprocessor is not available");
593 return -ENODEV;
594 }
595
596 do { 588 do {
597 wmem->start = ktime_get(); 589 wmem->start = ktime_get();
598 preempt_disable(); 590 preempt_disable();
591 txwin = this_cpu_read(cpu_txwin);
592
599 /* 593 /*
600 * VAS copy CRB into L2 cache. Refer <asm/vas.h>. 594 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
601 * @crb and @offset. 595 * @crb and @offset.
@@ -689,25 +683,6 @@ static inline void nx842_add_coprocs_list(struct nx842_coproc *coproc,
689 list_add(&coproc->list, &nx842_coprocs); 683 list_add(&coproc->list, &nx842_coprocs);
690} 684}
691 685
692/*
693 * Identify chip ID for each CPU and save coprocesor adddress for the
694 * corresponding NX engine in percpu coproc_inst.
695 * coproc_inst is used in crypto_init to open send window on the NX instance
696 * for the corresponding CPU / chip where the open request is executed.
697 */
698static void nx842_set_per_cpu_coproc(struct nx842_coproc *coproc)
699{
700 unsigned int i, chip_id;
701
702 for_each_possible_cpu(i) {
703 chip_id = cpu_to_chip_id(i);
704
705 if (coproc->chip_id == chip_id)
706 per_cpu(coproc_inst, i) = coproc;
707 }
708}
709
710
711static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc) 686static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc)
712{ 687{
713 struct vas_window *txwin = NULL; 688 struct vas_window *txwin = NULL;
@@ -725,15 +700,58 @@ static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc)
725 * Open a VAS send window which is used to send request to NX. 700 * Open a VAS send window which is used to send request to NX.
726 */ 701 */
727 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr); 702 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
728 if (IS_ERR(txwin)) { 703 if (IS_ERR(txwin))
729 pr_err("ibm,nx-842: Can not open TX window: %ld\n", 704 pr_err("ibm,nx-842: Can not open TX window: %ld\n",
730 PTR_ERR(txwin)); 705 PTR_ERR(txwin));
731 return NULL;
732 }
733 706
734 return txwin; 707 return txwin;
735} 708}
736 709
710/*
 711 * Identify chip ID for each CPU, open a send window for the corresponding NX
712 * engine and save txwin in percpu cpu_txwin.
713 * cpu_txwin is used in copy/paste operation for each compression /
714 * decompression request.
715 */
716static int nx842_open_percpu_txwins(void)
717{
718 struct nx842_coproc *coproc, *n;
719 unsigned int i, chip_id;
720
721 for_each_possible_cpu(i) {
722 struct vas_window *txwin = NULL;
723
724 chip_id = cpu_to_chip_id(i);
725
726 list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
727 /*
728 * Kernel requests use only high priority FIFOs. So
729 * open send windows for these FIFOs.
730 */
731
732 if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
733 continue;
734
735 if (coproc->chip_id == chip_id) {
736 txwin = nx842_alloc_txwin(coproc);
737 if (IS_ERR(txwin))
738 return PTR_ERR(txwin);
739
740 per_cpu(cpu_txwin, i) = txwin;
741 break;
742 }
743 }
744
745 if (!per_cpu(cpu_txwin, i)) {
 746 		/* shouldn't happen, each chip will have an NX engine */
 747 		pr_err("NX engine is not available for CPU %d\n", i);
748 return -EINVAL;
749 }
750 }
751
752 return 0;
753}
754
737static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, 755static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
738 int vasid) 756 int vasid)
739{ 757{
@@ -819,14 +837,6 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
819 coproc->vas.id = vasid; 837 coproc->vas.id = vasid;
820 nx842_add_coprocs_list(coproc, chip_id); 838 nx842_add_coprocs_list(coproc, chip_id);
821 839
822 /*
823 * Kernel requests use only high priority FIFOs. So save coproc
824 * info in percpu coproc_inst which will be used to open send
825 * windows for crypto open requests later.
826 */
827 if (coproc->ct == VAS_COP_TYPE_842_HIPRI)
828 nx842_set_per_cpu_coproc(coproc);
829
830 return 0; 840 return 0;
831 841
832err_out: 842err_out:
@@ -847,24 +857,12 @@ static int __init nx842_powernv_probe_vas(struct device_node *pn)
847 return -EINVAL; 857 return -EINVAL;
848 } 858 }
849 859
850 for_each_compatible_node(dn, NULL, "ibm,power9-vas-x") { 860 vasid = chip_to_vas_id(chip_id);
851 if (of_get_ibm_chip_id(dn) == chip_id) 861 if (vasid < 0) {
852 break; 862 pr_err("Unable to map chip_id %d to vasid\n", chip_id);
853 }
854
855 if (!dn) {
856 pr_err("Missing VAS device node\n");
857 return -EINVAL; 863 return -EINVAL;
858 } 864 }
859 865
860 if (of_property_read_u32(dn, "ibm,vas-id", &vasid)) {
861 pr_err("Missing ibm,vas-id device property\n");
862 of_node_put(dn);
863 return -EINVAL;
864 }
865
866 of_node_put(dn);
867
868 for_each_child_of_node(pn, dn) { 866 for_each_child_of_node(pn, dn) {
869 if (of_device_is_compatible(dn, "ibm,p9-nx-842")) { 867 if (of_device_is_compatible(dn, "ibm,p9-nx-842")) {
870 ret = vas_cfg_coproc_info(dn, chip_id, vasid); 868 ret = vas_cfg_coproc_info(dn, chip_id, vasid);
@@ -928,6 +926,19 @@ static int __init nx842_powernv_probe(struct device_node *dn)
928static void nx842_delete_coprocs(void) 926static void nx842_delete_coprocs(void)
929{ 927{
930 struct nx842_coproc *coproc, *n; 928 struct nx842_coproc *coproc, *n;
929 struct vas_window *txwin;
930 int i;
931
932 /*
933 * close percpu txwins that are opened for the corresponding coproc.
934 */
935 for_each_possible_cpu(i) {
936 txwin = per_cpu(cpu_txwin, i);
937 if (txwin)
938 vas_win_close(txwin);
939
940 per_cpu(cpu_txwin, i) = 0;
941 }
931 942
932 list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) { 943 list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
933 if (coproc->vas.rxwin) 944 if (coproc->vas.rxwin)
@@ -954,46 +965,6 @@ static struct nx842_driver nx842_powernv_driver = {
954 .decompress = nx842_powernv_decompress, 965 .decompress = nx842_powernv_decompress,
955}; 966};
956 967
957static int nx842_powernv_crypto_init_vas(struct crypto_tfm *tfm)
958{
959 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
960 struct nx842_workmem *wmem;
961 struct nx842_coproc *coproc;
962 int ret;
963
964 ret = nx842_crypto_init(tfm, &nx842_powernv_driver);
965
966 if (ret)
967 return ret;
968
969 wmem = PTR_ALIGN((struct nx842_workmem *)ctx->wmem, WORKMEM_ALIGN);
970 coproc = per_cpu(coproc_inst, smp_processor_id());
971
972 ret = -EINVAL;
973 if (coproc && coproc->vas.rxwin) {
974 wmem->txwin = nx842_alloc_txwin(coproc);
975 if (!IS_ERR(wmem->txwin))
976 return 0;
977
978 ret = PTR_ERR(wmem->txwin);
979 }
980
981 return ret;
982}
983
984void nx842_powernv_crypto_exit_vas(struct crypto_tfm *tfm)
985{
986 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
987 struct nx842_workmem *wmem;
988
989 wmem = PTR_ALIGN((struct nx842_workmem *)ctx->wmem, WORKMEM_ALIGN);
990
991 if (wmem && wmem->txwin)
992 vas_win_close(wmem->txwin);
993
994 nx842_crypto_exit(tfm);
995}
996
997static int nx842_powernv_crypto_init(struct crypto_tfm *tfm) 968static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
998{ 969{
999 return nx842_crypto_init(tfm, &nx842_powernv_driver); 970 return nx842_crypto_init(tfm, &nx842_powernv_driver);
@@ -1044,9 +1015,13 @@ static __init int nx842_powernv_init(void)
1044 1015
1045 nx842_powernv_exec = nx842_exec_icswx; 1016 nx842_powernv_exec = nx842_exec_icswx;
1046 } else { 1017 } else {
1018 ret = nx842_open_percpu_txwins();
1019 if (ret) {
1020 nx842_delete_coprocs();
1021 return ret;
1022 }
1023
1047 nx842_powernv_exec = nx842_exec_vas; 1024 nx842_powernv_exec = nx842_exec_vas;
1048 nx842_powernv_alg.cra_init = nx842_powernv_crypto_init_vas;
1049 nx842_powernv_alg.cra_exit = nx842_powernv_crypto_exit_vas;
1050 } 1025 }
1051 1026
1052 ret = crypto_register_alg(&nx842_powernv_alg); 1027 ret = crypto_register_alg(&nx842_powernv_alg);
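
The net effect of this rework: send-window lifetime moves from per-transform (one vas_tx_win_open() in every cra_init, closed in cra_exit) to per-CPU (all windows opened once in nx842_open_percpu_txwins() at module init, closed in nx842_delete_coprocs()). The hot path then only looks the window up; preemption must stay disabled between the lookup and the request submission so the task cannot migrate to a CPU whose window belongs to a different chip. The essential pattern, as a sketch:

    static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);

    preempt_disable();
    txwin = this_cpu_read(cpu_txwin);
    /* ... submit the CRB through txwin (copy/paste) ... */
    preempt_enable();
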
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index da3cb8c35ec7..d94e25df503b 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -116,7 +116,7 @@ int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
116 116
117 spin_lock_init(&ctx->lock); 117 spin_lock_init(&ctx->lock);
118 ctx->driver = driver; 118 ctx->driver = driver;
119 ctx->wmem = kzalloc(driver->workmem_size, GFP_KERNEL); 119 ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
120 ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); 120 ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
121 ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); 121 ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
122 if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) { 122 if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
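
The kzalloc() to kmalloc() switch drops zero-initialisation of the workmem scratch buffer, which the driver fully populates before each request anyway. Presumably this is also enabled by the previous patch removing the per-tfm txwin field, which was the one field that relied on starting out as NULL.
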
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index a0c44d16bf30..7c11bad5cded 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/sched/mm.h> 17#include <linux/sched/mm.h>
18#include <linux/mmu_context.h>
18 19
19#include "cxl.h" 20#include "cxl.h"
20 21
@@ -331,9 +332,12 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
331 /* ensure this mm_struct can't be freed */ 332 /* ensure this mm_struct can't be freed */
332 cxl_context_mm_count_get(ctx); 333 cxl_context_mm_count_get(ctx);
333 334
334 /* decrement the use count */ 335 if (ctx->mm) {
335 if (ctx->mm) 336 /* decrement the use count from above */
336 mmput(ctx->mm); 337 mmput(ctx->mm);
338 /* make TLBIs for this context global */
339 mm_context_add_copro(ctx->mm);
340 }
337 } 341 }
338 342
339 /* 343 /*
@@ -342,13 +346,19 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
342 */ 346 */
343 cxl_ctx_get(); 347 cxl_ctx_get();
344 348
349 /* See the comment in afu_ioctl_start_work() */
350 smp_mb();
351
345 if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { 352 if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
346 put_pid(ctx->pid); 353 put_pid(ctx->pid);
347 ctx->pid = NULL; 354 ctx->pid = NULL;
348 cxl_adapter_context_put(ctx->afu->adapter); 355 cxl_adapter_context_put(ctx->afu->adapter);
349 cxl_ctx_put(); 356 cxl_ctx_put();
350 if (task) 357 if (task) {
351 cxl_context_mm_count_put(ctx); 358 cxl_context_mm_count_put(ctx);
359 if (ctx->mm)
360 mm_context_remove_copro(ctx->mm);
361 }
352 goto out; 362 goto out;
353 } 363 }
354 364
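
Both attach paths (this kernel API and the afu_ioctl_start_work() ioctl path edited below) now follow the same mm lifecycle: keep a grab-style count so the mm_struct cannot be freed, drop the temporary user reference with mmput(), and call mm_context_add_copro() so TLB invalidations for this mm become global and thus visible to the coprocessor; the smp_mb() orders all of that before attach_process() makes the context live. The failure path here and __detach_context() below mirror it with mm_context_remove_copro().
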
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 8c32040b9c09..12a41b2753f0 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/idr.h> 19#include <linux/idr.h>
20#include <linux/sched/mm.h> 20#include <linux/sched/mm.h>
21#include <linux/mmu_context.h>
21#include <asm/cputable.h> 22#include <asm/cputable.h>
22#include <asm/current.h> 23#include <asm/current.h>
23#include <asm/copro.h> 24#include <asm/copro.h>
@@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx)
267 268
268 /* Decrease the mm count on the context */ 269 /* Decrease the mm count on the context */
269 cxl_context_mm_count_put(ctx); 270 cxl_context_mm_count_put(ctx);
271 if (ctx->mm)
272 mm_context_remove_copro(ctx->mm);
270 ctx->mm = NULL; 273 ctx->mm = NULL;
271 274
272 return 0; 275 return 0;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b1afeccbb97f..e46a4062904a 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -100,9 +100,12 @@ static const cxl_p1_reg_t CXL_XSL_FEC = {0x0158};
100static const cxl_p1_reg_t CXL_XSL_DSNCTL = {0x0168}; 100static const cxl_p1_reg_t CXL_XSL_DSNCTL = {0x0168};
101/* PSL registers - CAIA 2 */ 101/* PSL registers - CAIA 2 */
102static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020}; 102static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020};
103static const cxl_p1_reg_t CXL_XSL9_INV = {0x0110};
104static const cxl_p1_reg_t CXL_XSL9_DBG = {0x0130};
105static const cxl_p1_reg_t CXL_XSL9_DEF = {0x0140};
103static const cxl_p1_reg_t CXL_XSL9_DSNCTL = {0x0168}; 106static const cxl_p1_reg_t CXL_XSL9_DSNCTL = {0x0168};
104static const cxl_p1_reg_t CXL_PSL9_FIR1 = {0x0300}; 107static const cxl_p1_reg_t CXL_PSL9_FIR1 = {0x0300};
105static const cxl_p1_reg_t CXL_PSL9_FIR2 = {0x0308}; 108static const cxl_p1_reg_t CXL_PSL9_FIR_MASK = {0x0308};
106static const cxl_p1_reg_t CXL_PSL9_Timebase = {0x0310}; 109static const cxl_p1_reg_t CXL_PSL9_Timebase = {0x0310};
107static const cxl_p1_reg_t CXL_PSL9_DEBUG = {0x0320}; 110static const cxl_p1_reg_t CXL_PSL9_DEBUG = {0x0320};
108static const cxl_p1_reg_t CXL_PSL9_FIR_CNTL = {0x0348}; 111static const cxl_p1_reg_t CXL_PSL9_FIR_CNTL = {0x0348};
@@ -112,6 +115,7 @@ static const cxl_p1_reg_t CXL_PSL9_TRACECFG = {0x0368};
112static const cxl_p1_reg_t CXL_PSL9_APCDEDALLOC = {0x0378}; 115static const cxl_p1_reg_t CXL_PSL9_APCDEDALLOC = {0x0378};
113static const cxl_p1_reg_t CXL_PSL9_APCDEDTYPE = {0x0380}; 116static const cxl_p1_reg_t CXL_PSL9_APCDEDTYPE = {0x0380};
114static const cxl_p1_reg_t CXL_PSL9_TNR_ADDR = {0x0388}; 117static const cxl_p1_reg_t CXL_PSL9_TNR_ADDR = {0x0388};
118static const cxl_p1_reg_t CXL_PSL9_CTCCFG = {0x0390};
115static const cxl_p1_reg_t CXL_PSL9_GP_CT = {0x0398}; 119static const cxl_p1_reg_t CXL_PSL9_GP_CT = {0x0398};
116static const cxl_p1_reg_t CXL_XSL9_IERAT = {0x0588}; 120static const cxl_p1_reg_t CXL_XSL9_IERAT = {0x0588};
117static const cxl_p1_reg_t CXL_XSL9_ILPP = {0x0590}; 121static const cxl_p1_reg_t CXL_XSL9_ILPP = {0x0590};
@@ -414,6 +418,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
414#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) 418#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS)
415#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) 419#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS)
416 420
421#define CXL_PSL9_TRACEID_MAX 0xAU
422#define CXL_PSL9_TRACESTATE_FIN 0x3U
423
417enum cxl_context_status { 424enum cxl_context_status {
418 CLOSED, 425 CLOSED,
419 OPENED, 426 OPENED,
@@ -938,8 +945,6 @@ int cxl_debugfs_adapter_add(struct cxl *adapter);
938void cxl_debugfs_adapter_remove(struct cxl *adapter); 945void cxl_debugfs_adapter_remove(struct cxl *adapter);
939int cxl_debugfs_afu_add(struct cxl_afu *afu); 946int cxl_debugfs_afu_add(struct cxl_afu *afu);
940void cxl_debugfs_afu_remove(struct cxl_afu *afu); 947void cxl_debugfs_afu_remove(struct cxl_afu *afu);
941void cxl_stop_trace_psl9(struct cxl *cxl);
942void cxl_stop_trace_psl8(struct cxl *cxl);
943void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir); 948void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir);
944void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir); 949void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir);
945void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, struct dentry *dir); 950void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, struct dentry *dir);
@@ -975,14 +980,6 @@ static inline void cxl_debugfs_afu_remove(struct cxl_afu *afu)
975{ 980{
976} 981}
977 982
978static inline void cxl_stop_trace_psl9(struct cxl *cxl)
979{
980}
981
982static inline void cxl_stop_trace_psl8(struct cxl *cxl)
983{
984}
985
986static inline void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, 983static inline void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter,
987 struct dentry *dir) 984 struct dentry *dir)
988{ 985{
@@ -1070,7 +1067,8 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
1070 1067
1071void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx); 1068void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
1072void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx); 1069void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx);
1073void cxl_native_err_irq_dump_regs(struct cxl *adapter); 1070void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter);
1071void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter);
1074int cxl_pci_vphb_add(struct cxl_afu *afu); 1072int cxl_pci_vphb_add(struct cxl_afu *afu);
1075void cxl_pci_vphb_remove(struct cxl_afu *afu); 1073void cxl_pci_vphb_remove(struct cxl_afu *afu);
1076void cxl_release_mapping(struct cxl_context *ctx); 1074void cxl_release_mapping(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c
index eae9d749f967..1643850d2302 100644
--- a/drivers/misc/cxl/debugfs.c
+++ b/drivers/misc/cxl/debugfs.c
@@ -15,28 +15,6 @@
15 15
16static struct dentry *cxl_debugfs; 16static struct dentry *cxl_debugfs;
17 17
18void cxl_stop_trace_psl9(struct cxl *adapter)
19{
20 /* Stop the trace */
21 cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x4480000000000000ULL);
22}
23
24void cxl_stop_trace_psl8(struct cxl *adapter)
25{
26 int slice;
27
28 /* Stop the trace */
29 cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL);
30
31 /* Stop the slice traces */
32 spin_lock(&adapter->afu_list_lock);
33 for (slice = 0; slice < adapter->slices; slice++) {
34 if (adapter->afu[slice])
35 cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, 0x8000000000000000LL);
36 }
37 spin_unlock(&adapter->afu_list_lock);
38}
39
40/* Helpers to export CXL mmaped IO registers via debugfs */ 18/* Helpers to export CXL mmaped IO registers via debugfs */
41static int debugfs_io_u64_get(void *data, u64 *val) 19static int debugfs_io_u64_get(void *data, u64 *val)
42{ 20{
@@ -62,9 +40,14 @@ static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode,
62void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir) 40void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir)
63{ 41{
64 debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR1)); 42 debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR1));
65 debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR2)); 43 debugfs_create_io_x64("fir_mask", 0400, dir,
44 _cxl_p1_addr(adapter, CXL_PSL9_FIR_MASK));
66 debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR_CNTL)); 45 debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR_CNTL));
67 debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_TRACECFG)); 46 debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_TRACECFG));
47 debugfs_create_io_x64("debug", 0600, dir,
48 _cxl_p1_addr(adapter, CXL_PSL9_DEBUG));
49 debugfs_create_io_x64("xsl-debug", 0600, dir,
50 _cxl_p1_addr(adapter, CXL_XSL9_DBG));
68} 51}
69 52
70void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir) 53void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir)
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index f17f72ea0545..70dbb6de102c 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -220,22 +220,11 @@ static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
220 220
221static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr) 221static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
222{ 222{
223 u64 crs; /* Translation Checkout Response Status */
224
225 if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM)) 223 if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
226 return true; 224 return true;
227 225
228 if (cxl_is_power9()) { 226 if (cxl_is_power9())
229 crs = (dsisr & CXL_PSL9_DSISR_An_CO_MASK); 227 return true;
230 if ((crs == CXL_PSL9_DSISR_An_PF_SLR) ||
231 (crs == CXL_PSL9_DSISR_An_PF_RGC) ||
232 (crs == CXL_PSL9_DSISR_An_PF_RGP) ||
233 (crs == CXL_PSL9_DSISR_An_PF_HRH) ||
234 (crs == CXL_PSL9_DSISR_An_PF_STEG) ||
235 (crs == CXL_PSL9_DSISR_An_URTCH)) {
236 return true;
237 }
238 }
239 228
240 return false; 229 return false;
241} 230}
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 4bfad9f6dc9f..76c0b0ca9388 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -19,6 +19,7 @@
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/sched/mm.h> 21#include <linux/sched/mm.h>
22#include <linux/mmu_context.h>
22#include <asm/cputable.h> 23#include <asm/cputable.h>
23#include <asm/current.h> 24#include <asm/current.h>
24#include <asm/copro.h> 25#include <asm/copro.h>
@@ -220,9 +221,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
220 /* ensure this mm_struct can't be freed */ 221 /* ensure this mm_struct can't be freed */
221 cxl_context_mm_count_get(ctx); 222 cxl_context_mm_count_get(ctx);
222 223
223 /* decrement the use count */ 224 if (ctx->mm) {
224 if (ctx->mm) 225 /* decrement the use count from above */
225 mmput(ctx->mm); 226 mmput(ctx->mm);
227 /* make TLBIs for this context global */
228 mm_context_add_copro(ctx->mm);
229 }
226 230
227 /* 231 /*
228 * Increment driver use count. Enables global TLBIs for hash 232 * Increment driver use count. Enables global TLBIs for hash
@@ -230,6 +234,20 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
230 */ 234 */
231 cxl_ctx_get(); 235 cxl_ctx_get();
232 236
237 /*
238 * A barrier is needed to make sure all TLBIs are global
239 * before we attach and the context starts being used by the
240 * adapter.
241 *
242 * Needed after mm_context_add_copro() for radix and
243 * cxl_ctx_get() for hash/p8.
244 *
245 * The barrier should really be mb(), since it involves a
246 * device. However, it's only useful when we have local
 247 	 * vs. global TLBIs, i.e. SMP=y. So keep smp_mb().
248 */
249 smp_mb();
250
233 trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); 251 trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
234 252
235 if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, 253 if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
@@ -240,6 +258,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
240 ctx->pid = NULL; 258 ctx->pid = NULL;
241 cxl_ctx_put(); 259 cxl_ctx_put();
242 cxl_context_mm_count_put(ctx); 260 cxl_context_mm_count_put(ctx);
261 if (ctx->mm)
262 mm_context_remove_copro(ctx->mm);
243 goto out; 263 goto out;
244 } 264 }
245 265
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 4a82c313cf71..02b6b45b4c20 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -897,6 +897,14 @@ int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
897 if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes) 897 if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes)
898 afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); 898 afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx);
899 899
900 ctx->elem->software_state = cpu_to_be32(CXL_PE_SOFTWARE_STATE_V);
901 /*
902 * Ideally we should do a wmb() here to make sure the changes to the
903 * PE are visible to the card before we call afu_enable.
 904 	 * On ppc64, though, all mmios are preceded by a 'sync' instruction,
 905 	 * hence we don't need one here.
906 */
907
900 result = cxl_ops->afu_reset(afu); 908 result = cxl_ops->afu_reset(afu);
901 if (result) 909 if (result)
902 return result; 910 return result;
@@ -1077,13 +1085,11 @@ static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info)
1077 1085
1078void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx) 1086void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx)
1079{ 1087{
1080 u64 fir1, fir2, serr; 1088 u64 fir1, serr;
1081 1089
1082 fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR1); 1090 fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR1);
1083 fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR2);
1084 1091
1085 dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); 1092 dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
1086 dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
1087 if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { 1093 if (ctx->afu->adapter->native->sl_ops->register_serr_irq) {
1088 serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); 1094 serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
1089 cxl_afu_decode_psl_serr(ctx->afu, serr); 1095 cxl_afu_decode_psl_serr(ctx->afu, serr);
@@ -1257,14 +1263,23 @@ static irqreturn_t native_slice_irq_err(int irq, void *data)
1257 return IRQ_HANDLED; 1263 return IRQ_HANDLED;
1258} 1264}
1259 1265
1260void cxl_native_err_irq_dump_regs(struct cxl *adapter) 1266void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter)
1267{
1268 u64 fir1;
1269
1270 fir1 = cxl_p1_read(adapter, CXL_PSL9_FIR1);
1271 dev_crit(&adapter->dev, "PSL_FIR: 0x%016llx\n", fir1);
1272}
1273
1274void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter)
1261{ 1275{
1262 u64 fir1, fir2; 1276 u64 fir1, fir2;
1263 1277
1264 fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); 1278 fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1);
1265 fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); 1279 fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2);
1266 1280 dev_crit(&adapter->dev,
1267 dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); 1281 "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n",
1282 fir1, fir2);
1268} 1283}
1269 1284
1270static irqreturn_t native_irq_err(int irq, void *data) 1285static irqreturn_t native_irq_err(int irq, void *data)
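
Background for the split: PSL9 has a single FIR; the register at the old 'FIR2' offset (0x308) is actually its mask, hence the CXL_PSL9_FIR2 to CXL_PSL9_FIR_MASK rename in cxl.h above. Dumping the mask as though it were a second fault register was misleading, so the shared cxl_native_err_irq_dump_regs() becomes _psl8/_psl9 variants that each report only the registers their hardware actually has.
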
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 3ba04f371380..bb7fd3f4edab 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -401,7 +401,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
401 *capp_unit_id = get_capp_unit_id(np, *phb_index); 401 *capp_unit_id = get_capp_unit_id(np, *phb_index);
402 of_node_put(np); 402 of_node_put(np);
403 if (!*capp_unit_id) { 403 if (!*capp_unit_id) {
404 pr_err("cxl: invalid capp unit id\n"); 404 pr_err("cxl: invalid capp unit id (phb_index: %d)\n",
405 *phb_index);
405 return -ENODEV; 406 return -ENODEV;
406 } 407 }
407 408
@@ -475,37 +476,37 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
475 psl_fircntl |= 0x1ULL; /* ce_thresh */ 476 psl_fircntl |= 0x1ULL; /* ce_thresh */
476 cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl); 477 cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl);
477 478
478 /* vccredits=0x1 pcklat=0x4 */ 479 /* Setup the PSL to transmit packets on the PCIe before the
479 cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0000000000001810ULL); 480 * CAPP is enabled
480
481 /*
482 * For debugging with trace arrays.
483 * Configure RX trace 0 segmented mode.
484 * Configure CT trace 0 segmented mode.
485 * Configure LA0 trace 0 segmented mode.
486 * Configure LA1 trace 0 segmented mode.
487 */ 481 */
488 cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000000ULL); 482 cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000002A10ULL);
489 cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000003ULL);
490 cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000005ULL);
491 cxl_p1_write(adapter, CXL_PSL9_TRACECFG, 0x8040800080000006ULL);
492 483
493 /* 484 /*
494 * A response to an ASB_Notify request is returned by the 485 * A response to an ASB_Notify request is returned by the
495 * system as an MMIO write to the address defined in 486 * system as an MMIO write to the address defined in
496 * the PSL_TNR_ADDR register 487 * the PSL_TNR_ADDR register.
488 * keep the Reset Value: 0x00020000E0000000
497 */ 489 */
498 /* PSL_TNR_ADDR */
499 490
500 /* NORST */ 491 /* Enable XSL rty limit */
501 cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL); 492 cxl_p1_write(adapter, CXL_XSL9_DEF, 0x51F8000000000005ULL);
493
494 /* Change XSL_INV dummy read threshold */
495 cxl_p1_write(adapter, CXL_XSL9_INV, 0x0000040007FFC200ULL);
496
497 if (phb_index == 3) {
498 /* disable machines 31-47 and 20-27 for DMA */
499 cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000FF3FFFF0000ULL);
500 }
502 501
503 /* allocate the apc machines */ 502 /* Snoop machines */
504 cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000003FFFF0000ULL); 503 cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL);
505 504
506 /* Disable vc dd1 fix */ 505 if (cxl_is_power9_dd1()) {
507 if (cxl_is_power9_dd1()) 506 /* Disabling deadlock counter CAR */
508 cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0400000000000001ULL); 507 cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL);
508 } else
509 cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x4000000000000000ULL);
509 510
510 return 0; 511 return 0;
511} 512}
@@ -1746,6 +1747,44 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
1746 pci_disable_device(pdev); 1747 pci_disable_device(pdev);
1747} 1748}
1748 1749
1750static void cxl_stop_trace_psl9(struct cxl *adapter)
1751{
1752 int traceid;
1753 u64 trace_state, trace_mask;
1754 struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
1755
 1756 	/* read each trace array state and issue an mmio to stop it if needed */
1757 for (traceid = 0; traceid <= CXL_PSL9_TRACEID_MAX; ++traceid) {
1758 trace_state = cxl_p1_read(adapter, CXL_PSL9_CTCCFG);
1759 trace_mask = (0x3ULL << (62 - traceid * 2));
1760 trace_state = (trace_state & trace_mask) >> (62 - traceid * 2);
1761 dev_dbg(&dev->dev, "cxl: Traceid-%d trace_state=0x%0llX\n",
1762 traceid, trace_state);
1763
1764 /* issue mmio if the trace array isn't in FIN state */
1765 if (trace_state != CXL_PSL9_TRACESTATE_FIN)
1766 cxl_p1_write(adapter, CXL_PSL9_TRACECFG,
1767 0x8400000000000000ULL | traceid);
1768 }
1769}
1770
1771static void cxl_stop_trace_psl8(struct cxl *adapter)
1772{
1773 int slice;
1774
1775 /* Stop the trace */
1776 cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL);
1777
1778 /* Stop the slice traces */
1779 spin_lock(&adapter->afu_list_lock);
1780 for (slice = 0; slice < adapter->slices; slice++) {
1781 if (adapter->afu[slice])
1782 cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE,
1783 0x8000000000000000LL);
1784 }
1785 spin_unlock(&adapter->afu_list_lock);
1786}
1787
1749static const struct cxl_service_layer_ops psl9_ops = { 1788static const struct cxl_service_layer_ops psl9_ops = {
1750 .adapter_regs_init = init_implementation_adapter_regs_psl9, 1789 .adapter_regs_init = init_implementation_adapter_regs_psl9,
1751 .invalidate_all = cxl_invalidate_all_psl9, 1790 .invalidate_all = cxl_invalidate_all_psl9,
@@ -1762,6 +1801,7 @@ static const struct cxl_service_layer_ops psl9_ops = {
1762 .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9, 1801 .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
1763 .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9, 1802 .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
1764 .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9, 1803 .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
1804 .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9,
1765 .debugfs_stop_trace = cxl_stop_trace_psl9, 1805 .debugfs_stop_trace = cxl_stop_trace_psl9,
1766 .write_timebase_ctrl = write_timebase_ctrl_psl9, 1806 .write_timebase_ctrl = write_timebase_ctrl_psl9,
1767 .timebase_read = timebase_read_psl9, 1807 .timebase_read = timebase_read_psl9,
@@ -1785,7 +1825,7 @@ static const struct cxl_service_layer_ops psl8_ops = {
1785 .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl8, 1825 .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl8,
1786 .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl8, 1826 .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl8,
1787 .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl8, 1827 .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl8,
1788 .err_irq_dump_registers = cxl_native_err_irq_dump_regs, 1828 .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl8,
1789 .debugfs_stop_trace = cxl_stop_trace_psl8, 1829 .debugfs_stop_trace = cxl_stop_trace_psl8,
1790 .write_timebase_ctrl = write_timebase_ctrl_psl8, 1830 .write_timebase_ctrl = write_timebase_ctrl_psl8,
1791 .timebase_read = timebase_read_psl8, 1831 .timebase_read = timebase_read_psl8,
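
The new cxl_stop_trace_psl9() decodes CXL_PSL9_CTCCFG, which packs a 2-bit state per trace array: array 0 in bits 63:62, array 1 in bits 61:60, and so on down to array 0xA (CXL_PSL9_TRACEID_MAX). Extracting the state for array 'id' therefore reduces to (mirroring the loop above):

    u64 cfg   = cxl_p1_read(adapter, CXL_PSL9_CTCCFG);
    u64 state = (cfg >> (62 - id * 2)) & 0x3;
    if (state != CXL_PSL9_TRACESTATE_FIN)          /* 0x3 == finished */
            cxl_p1_write(adapter, CXL_PSL9_TRACECFG,
                         0x8400000000000000ULL | id);  /* request stop */
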
diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c
index f5396f26ddb4..26f9feaa5d17 100644
--- a/drivers/mtd/devices/powernv_flash.c
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -47,6 +47,11 @@ enum flash_op {
47 FLASH_OP_ERASE, 47 FLASH_OP_ERASE,
48}; 48};
49 49
50/*
51 * Don't return -ERESTARTSYS if we can't get a token, the MTD core
52 * might have split up the call from userspace and called into the
53 * driver more than once, we'll already have done some amount of work.
54 */
50static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op, 55static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
51 loff_t offset, size_t len, size_t *retlen, u_char *buf) 56 loff_t offset, size_t len, size_t *retlen, u_char *buf)
52{ 57{
@@ -63,7 +68,8 @@ static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
63 if (token < 0) { 68 if (token < 0) {
64 if (token != -ERESTARTSYS) 69 if (token != -ERESTARTSYS)
65 dev_err(dev, "Failed to get an async token\n"); 70 dev_err(dev, "Failed to get an async token\n");
66 71 else
72 token = -EINTR;
67 return token; 73 return token;
68 } 74 }
69 75
@@ -78,32 +84,53 @@ static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
78 rc = opal_flash_erase(info->id, offset, len, token); 84 rc = opal_flash_erase(info->id, offset, len, token);
79 break; 85 break;
80 default: 86 default:
81 BUG_ON(1); 87 WARN_ON_ONCE(1);
82 }
83
84 if (rc != OPAL_ASYNC_COMPLETION) {
85 dev_err(dev, "opal_flash_async_op(op=%d) failed (rc %d)\n",
86 op, rc);
87 opal_async_release_token(token); 88 opal_async_release_token(token);
88 return -EIO; 89 return -EIO;
89 } 90 }
90 91
91 rc = opal_async_wait_response(token, &msg); 92 if (rc == OPAL_ASYNC_COMPLETION) {
92 opal_async_release_token(token); 93 rc = opal_async_wait_response_interruptible(token, &msg);
93 if (rc) { 94 if (rc) {
94 dev_err(dev, "opal async wait failed (rc %d)\n", rc); 95 /*
95 return -EIO; 96 * If we return the mtd core will free the
97 * buffer we've just passed to OPAL but OPAL
98 * will continue to read or write from that
99 * memory.
100 * It may be tempting to ultimately return 0
101 * if we're doing a read or a write since we
102 * are going to end up waiting until OPAL is
103 * done. However, because the MTD core sends
104 * us the userspace request in chunks, we need
105 * it to know we've been interrupted.
106 */
107 rc = -EINTR;
108 if (opal_async_wait_response(token, &msg))
109 dev_err(dev, "opal_async_wait_response() failed\n");
110 goto out;
111 }
112 rc = opal_get_async_rc(msg);
96 } 113 }
97 114
98 rc = opal_get_async_rc(msg); 115 /*
99 if (rc == OPAL_SUCCESS) { 116 * OPAL does mutual exclusion on the flash, it will return
100 rc = 0; 117 * OPAL_BUSY.
101 if (retlen) 118 * During firmware updates by the service processor OPAL may
102 *retlen = len; 119 * be (temporarily) prevented from accessing the flash, in
103 } else { 120 * this case OPAL will also return OPAL_BUSY.
104 rc = -EIO; 121 * Both cases aren't errors exactly but the flash could have
105 } 122 * changed, userspace should be informed.
123 */
124 if (rc != OPAL_SUCCESS && rc != OPAL_BUSY)
125 dev_err(dev, "opal_flash_async_op(op=%d) failed (rc %d)\n",
126 op, rc);
127
128 if (rc == OPAL_SUCCESS && retlen)
129 *retlen = len;
106 130
131 rc = opal_error_code(rc);
132out:
133 opal_async_release_token(token);
107 return rc; 134 return rc;
108} 135}
109 136
@@ -220,21 +247,20 @@ static int powernv_flash_probe(struct platform_device *pdev)
220 int ret; 247 int ret;
221 248
222 data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); 249 data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
223 if (!data) { 250 if (!data)
224 ret = -ENOMEM; 251 return -ENOMEM;
225 goto out; 252
226 }
227 data->mtd.priv = data; 253 data->mtd.priv = data;
228 254
229 ret = of_property_read_u32(dev->of_node, "ibm,opal-id", &(data->id)); 255 ret = of_property_read_u32(dev->of_node, "ibm,opal-id", &(data->id));
230 if (ret) { 256 if (ret) {
231 dev_err(dev, "no device property 'ibm,opal-id'\n"); 257 dev_err(dev, "no device property 'ibm,opal-id'\n");
232 goto out; 258 return ret;
233 } 259 }
234 260
235 ret = powernv_flash_set_driver_info(dev, &data->mtd); 261 ret = powernv_flash_set_driver_info(dev, &data->mtd);
236 if (ret) 262 if (ret)
237 goto out; 263 return ret;
238 264
239 dev_set_drvdata(dev, data); 265 dev_set_drvdata(dev, data);
240 266
@@ -243,10 +269,7 @@ static int powernv_flash_probe(struct platform_device *pdev)
243 * with an ffs partition at the start, it should prove easier for users 269 * with an ffs partition at the start, it should prove easier for users
244 * to deal with partitions or not as they see fit 270 * to deal with partitions or not as they see fit
245 */ 271 */
246 ret = mtd_device_register(&data->mtd, NULL, 0); 272 return mtd_device_register(&data->mtd, NULL, 0);
247
248out:
249 return ret;
250} 273}
251 274
252/** 275/**
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index f4241339edd2..87f1f0252299 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -10,6 +10,7 @@
10 */ 10 */
11 11
12#define _GNU_SOURCE 12#define _GNU_SOURCE
13#include <errno.h>
13#include <sched.h> 14#include <sched.h>
14#include <string.h> 15#include <string.h>
15#include <stdio.h> 16#include <stdio.h>
@@ -75,6 +76,7 @@ static void touch(void)
75 76
76static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu) 77static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
77{ 78{
79 int rc;
78 pthread_t tid; 80 pthread_t tid;
79 cpu_set_t cpuset; 81 cpu_set_t cpuset;
80 pthread_attr_t attr; 82 pthread_attr_t attr;
@@ -82,14 +84,23 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
82 CPU_ZERO(&cpuset); 84 CPU_ZERO(&cpuset);
83 CPU_SET(cpu, &cpuset); 85 CPU_SET(cpu, &cpuset);
84 86
85 pthread_attr_init(&attr); 87 rc = pthread_attr_init(&attr);
88 if (rc) {
89 errno = rc;
90 perror("pthread_attr_init");
91 exit(1);
92 }
86 93
87 if (pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset)) { 94 rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
95 if (rc) {
96 errno = rc;
88 perror("pthread_attr_setaffinity_np"); 97 perror("pthread_attr_setaffinity_np");
89 exit(1); 98 exit(1);
90 } 99 }
91 100
92 if (pthread_create(&tid, &attr, fn, arg)) { 101 rc = pthread_create(&tid, &attr, fn, arg);
102 if (rc) {
103 errno = rc;
93 perror("pthread_create"); 104 perror("pthread_create");
94 exit(1); 105 exit(1);
95 } 106 }
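
Unlike most libc calls, pthread_* functions report failure through their return value and leave errno untouched, so a bare perror() after a failing call prints whatever stale errno happened to be set. Copying the return value into errno first, as the patch does, is the standard remedy:

    int rc = pthread_attr_init(&attr);
    if (rc) {
            errno = rc;     /* pthreads return the error, they don't set errno */
            perror("pthread_attr_init");
            exit(1);
    }
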
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 17fb1b43c320..1899bd85121f 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -53,6 +53,8 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
53 } 53 }
54 54
55 while ((dp = readdir(sysfs))) { 55 while ((dp = readdir(sysfs))) {
56 int len;
57
56 if (!(dp->d_type & DT_DIR)) 58 if (!(dp->d_type & DT_DIR))
57 continue; 59 continue;
58 if (!strcmp(dp->d_name, "cpuidle")) 60 if (!strcmp(dp->d_name, "cpuidle"))
@@ -60,7 +62,9 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
60 if (!strstr(dp->d_name, "cpu")) 62 if (!strstr(dp->d_name, "cpu"))
61 continue; 63 continue;
62 64
63 sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name); 65 len = snprintf(file, LEN_MAX, "%s%s/dscr", CPU_PATH, dp->d_name);
66 if (len >= LEN_MAX)
67 continue;
64 if (access(file, F_OK)) 68 if (access(file, F_OK))
65 continue; 69 continue;
66 70
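
sprintf() could overrun 'file' given a long enough directory name; snprintf() bounds the write and returns the length the full string would have needed, so a return value >= the buffer size is a reliable truncation check:

    int len = snprintf(file, LEN_MAX, "%s%s/dscr", CPU_PATH, dp->d_name);
    if (len >= LEN_MAX)
            continue;       /* path didn't fit: skip rather than use a mangled name */
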
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 2f1f7b013293..241a4a4ee0e4 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -12,3 +12,4 @@ tm-signal-context-chk-gpr
12tm-signal-context-chk-vmx 12tm-signal-context-chk-vmx
13tm-signal-context-chk-vsx 13tm-signal-context-chk-vsx
14tm-vmx-unavail 14tm-vmx-unavail
15tm-unavailable
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index fca7c7f5e640..8ed6f8c57230 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -3,7 +3,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
3 tm-signal-context-chk-vmx tm-signal-context-chk-vsx 3 tm-signal-context-chk-vmx tm-signal-context-chk-vsx
4 4
5TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ 5TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
6 tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail \ 6 tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \
7 $(SIGNAL_CONTEXT_CHK_TESTS) 7 $(SIGNAL_CONTEXT_CHK_TESTS)
8 8
9include ../../lib.mk 9include ../../lib.mk
@@ -17,6 +17,7 @@ $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
17$(OUTPUT)/tm-tmspr: CFLAGS += -pthread 17$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
18$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 18$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
19$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o 19$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
20$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
20 21
21SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) 22SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
22$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S 23$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
new file mode 100644
index 000000000000..96c37f84ce54
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -0,0 +1,371 @@
1/*
2 * Copyright 2017, Gustavo Romero, Breno Leitao, Cyril Bur, IBM Corp.
3 * Licensed under GPLv2.
4 *
  5 * Force FP, VEC and VSX unavailable exceptions during a transaction in all
  6 * possible scenarios regarding the MSR.FP and MSR.VEC state, e.g. when FP
  7 * is enabled and VEC is disabled, when FP is disabled and VEC is enabled,
  8 * and so on. Then we check that the FP and VEC state restored after the
  9 * failure matches the state we set just before entering TM, i.e. we check
  10 * whether the abort due to an unavailable exception somehow corrupted the
  11 * recheckpointed FP and VEC/Altivec registers.
12 * N.B. In this test we do not test all the FP/Altivec/VSX registers for
13 * corruption, but only for registers vs0 and vs32, which are respectively
14 * representatives of FP and VEC/Altivec reg sets.
15 */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+
+#include "tm.h"
+
+#define DEBUG 0
+
+/* Unavailable exceptions to test in HTM */
+#define FP_UNA_EXCEPTION	0
+#define VEC_UNA_EXCEPTION	1
+#define VSX_UNA_EXCEPTION	2
+
+#define NUM_EXCEPTIONS		3
+
+struct Flags {
+	int touch_fp;
+	int touch_vec;
+	int result;
+	int exception;
+} flags;
+
+bool expecting_failure(void)
+{
+	if (flags.touch_fp && flags.exception == FP_UNA_EXCEPTION)
+		return false;
+
+	if (flags.touch_vec && flags.exception == VEC_UNA_EXCEPTION)
+		return false;
+
+	/*
+	 * If both FP and VEC are touched it does not mean that touching VSX
+	 * won't raise an exception. However, since the FP and VEC states are
+	 * already correctly loaded, the transaction is not aborted (i.e.
+	 * treclaimed/trecheckpointed) and MSR.VSX is just set to 1, so a TM
+	 * failure is not expected in this case either.
+	 */
+	if ((flags.touch_fp && flags.touch_vec) &&
+	     flags.exception == VSX_UNA_EXCEPTION)
+		return false;
+
+	return true;
+}
+
+/* Check if a failure occurred whilst in transaction. */
+bool is_failure(uint64_t condition_reg)
+{
+	/*
+	 * When failure handling occurs, CR0 is set to 0b1010 (0xa). Otherwise
+	 * the transaction completes without failure and hence reaches the
+	 * 'tend.' instruction, which sets CR0 to 0b0100 (0x4).
+	 */
+	return ((condition_reg >> 28) & 0xa) == 0xa;
+}
+
+void *ping(void *input)
+{
+
+	/*
+	 * Expected values for vs0 and vs32 after a TM failure. They must never
+	 * change, otherwise they were corrupted.
+	 */
+	uint64_t high_vs0 = 0x5555555555555555;
+	uint64_t low_vs0 = 0xffffffffffffffff;
+	uint64_t high_vs32 = 0x5555555555555555;
+	uint64_t low_vs32 = 0xffffffffffffffff;
+
+	/* Counter for the busy wait */
+	uint64_t counter = 0x1ff000000;
+
+	/*
+	 * Variable to keep a copy of the CR register content taken just after
+	 * we leave the transactional state.
+	 */
+	uint64_t cr_ = 0;
+
+	/*
+	 * Wait a bit so the thread can get its name "ping". Not important to
+	 * reproduce the issue, but nice to have for systemtap debugging.
+	 */
+	if (DEBUG)
+		sleep(1);
+
+	printf("If MSR.FP=%d MSR.VEC=%d: ", flags.touch_fp, flags.touch_vec);
+
+	if (flags.exception != FP_UNA_EXCEPTION &&
+	    flags.exception != VEC_UNA_EXCEPTION &&
+	    flags.exception != VSX_UNA_EXCEPTION) {
+		printf("No valid exception specified to test.\n");
+		return NULL;
+	}
+
+	asm (
+		/* Prepare to merge low and high. */
+		" mtvsrd 33, %[high_vs0] ;"
+		" mtvsrd 34, %[low_vs0] ;"
+
+		/*
+		 * Adjust VS0 expected value after a TM failure,
+		 * i.e. vs0 = 0x5555555555555555FFFFFFFFFFFFFFFF
+		 */
+		" xxmrghd 0, 33, 34 ;"
+
+		/*
+		 * Adjust VS32 expected value after a TM failure,
+		 * i.e. vs32 = 0x5555555555555555FFFFFFFFFFFFFFFF
+		 */
+		" xxmrghd 32, 33, 34 ;"
+
+		/*
+		 * Wait for enough context switches so load_fp and load_vec
+		 * overflow and MSR.FP, MSR.VEC, and MSR.VSX become zero (off).
+		 */
+		" mtctr %[counter] ;"
+
+		/* Decrement CTR and branch if CTR is non-zero. */
+		"1: bdnz 1b ;"
+
+		/*
+		 * Check if we want to touch FP prior to the test in order
+		 * to set MSR.FP = 1 before provoking an unavailable
+		 * exception in TM.
+		 */
+		" cmpldi %[touch_fp], 0 ;"
+		" beq no_fp ;"
+		" fadd 10, 10, 10 ;"
+		"no_fp: ;"
+
+		/*
+		 * Check if we want to touch VEC prior to the test in order
+		 * to set MSR.VEC = 1 before provoking an unavailable
+		 * exception in TM.
+		 */
+		" cmpldi %[touch_vec], 0 ;"
+		" beq no_vec ;"
+		" vaddcuw 10, 10, 10 ;"
+		"no_vec: ;"
+
+		/*
+		 * Perhaps it would be a better idea to do the
+		 * compares outside transactional context and simply
+		 * duplicate code.
+		 */
+		" tbegin. ;"
+		" beq trans_fail ;"
+
+		/* Do we do FP Unavailable? */
+		" cmpldi %[exception], %[ex_fp] ;"
+		" bne 1f ;"
+		" fadd 10, 10, 10 ;"
+		" b done ;"
+
+		/* Do we do VEC Unavailable? */
+		"1: cmpldi %[exception], %[ex_vec] ;"
+		" bne 2f ;"
+		" vaddcuw 10, 10, 10 ;"
+		" b done ;"
+
+		/*
+		 * Not FP or VEC, therefore VSX. Ensure this
+		 * instruction always generates a VSX Unavailable.
+		 * ISA 3.0 is tricky here.
+		 * (xxmrghd will generate one on both ISA 2.07 and ISA 3.0)
+		 */
+		"2: xxmrghd 10, 10, 10 ;"
+
+		"done: tend. ;"
+
+		"trans_fail: ;"
+
+		/* Give values back to C. */
+		" mfvsrd %[high_vs0], 0 ;"
+		" xxsldwi 3, 0, 0, 2 ;"
+		" mfvsrd %[low_vs0], 3 ;"
+		" mfvsrd %[high_vs32], 32 ;"
+		" xxsldwi 3, 32, 32, 2 ;"
+		" mfvsrd %[low_vs32], 3 ;"
+
+		/* Give CR back to C so that it can check what happened. */
+		" mfcr %[cr_] ;"
+
+		: [high_vs0] "+r" (high_vs0),
+		  [low_vs0] "+r" (low_vs0),
+		  [high_vs32] "=r" (high_vs32),
+		  [low_vs32] "=r" (low_vs32),
+		  [cr_] "+r" (cr_)
+		: [touch_fp] "r" (flags.touch_fp),
+		  [touch_vec] "r" (flags.touch_vec),
+		  [exception] "r" (flags.exception),
+		  [ex_fp] "i" (FP_UNA_EXCEPTION),
+		  [ex_vec] "i" (VEC_UNA_EXCEPTION),
+		  [ex_vsx] "i" (VSX_UNA_EXCEPTION),
+		  [counter] "r" (counter)
+
+		: "cr0", "ctr", "v10", "vs0", "vs10", "vs3", "vs32", "vs33",
+		  "vs34", "fr10"
+
+		);
+
+	/*
+	 * Check if we were expecting a failure and it did not occur by checking
+	 * the CR0 state just after we leave the transaction. Either way we
+	 * check if vs0 or vs32 got corrupted.
+	 */
+	if (expecting_failure() && !is_failure(cr_)) {
+		printf("\n\tExpecting the transaction to fail, %s",
+			"but it didn't\n\t");
+		flags.result++;
+	}
+
+	/* Check if we were not expecting a failure and it occurred. */
+	if (!expecting_failure() && is_failure(cr_)) {
+		printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
+			failure_code());
+		return (void *) -1;
+	}
+
+	/*
+	 * Check if TM failed due to the cause we were expecting. 0xda is a
+	 * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause.
+	 */
+	if (is_failure(cr_) && !failure_is_unavailable()) {
+		printf("\n\tUnexpected failure cause 0x%02lx\n\t",
+			failure_code());
+		return (void *) -1;
+	}
+
+	/* 0x4 is a success and 0xa is a fail. See comment in is_failure(). */
+	if (DEBUG)
+		printf("CR0: 0x%1lx ", cr_ >> 28);
+
+	/* Check FP (vs0) for the expected value. */
+	if (high_vs0 != 0x5555555555555555 || low_vs0 != 0xFFFFFFFFFFFFFFFF) {
+		printf("FP corrupted!");
+		printf(" high = %#16" PRIx64 " low = %#16" PRIx64 " ",
+			high_vs0, low_vs0);
+		flags.result++;
+	} else
+		printf("FP ok ");
+
+	/* Check VEC (vs32) for the expected value. */
+	if (high_vs32 != 0x5555555555555555 || low_vs32 != 0xFFFFFFFFFFFFFFFF) {
+		printf("VEC corrupted!");
+		printf(" high = %#16" PRIx64 " low = %#16" PRIx64,
+			high_vs32, low_vs32);
+		flags.result++;
+	} else
+		printf("VEC ok");
+
+	putchar('\n');
+
+	return NULL;
+}
+
+/* Thread to force context switches */
+void *pong(void *not_used)
+{
+	/* Wait so the thread can get its name "pong". */
+	if (DEBUG)
+		sleep(1);
+
+	/* Classed as an interactive-like thread. */
+	while (1)
+		sched_yield();
+}
+
+/* Function that creates a thread and launches the "ping" task. */
+void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
+{
+	int retries = 2;
+	void *ret_value;
+	pthread_t t0;
+
+	flags.touch_fp = fp;
+	flags.touch_vec = vec;
+
+	/*
+	 * With bad luck it's possible that the transaction is aborted not by
+	 * the unavailable exception we expect to catch in the middle of it,
+	 * but, for instance, by a context switch or by a KVM reschedule (if
+	 * it's running on a VM). Thus we try a few times before giving up,
+	 * checking if the failure cause is the one we expect.
+	 */
+	do {
+		/* Bind 'ping' to CPU 0, as specified in 'attr'. */
+		pthread_create(&t0, attr, ping, (void *) &flags);
+		pthread_setname_np(t0, "ping");
+		pthread_join(t0, &ret_value);
+		retries--;
+	} while (ret_value != NULL && retries);
+
+	if (!retries) {
+		flags.result = 1;
+		if (DEBUG)
+			printf("All transactions failed unexpectedly\n");
+
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int exception; /* FP = 0, VEC = 1, VSX = 2 */
+	pthread_t t1;
+	pthread_attr_t attr;
+	cpu_set_t cpuset;
+
+	/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+
+	/* Init pthread attribute. */
+	pthread_attr_init(&attr);
+
+	/* Set CPU 0 mask into the pthread attribute. */
+	pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+
+	pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+	pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */
+
+	flags.result = 0;
+
+	for (exception = 0; exception < NUM_EXCEPTIONS; exception++) {
+		printf("Checking if FP/VEC registers are sane after");
+
+		if (exception == FP_UNA_EXCEPTION)
+			printf(" a FP unavailable exception...\n");
+
+		else if (exception == VEC_UNA_EXCEPTION)
+			printf(" a VEC unavailable exception...\n");
+
+		else
+			printf(" a VSX unavailable exception...\n");
+
+		flags.exception = exception;
+
+		test_fp_vec(0, 0, &attr);
+		test_fp_vec(1, 0, &attr);
+		test_fp_vec(0, 1, &attr);
+		test_fp_vec(1, 1, &attr);
+
+	}
+
+	if (flags.result > 0) {
+		printf("result: failed!\n");
+		exit(1);
+	} else {
+		printf("result: success\n");
+		exit(0);
+	}
+}
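The test's pass/fail detection hinges on the CR0 convention documented in is_failure(): the failure handler leaves CR0 = 0b1010, while a committed transaction leaves the 0b0100 that 'tend.' set. A minimal standalone sketch of that convention, not part of the patch, assuming a TM-capable CPU (POWER8/POWER9 with HTM enabled) and a powerpc64 toolchain that accepts the TM mnemonics (possibly needing -mcpu=power8):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cr = 0;

	asm volatile(
		" tbegin.   ;"	/* Start a transaction; sets CR0 */
		" beq 1f    ;"	/* EQ set means the failure path */
		" tend.     ;"	/* Commit; sets CR0 to 0b0100 */
		"1: mfcr %0 ;"	/* Copy the whole CR to a GPR for C */
		: "=r" (cr)
		:
		: "cr0", "memory");

	printf("CR0 = 0x%lx (0x4 success, 0xa failure)\n",
		(unsigned long)((cr >> 28) & 0xf));
	return 0;
}

Masking the top nibble with 0xa, as is_failure() does, is then enough to distinguish the two outcomes.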
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index 0ffff04433c5..df4204247d45 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -47,6 +47,11 @@ static inline bool failure_is_syscall(void)
 	return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
 }
 
+static inline bool failure_is_unavailable(void)
+{
+	return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
+}
+
 static inline bool failure_is_nesting(void)
 {
 	return (__builtin_get_texasru() & 0x400000);
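A hedged usage sketch of the new helper, not taken from the patch: after an abort, a selftest can separate a facility-unavailable abort from other causes, assuming tm.h's failure_code() and the TM_CAUSE_FAC_UNAV mask (0xda) pulled in from the kernel's TM uapi header:

#include <stdio.h>

#include "tm.h"

/* Called after a transaction has failed, e.g. when is_failure() is true. */
static void report_failure_cause(void)
{
	if (failure_is_unavailable())
		printf("aborted by a facility unavailable exception\n");
	else
		printf("unexpected failure cause 0x%02lx\n", failure_code());
}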