author		Linus Torvalds <torvalds@linux-foundation.org>	2017-02-22 13:30:38 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-22 13:30:38 -0500
commit		38705613b74ab090eee55c327cd0cb77fb10eb26 (patch)
tree		b219755a7eaaab097fbda4041cf2ba21df44fed5
parent		ff47d8c05019d6e7753cef270d6399cb5a33be57 (diff)
parent		438e69b52be776c035aa2a851ccc1709033d729b (diff)
Merge tag 'powerpc-4.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
 "Highlights include:

   - Support for direct mapped LPC on POWER9, giving Linux direct
     access to devices that may be on there such as a UART.

   - Memory hotplug support for the Power9 Radix MMU.

   - Add new AUX vectors describing the processor's cache geometry, to
     be used by glibc.

   - The ability for a guest to ask the hypervisor to resize the
     guest's hash table, and in addition support for doing so
     automatically when memory is hotplugged into/out-of the guest.
     This allows the hash table to be sized based on the current memory
     usage of the guest, rather than the maximum possible memory usage.

   - Implementation of optprobes (kprobe optimisation) for powerpc.

  In addition there's the topic branch shared with the KVM tree, which
  includes support for guests to use the Radix MMU on Power9.

  Thanks to: Alistair Popple, Andrew Donnellan, Aneesh Kumar K.V, Anju
  T, Anton Blanchard, Benjamin Herrenschmidt, Chris Packham, Daniel
  Axtens, Daniel Borkmann, David Gibson, Finn Thain, Gautham R. Shenoy,
  Gavin Shan, Greg Kurz, Joel Stanley, John Allen, Madhavan Srinivasan,
  Mahesh Salgaonkar, Markus Elfring, Michael Neuling, Nathan Fontenot,
  Naveen N. Rao, Nicholas Piggin, Paul Mackerras, Ravi Bangoria, Reza
  Arbab, Shailendra Singh, Vaibhav Jain, Wei Yongjun"

* tag 'powerpc-4.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (129 commits)
  powerpc/mm/radix: Skip ptesync in pte update helpers
  powerpc/mm/radix: Use ptep_get_and_clear_full when clearing pte for full mm
  powerpc/mm/radix: Update pte update sequence for pte clear case
  powerpc/mm: Update PROTFAULT handling in the page fault path
  powerpc/xmon: Fix data-breakpoint
  powerpc/mm: Fix build break with BOOK3S_64=n and MEMORY_HOTPLUG=y
  powerpc/mm: Fix build break when CMA=n && SPAPR_TCE_IOMMU=y
  powerpc/mm: Fix build break with RADIX=y & HUGETLBFS=n
  powerpc/pseries: Fix typo in parameter description
  powerpc/kprobes: Remove kprobe_exceptions_notify()
  kprobes: Introduce weak variant of kprobe_exceptions_notify()
  powerpc/ftrace: Fix confusing help text for DISABLE_MPROFILE_KERNEL
  powerpc/powernv: Fix opal_exit tracepoint opcode
  powerpc: Add a prototype for mcount() so it can be versioned
  powerpc: Drop GPL from of_node_to_nid() export to match other arches
  powerpc/kprobes: Optimize kprobe in kretprobe_trampoline()
  powerpc/kprobes: Implement Optprobes
  powerpc/kprobes: Fixes for kprobe_lookup_name() on BE
  powerpc: Add helper to check if offset is within relative branch range
  powerpc/bpf: Introduce __PPC_SH64()
  ...
 Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt | 42
 Documentation/devicetree/bindings/powerpc/opal/power-mgt.txt | 118
 Documentation/virtual/kvm/api.txt | 83
 arch/m68k/include/asm/macintosh.h | 2
 arch/m68k/mac/config.c | 18
 arch/m68k/mac/misc.c | 72
 arch/powerpc/Kconfig | 9
 arch/powerpc/Kconfig.debug | 3
 arch/powerpc/boot/.gitignore | 4
 arch/powerpc/configs/powernv_defconfig | 11
 arch/powerpc/configs/ppc64_defconfig | 7
 arch/powerpc/configs/pseries_defconfig | 8
 arch/powerpc/include/asm/asm-prototypes.h | 2
 arch/powerpc/include/asm/book3s/64/hash.h | 4
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 8
 arch/powerpc/include/asm/book3s/64/mmu.h | 18
 arch/powerpc/include/asm/book3s/64/pgtable-4k.h | 5
 arch/powerpc/include/asm/book3s/64/pgtable-64k.h | 8
 arch/powerpc/include/asm/book3s/64/pgtable.h | 17
 arch/powerpc/include/asm/book3s/64/radix.h | 41
 arch/powerpc/include/asm/cache.h | 23
 arch/powerpc/include/asm/checksum.h | 21
 arch/powerpc/include/asm/code-patching.h | 2
 arch/powerpc/include/asm/cpuidle.h | 49
 arch/powerpc/include/asm/elf.h | 42
 arch/powerpc/include/asm/exception-64s.h | 83
 arch/powerpc/include/asm/firmware.h | 5
 arch/powerpc/include/asm/head-64.h | 232
 arch/powerpc/include/asm/hvcall.h | 13
 arch/powerpc/include/asm/isa-bridge.h | 29
 arch/powerpc/include/asm/kprobes.h | 27
 arch/powerpc/include/asm/kvm_book3s.h | 26
 arch/powerpc/include/asm/kvm_book3s_64.h | 6
 arch/powerpc/include/asm/kvm_host.h | 6
 arch/powerpc/include/asm/kvm_ppc.h | 2
 arch/powerpc/include/asm/mmu.h | 1
 arch/powerpc/include/asm/opal-api.h | 3
 arch/powerpc/include/asm/opal.h | 8
 arch/powerpc/include/asm/page_64.h | 4
 arch/powerpc/include/asm/pci-bridge.h | 18
 arch/powerpc/include/asm/plpar_wrappers.h | 12
 arch/powerpc/include/asm/powernv.h | 19
 arch/powerpc/include/asm/ppc-opcode.h | 1
 arch/powerpc/include/asm/processor.h | 3
 arch/powerpc/include/asm/prom.h | 18
 arch/powerpc/include/asm/reg.h | 6
 arch/powerpc/include/asm/rtas.h | 1
 arch/powerpc/include/asm/sparsemem.h | 7
 arch/powerpc/include/asm/uaccess.h | 6
 arch/powerpc/include/uapi/asm/auxvec.h | 33
 arch/powerpc/include/uapi/asm/elf.h | 23
 arch/powerpc/include/uapi/asm/kvm.h | 20
 arch/powerpc/kernel/Makefile | 3
 arch/powerpc/kernel/align.c | 2
 arch/powerpc/kernel/asm-offsets.c | 14
 arch/powerpc/kernel/exceptions-64s.S | 262
 arch/powerpc/kernel/fadump.c | 25
 arch/powerpc/kernel/hw_breakpoint.c | 4
 arch/powerpc/kernel/idle_book3s.S | 46
 arch/powerpc/kernel/iomap.c | 1
 arch/powerpc/kernel/isa-bridge.c | 92
 arch/powerpc/kernel/kprobes.c | 17
 arch/powerpc/kernel/legacy_serial.c | 3
 arch/powerpc/kernel/misc_64.S | 28
 arch/powerpc/kernel/optprobes.c | 347
 arch/powerpc/kernel/optprobes_head.S | 135
 arch/powerpc/kernel/prom_init.c | 20
 arch/powerpc/kernel/rtas.c | 32
 arch/powerpc/kernel/rtasd.c | 7
 arch/powerpc/kernel/setup-common.c | 9
 arch/powerpc/kernel/setup_32.c | 8
 arch/powerpc/kernel/setup_64.c | 196
 arch/powerpc/kernel/vdso.c | 18
 arch/powerpc/kvm/Makefile | 3
 arch/powerpc/kvm/book3s.c | 1
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 110
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 716
 arch/powerpc/kvm/book3s_hv.c | 205
 arch/powerpc/kvm/book3s_hv_builtin.c | 30
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 25
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 18
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 154
 arch/powerpc/kvm/book3s_segment.S | 32
 arch/powerpc/kvm/powerpc.c | 32
 arch/powerpc/lib/Makefile | 2
 arch/powerpc/lib/checksum_64.S | 12
 arch/powerpc/lib/code-patching.c | 45
 arch/powerpc/lib/copypage_64.S | 4
 arch/powerpc/lib/sstep.c | 6
 arch/powerpc/lib/string_64.S | 6
 arch/powerpc/mm/copro_fault.c | 10
 arch/powerpc/mm/fault.c | 43
 arch/powerpc/mm/hash_utils_64.c | 62
 arch/powerpc/mm/hugetlbpage-hash64.c | 21
 arch/powerpc/mm/init-common.c | 3
 arch/powerpc/mm/init_64.c | 35
 arch/powerpc/mm/mem.c | 4
 arch/powerpc/mm/mmu_context_iommu.c | 2
 arch/powerpc/mm/numa.c | 15
 arch/powerpc/mm/pgtable-book3s64.c | 4
 arch/powerpc/mm/pgtable-radix.c | 261
 arch/powerpc/mm/pgtable_64.c | 22
 arch/powerpc/mm/subpage-prot.c | 4
 arch/powerpc/net/bpf_jit.h | 11
 arch/powerpc/net/bpf_jit_comp.c | 17
 arch/powerpc/net/bpf_jit_comp64.c | 16
 arch/powerpc/platforms/maple/pci.c | 1
 arch/powerpc/platforms/powernv/idle.c | 142
 arch/powerpc/platforms/powernv/opal-hmi.c | 3
 arch/powerpc/platforms/powernv/opal-irqchip.c | 55
 arch/powerpc/platforms/powernv/opal-lpc.c | 22
 arch/powerpc/platforms/powernv/opal-msglog.c | 4
 arch/powerpc/platforms/powernv/opal-wrappers.S | 73
 arch/powerpc/platforms/powernv/opal.c | 11
 arch/powerpc/platforms/powernv/pci-ioda.c | 23
 arch/powerpc/platforms/powernv/pci.c | 7
 arch/powerpc/platforms/powernv/pci.h | 1
 arch/powerpc/platforms/powernv/powernv.h | 3
 arch/powerpc/platforms/powernv/smp.c | 14
 arch/powerpc/platforms/pseries/Kconfig | 1
 arch/powerpc/platforms/pseries/cmm.c | 2
 arch/powerpc/platforms/pseries/dlpar.c | 8
 arch/powerpc/platforms/pseries/firmware.c | 3
 arch/powerpc/platforms/pseries/hotplug-memory.c | 75
 arch/powerpc/platforms/pseries/lpar.c | 138
 arch/powerpc/platforms/pseries/mobility.c | 34
 arch/powerpc/platforms/pseries/setup.c | 1
 arch/powerpc/xmon/xmon.c | 8
 drivers/cpuidle/cpuidle-powernv.c | 129
 drivers/macintosh/Kconfig | 24
 drivers/macintosh/Makefile | 1
 drivers/macintosh/adb.c | 4
 drivers/macintosh/via-cuda.c | 294
 drivers/macintosh/via-maciisi.c | 677
 drivers/misc/cxl/Makefile | 3
 drivers/misc/cxl/api.c | 1
 drivers/misc/cxl/cxl.h | 61
 drivers/misc/cxl/main.c | 3
 drivers/misc/cxl/pci.c | 5
 drivers/misc/cxl/vphb.c | 51
 include/linux/cpuidle.h | 1
 include/uapi/linux/kvm.h | 6
 kernel/kprobes.c | 6
 scripts/Makefile.gcc-plugins | 10
 144 files changed, 4502 insertions(+), 1832 deletions(-)
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt b/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
index c41b2187eaa8..dc9bb3182525 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
@@ -5,8 +5,46 @@ The cache bindings explained below are ePAPR compliant
 
 Required Properties:
 
-- compatible	: Should include "fsl,chip-l2-cache-controller" and "cache"
-		  where chip is the processor (bsc9132, npc8572 etc.)
+- compatible	: Should include one of the following:
+		  "fsl,8540-l2-cache-controller"
+		  "fsl,8541-l2-cache-controller"
+		  "fsl,8544-l2-cache-controller"
+		  "fsl,8548-l2-cache-controller"
+		  "fsl,8555-l2-cache-controller"
+		  "fsl,8568-l2-cache-controller"
+		  "fsl,b4420-l2-cache-controller"
+		  "fsl,b4860-l2-cache-controller"
+		  "fsl,bsc9131-l2-cache-controller"
+		  "fsl,bsc9132-l2-cache-controller"
+		  "fsl,c293-l2-cache-controller"
+		  "fsl,mpc8536-l2-cache-controller"
+		  "fsl,mpc8540-l2-cache-controller"
+		  "fsl,mpc8541-l2-cache-controller"
+		  "fsl,mpc8544-l2-cache-controller"
+		  "fsl,mpc8548-l2-cache-controller"
+		  "fsl,mpc8555-l2-cache-controller"
+		  "fsl,mpc8560-l2-cache-controller"
+		  "fsl,mpc8568-l2-cache-controller"
+		  "fsl,mpc8569-l2-cache-controller"
+		  "fsl,mpc8572-l2-cache-controller"
+		  "fsl,p1010-l2-cache-controller"
+		  "fsl,p1011-l2-cache-controller"
+		  "fsl,p1012-l2-cache-controller"
+		  "fsl,p1013-l2-cache-controller"
+		  "fsl,p1014-l2-cache-controller"
+		  "fsl,p1015-l2-cache-controller"
+		  "fsl,p1016-l2-cache-controller"
+		  "fsl,p1020-l2-cache-controller"
+		  "fsl,p1021-l2-cache-controller"
+		  "fsl,p1022-l2-cache-controller"
+		  "fsl,p1023-l2-cache-controller"
+		  "fsl,p1024-l2-cache-controller"
+		  "fsl,p1025-l2-cache-controller"
+		  "fsl,p2010-l2-cache-controller"
+		  "fsl,p2020-l2-cache-controller"
+		  "fsl,t2080-l2-cache-controller"
+		  "fsl,t4240-l2-cache-controller"
+		  and "cache".
 - reg		: Address and size of L2 cache controller registers
 - cache-size	: Size of the entire L2 cache
 - interrupts	: Error interrupt of L2 controller
diff --git a/Documentation/devicetree/bindings/powerpc/opal/power-mgt.txt b/Documentation/devicetree/bindings/powerpc/opal/power-mgt.txt
new file mode 100644
index 000000000000..9d619e955576
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/opal/power-mgt.txt
@@ -0,0 +1,118 @@
+IBM Power-Management Bindings
+=============================
+
+Linux running on baremetal POWER machines has access to the processor
+idle states. The description of these idle states is exposed via the
+node @power-mgt in the device-tree by the firmware.
+
+Definitions:
+----------------
+Typically each idle state has the following associated properties:
+
+- name: The name of the idle state as defined by the firmware.
+
+- flags: indicating some aspects of this idle states such as the
+	 extent of state-loss, whether timebase is stopped on this
+	 idle states and so on. The flag bits are as follows:
+
+- exit-latency: The latency involved in transitioning the state of the
+		CPU from idle to running.
+
+- target-residency: The minimum time that the CPU needs to reside in
+		    this idle state in order to accrue power-savings
+		    benefit.
+
+Properties
+----------------
+The following properties provide details about the idle states. These
+properties are exposed as arrays. Each entry in the property array
+provides the value of that property for the idle state associated with
+the array index of that entry.
+
+If idle-states are defined, then the properties
+"ibm,cpu-idle-state-names" and "ibm,cpu-idle-state-flags" are
+required. The other properties are required unless mentioned
+otherwise. The length of all the property arrays must be the same.
+
+- ibm,cpu-idle-state-names:
+	Array of strings containing the names of the idle states.
+
+- ibm,cpu-idle-state-flags:
+	Array of unsigned 32-bit values containing the values of the
+	flags associated with the the aforementioned idle-states. The
+	flag bits are as follows:
+	0x00000001 /* Decrementer would stop */
+	0x00000002 /* Needs timebase restore */
+	0x00001000 /* Restore GPRs like nap */
+	0x00002000 /* Restore hypervisor resource from PACA pointer */
+	0x00004000 /* Program PORE to restore PACA pointer */
+	0x00010000 /* This is a nap state (POWER7,POWER8) */
+	0x00020000 /* This is a fast-sleep state (POWER8)*/
+	0x00040000 /* This is a winkle state (POWER8) */
+	0x00080000 /* This is a fast-sleep state which requires a */
+		   /* software workaround for restoring the */
+		   /* timebase (POWER8) */
+	0x00800000 /* This state uses SPR PMICR instruction */
+		   /* (POWER8)*/
+	0x00100000 /* This is a fast stop state (POWER9) */
+	0x00200000 /* This is a deep-stop state (POWER9) */
+
+- ibm,cpu-idle-state-latencies-ns:
+	Array of unsigned 32-bit values containing the values of the
+	exit-latencies (in ns) for the idle states in
+	ibm,cpu-idle-state-names.
+
+- ibm,cpu-idle-state-residency-ns:
+	Array of unsigned 32-bit values containing the values of the
+	target-residency (in ns) for the idle states in
+	ibm,cpu-idle-state-names. On POWER8 this is an optional
+	property. If the property is absent, the target residency for
+	the "Nap", "FastSleep" are defined to 10000 and 300000000
+	respectively by the kernel. On POWER9 this property is required.
+
+- ibm,cpu-idle-state-psscr:
+	Array of unsigned 64-bit values containing the values for the
+	PSSCR for each of the idle states in ibm,cpu-idle-state-names.
+	This property is required on POWER9 and absent on POWER8.
+
+- ibm,cpu-idle-state-psscr-mask:
+	Array of unsigned 64-bit values containing the masks
+	indicating which psscr fields are set in the corresponding
+	entries of ibm,cpu-idle-state-psscr. This property is
+	required on POWER9 and absent on POWER8.
+
+	Whenever the firmware sets an entry in
+	ibm,cpu-idle-state-psscr-mask value to 0xf, it implies that
+	only the Requested Level (RL) field of the corresponding entry
+	in ibm,cpu-idle-state-psscr should be considered by the
+	kernel. For such idle states, the kernel would set the
+	remaining fields of the psscr to the following sane-default
+	values.
+
+		- ESL and EC bits are to 1. So wakeup from any stop
+		  state will be at vector 0x100.
+
+		- MTL and PSLL are set to the maximum allowed value as
+		  per the ISA, i.e. 15.
+
+		- The Transition Rate, TR is set to the Maximum value
+		  3.
+
+	For all the other values of the entry in
+	ibm,cpu-idle-state-psscr-mask, the kernel expects all the
+	psscr fields of the corresponding entry in
+	ibm,cpu-idle-state-psscr to be correctly set by the firmware.
+
+- ibm,cpu-idle-state-pmicr:
+	Array of unsigned 64-bit values containing the pmicr values
+	for the idle states in ibm,cpu-idle-state-names. This 64-bit
+	register value is to be set in pmicr for the corresponding
+	state if the flag indicates that pmicr SPR should be set. This
+	is an optional property on POWER8 and is absent on
+	POWER9.
+
+- ibm,cpu-idle-state-pmicr-mask:
+	Array of unsigned 64-bit values containing the mask indicating
+	which of the fields of the PMICR are set in the corresponding
+	entries in ibm,cpu-idle-state-pmicr. This is an optional
+	property on POWER8 and is absent on POWER9.
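
For orientation, the parallel arrays in this binding line up by index: entry i of every property describes the same idle state. The following is a minimal, illustrative C sketch of walking them with the standard OF accessors — it is not the parser this series adds; the "/ibm,opal/power-mgt" node path and the minimal error handling are assumptions:

#include <linux/errno.h>
#include <linux/of.h>
#include <linux/printk.h>

static int dump_idle_states(void)
{
	struct device_node *np;
	int i, nr_states;

	np = of_find_node_by_path("/ibm,opal/power-mgt");	/* assumed path */
	if (!np)
		return -ENODEV;

	/* All property arrays must have the same length, so any one
	 * of the required arrays gives the state count. */
	nr_states = of_property_count_u32_elems(np, "ibm,cpu-idle-state-flags");

	for (i = 0; i < nr_states; i++) {
		const char *name;
		u32 flags, latency;

		of_property_read_string_index(np, "ibm,cpu-idle-state-names",
					      i, &name);
		of_property_read_u32_index(np, "ibm,cpu-idle-state-flags",
					   i, &flags);
		of_property_read_u32_index(np, "ibm,cpu-idle-state-latencies-ns",
					   i, &latency);
		pr_info("%s: flags=0x%08x exit-latency=%u ns\n",
			name, flags, latency);
	}
	of_node_put(np);
	return 0;
}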
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 03145b7cafaa..4470671b0c26 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3201,6 +3201,71 @@ struct kvm_reinject_control {
 pit_reinject = 0 (!reinject mode) is recommended, unless running an old
 operating system that uses the PIT for timing (e.g. Linux 2.4.x).
 
+4.99 KVM_PPC_CONFIGURE_V3_MMU
+
+Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_mmuv3_cfg (in)
+Returns: 0 on success,
+	 -EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
+	 -EINVAL if the configuration is invalid
+
+This ioctl controls whether the guest will use radix or HPT (hashed
+page table) translation, and sets the pointer to the process table for
+the guest.
+
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;
+};
+
+There are two bits that can be set in flags; KVM_PPC_MMUV3_RADIX and
+KVM_PPC_MMUV3_GTSE.  KVM_PPC_MMUV3_RADIX, if set, configures the guest
+to use radix tree translation, and if clear, to use HPT translation.
+KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest
+to be able to use the global TLB and SLB invalidation instructions;
+if clear, the guest may not use these instructions.
+
+The process_table field specifies the address and size of the guest
+process table, which is in the guest's space.  This field is formatted
+as the second doubleword of the partition table entry, as defined in
+the Power ISA V3.00, Book III section 5.7.6.1.
+
+4.100 KVM_PPC_GET_RMMU_INFO
+
+Capability: KVM_CAP_PPC_RADIX_MMU
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_rmmu_info (out)
+Returns: 0 on success,
+	 -EFAULT if struct kvm_ppc_rmmu_info cannot be written,
+	 -EINVAL if no useful information can be returned
+
+This ioctl returns a structure containing two things: (a) a list
+containing supported radix tree geometries, and (b) a list that maps
+page sizes to put in the "AP" (actual page size) field for the tlbie
+(TLB invalidate entry) instruction.
+
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
+The geometries[] field gives up to 8 supported geometries for the
+radix page table, in terms of the log base 2 of the smallest page
+size, and the number of bits indexed at each level of the tree, from
+the PTE level up to the PGD level in that order.  Any unused entries
+will have 0 in the page_shift field.
+
+The ap_encodings gives the supported page sizes and their AP field
+encodings, encoded with the AP value in the top 3 bits and the log
+base 2 of the page size in the bottom 6 bits.
+
 5. The kvm_run structure
 ------------------------
 
@@ -3942,3 +4007,21 @@ In order to use SynIC, it has to be activated by setting this
 capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
 will disable the use of APIC hardware virtualization even if supported
 by the CPU, as it's incompatible with SynIC auto-EOI behavior.
+
+8.3 KVM_CAP_PPC_RADIX_MMU
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel can support guests using the
+radix MMU defined in Power ISA V3.00 (as implemented in the POWER9
+processor).
+
+8.4 KVM_CAP_PPC_HASH_MMU_V3
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel can support guests using the
+hashed page table MMU defined in Power ISA V3.00 (as implemented in
+the POWER9 processor), including in-memory segment tables.
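
As a rough sketch of how userspace would exercise the new interface — built only from the struct and flag names documented above, with the capability check on the VM fd and the process-table value left as the caller's assumptions:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* vm_fd: an open KVM VM file descriptor.  proc_table: the second
 * doubleword of the partition-table entry (format per Power ISA
 * V3.00, Book III 5.7.6.1); an illustrative parameter, not a real
 * value. */
static int configure_radix_guest(int vm_fd, __u64 proc_table)
{
	struct kvm_ppc_mmuv3_cfg cfg = {
		.flags = KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE,
		.process_table = proc_table,
	};

	/* Assumed here: KVM_CHECK_EXTENSION is accepted on the VM fd. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RADIX_MMU) <= 0) {
		fprintf(stderr, "radix guests not supported\n");
		return -1;
	}
	return ioctl(vm_fd, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
}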
diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h
index 42235e7fbeed..5b81ab188aa5 100644
--- a/arch/m68k/include/asm/macintosh.h
+++ b/arch/m68k/include/asm/macintosh.h
@@ -38,7 +38,7 @@ struct mac_model
 
 #define MAC_ADB_NONE		0
 #define MAC_ADB_II		1
-#define MAC_ADB_IISI		2
+#define MAC_ADB_EGRET		2
 #define MAC_ADB_CUDA		3
 #define MAC_ADB_PB1		4
 #define MAC_ADB_PB2		5
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index e46895316eb0..9dc65a4c28d2 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -286,7 +286,7 @@ static struct mac_model mac_data_table[] = {
 	}, {
 		.ident		= MAC_MODEL_IISI,
 		.name		= "IIsi",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_OLD,
 		.scc_type	= MAC_SCC_II,
@@ -295,7 +295,7 @@ static struct mac_model mac_data_table[] = {
 	}, {
 		.ident		= MAC_MODEL_IIVI,
 		.name		= "IIvi",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
@@ -304,7 +304,7 @@ static struct mac_model mac_data_table[] = {
 	}, {
 		.ident		= MAC_MODEL_IIVX,
 		.name		= "IIvx",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
@@ -319,7 +319,7 @@ static struct mac_model mac_data_table[] = {
 	{
 		.ident		= MAC_MODEL_CLII,
 		.name		= "Classic II",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
@@ -352,7 +352,7 @@ static struct mac_model mac_data_table[] = {
 	{
 		.ident		= MAC_MODEL_LC,
 		.name		= "LC",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
@@ -361,7 +361,7 @@ static struct mac_model mac_data_table[] = {
 	}, {
 		.ident		= MAC_MODEL_LCII,
 		.name		= "LC II",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
@@ -370,7 +370,7 @@ static struct mac_model mac_data_table[] = {
 	}, {
 		.ident		= MAC_MODEL_LCIII,
 		.name		= "LC III",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
@@ -498,7 +498,7 @@ static struct mac_model mac_data_table[] = {
 	{
 		.ident		= MAC_MODEL_P460,
 		.name		= "Performa 460",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
@@ -575,7 +575,7 @@ static struct mac_model mac_data_table[] = {
 	}, {
 		.ident		= MAC_MODEL_P600,
 		.name		= "Performa 600",
-		.adb_type	= MAC_ADB_IISI,
+		.adb_type	= MAC_ADB_EGRET,
 		.via_type	= MAC_VIA_IICI,
 		.scsi_type	= MAC_SCSI_LC,
 		.scc_type	= MAC_SCC_II,
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 3b1f7a6159f8..5b01704c85eb 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -141,54 +141,6 @@ static void pmu_write_pram(int offset, __u8 data)
 #define pmu_write_pram NULL
 #endif
 
-#if 0 /* def CONFIG_ADB_MACIISI */
-extern int maciisi_request(struct adb_request *req,
-		void (*done)(struct adb_request *), int nbytes, ...);
-
-static long maciisi_read_time(void)
-{
-	struct adb_request req;
-	long time;
-
-	if (maciisi_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME))
-		return 0;
-
-	time = (req.reply[3] << 24) | (req.reply[4] << 16)
-		| (req.reply[5] << 8) | req.reply[6];
-	return time - RTC_OFFSET;
-}
-
-static void maciisi_write_time(long data)
-{
-	struct adb_request req;
-	data += RTC_OFFSET;
-	maciisi_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
-			(data >> 24) & 0xFF, (data >> 16) & 0xFF,
-			(data >> 8) & 0xFF, data & 0xFF);
-}
-
-static __u8 maciisi_read_pram(int offset)
-{
-	struct adb_request req;
-	if (maciisi_request(&req, NULL, 4, CUDA_PACKET, CUDA_GET_PRAM,
-			(offset >> 8) & 0xFF, offset & 0xFF))
-		return 0;
-	return req.reply[3];
-}
-
-static void maciisi_write_pram(int offset, __u8 data)
-{
-	struct adb_request req;
-	maciisi_request(&req, NULL, 5, CUDA_PACKET, CUDA_SET_PRAM,
-			(offset >> 8) & 0xFF, offset & 0xFF, data);
-}
-#else
-#define maciisi_read_time() 0
-#define maciisi_write_time(n)
-#define maciisi_read_pram NULL
-#define maciisi_write_pram NULL
-#endif
-
 /*
  * VIA PRAM/RTC access routines
  *
@@ -457,11 +409,10 @@ void mac_pram_read(int offset, __u8 *buffer, int len)
 	int i;
 
 	switch(macintosh_config->adb_type) {
-	case MAC_ADB_IISI:
-		func = maciisi_read_pram; break;
 	case MAC_ADB_PB1:
 	case MAC_ADB_PB2:
 		func = pmu_read_pram; break;
+	case MAC_ADB_EGRET:
 	case MAC_ADB_CUDA:
 		func = cuda_read_pram; break;
 	default:
@@ -480,11 +431,10 @@ void mac_pram_write(int offset, __u8 *buffer, int len)
 	int i;
 
 	switch(macintosh_config->adb_type) {
-	case MAC_ADB_IISI:
-		func = maciisi_write_pram; break;
 	case MAC_ADB_PB1:
 	case MAC_ADB_PB2:
 		func = pmu_write_pram; break;
+	case MAC_ADB_EGRET:
 	case MAC_ADB_CUDA:
 		func = cuda_write_pram; break;
 	default:
@@ -499,17 +449,13 @@ void mac_pram_write(int offset, __u8 *buffer, int len)
 
 void mac_poweroff(void)
 {
-	/*
-	 * MAC_ADB_IISI may need to be moved up here if it doesn't actually
-	 * work using the ADB packet method. --David Kilzer
-	 */
-
 	if (oss_present) {
 		oss_shutdown();
 	} else if (macintosh_config->adb_type == MAC_ADB_II) {
 		via_shutdown();
 #ifdef CONFIG_ADB_CUDA
-	} else if (macintosh_config->adb_type == MAC_ADB_CUDA) {
+	} else if (macintosh_config->adb_type == MAC_ADB_EGRET ||
+		   macintosh_config->adb_type == MAC_ADB_CUDA) {
 		cuda_shutdown();
 #endif
 #ifdef CONFIG_ADB_PMU68K
@@ -549,7 +495,8 @@ void mac_reset(void)
 		local_irq_restore(flags);
 	}
 #ifdef CONFIG_ADB_CUDA
-	} else if (macintosh_config->adb_type == MAC_ADB_CUDA) {
+	} else if (macintosh_config->adb_type == MAC_ADB_EGRET ||
+		   macintosh_config->adb_type == MAC_ADB_CUDA) {
 		cuda_restart();
 #endif
 #ifdef CONFIG_ADB_PMU68K
@@ -698,13 +645,11 @@ int mac_hwclk(int op, struct rtc_time *t)
 	case MAC_ADB_IOP:
 		now = via_read_time();
 		break;
-	case MAC_ADB_IISI:
-		now = maciisi_read_time();
-		break;
 	case MAC_ADB_PB1:
 	case MAC_ADB_PB2:
 		now = pmu_read_time();
 		break;
+	case MAC_ADB_EGRET:
 	case MAC_ADB_CUDA:
 		now = cuda_read_time();
 		break;
@@ -736,6 +681,7 @@ int mac_hwclk(int op, struct rtc_time *t)
 	case MAC_ADB_IOP:
 		via_write_time(now);
 		break;
+	case MAC_ADB_EGRET:
 	case MAC_ADB_CUDA:
 		cuda_write_time(now);
 		break;
@@ -743,8 +689,6 @@ int mac_hwclk(int op, struct rtc_time *t)
 	case MAC_ADB_PB2:
 		pmu_write_time(now);
 		break;
-	case MAC_ADB_IISI:
-		maciisi_write_time(now);
 	}
 	}
 	return 0;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 281f4f1fcd1f..8582121d7a45 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -93,12 +93,14 @@ config PPC
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_GCC_PLUGINS
 	select SYSCTL_EXCEPTION_TRACE
 	select VIRT_TO_BUS if !PPC64
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
 	select HAVE_KPROBES
+	select HAVE_OPTPROBES if PPC64
 	select HAVE_ARCH_KGDB
 	select HAVE_KRETPROBES
 	select HAVE_ARCH_TRACEHOOK
@@ -164,9 +166,10 @@ config PPC
 	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
 	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_KERNEL_GZIP
+	select HAVE_CONTEXT_TRACKING if PPC64
 
 config GENERIC_CSUM
-	def_bool CPU_LITTLE_ENDIAN
+	def_bool n
 
 config EARLY_PRINTK
 	bool
@@ -390,8 +393,8 @@ config DISABLE_MPROFILE_KERNEL
 	  be disabled also.
 
 	  If you have a toolchain which supports mprofile-kernel, then you can
-	  enable this. Otherwise leave it disabled. If you're not sure, say
-	  "N".
+	  disable this. Otherwise leave it enabled. If you're not sure, say
+	  "Y".
 
 config MPROFILE_KERNEL
 	depends on PPC64 && CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 949258d412d0..c86df246339e 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -356,8 +356,7 @@ config FAIL_IOMMU
 
 config PPC_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
-	depends on DEBUG_KERNEL
-	select DEBUG_FS
+	depends on DEBUG_KERNEL && DEBUG_FS
 	help
 	  This option exports the state of the kernel pagetables to a
 	  debugfs file. This is only useful for kernel developers who are
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index d61c03525777..84774ccba1c2 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -1,4 +1,5 @@
 addnote
+decompress_inflate.c
 empty.c
 hack-coff
 inffast.c
@@ -13,11 +14,13 @@ infutil.h
 kernel-vmlinux.strip.c
 kernel-vmlinux.strip.gz
 mktree
+otheros.bld
 uImage
 cuImage.*
 dtbImage.*
 *.dtb
 treeImage.*
+vmlinux.strip
 zImage
 zImage.initrd
 zImage.bin.*
@@ -26,6 +29,7 @@ zImage.coff
 zImage.epapr
 zImage.holly
 zImage.*lds
+zImage.maple
 zImage.miboot
 zImage.pmac
 zImage.pseries
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index e4d53fe5976a..ac8b8332ed82 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -26,9 +26,11 @@ CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_BPF=y
 CONFIG_CGROUP_PERF=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF_SYSCALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
@@ -79,6 +81,11 @@ CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_BPF=m
+CONFIG_BPF_JIT=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
@@ -213,10 +220,11 @@ CONFIG_HID_SUNPLUS=y
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_MON=m
+CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_HCD_PPC_OF is not set
 CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=m
+CONFIG_USB_STORAGE=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=m
 CONFIG_LEDS_POWERNV=m
@@ -289,6 +297,7 @@ CONFIG_LOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 0396126ba6a8..4f1288b04303 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -14,7 +14,9 @@ CONFIG_LOG_BUF_SHIFT=18
 CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
 CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
+CONFIG_CGROUP_BPF=y
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF_SYSCALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
@@ -76,6 +78,10 @@ CONFIG_INET_IPCOMP=m
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
 CONFIG_BRIDGE=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_BPF=m
 CONFIG_BPF_JIT=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
@@ -324,6 +330,7 @@ CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 5a06bdde1674..6d0eb02fefa4 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -24,12 +24,14 @@ CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_BPF=y
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_CGROUP_PERF=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF_SYSCALL=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
@@ -82,6 +84,11 @@ CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_BPF=m
+CONFIG_BPF_JIT=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
@@ -289,6 +296,7 @@ CONFIG_LOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
 CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index ba47c70712f9..f6c5264287e5 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -120,4 +120,6 @@ extern s64 __ashrdi3(s64, int);
 extern int __cmpdi2(s64, s64);
 extern int __ucmpdi2(u64, u64);
 
+void _mcount(void);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 4c935f7504f7..f7b721bbf918 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -33,9 +33,9 @@
 			 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE		(ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
 /*
- * only with hash we need to use the second half of pmd page table
+ * only with hash 64k we need to use the second half of pmd page table
  * to store pointer to deposited pgtable_t
  */
 #define H_PMD_CACHE_INDEX	(H_PMD_INDEX_SIZE + 1)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 2e6a823fa502..52d8d1e4b772 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -157,6 +157,7 @@ struct mmu_hash_ops {
 				       unsigned long addr,
 				       unsigned char *hpte_slot_array,
 				       int psize, int ssize, int local);
+	int (*resize_hpt)(unsigned long shift);
 	/*
 	 * Special for kexec.
 	 * To be called in real mode with interrupts disabled. No locks are
@@ -525,6 +526,9 @@ extern void slb_set_size(u16 size);
 #define ESID_BITS		18
 #define ESID_BITS_1T		6
 
+#define ESID_BITS_MASK		((1 << ESID_BITS) - 1)
+#define ESID_BITS_1T_MASK	((1 << ESID_BITS_1T) - 1)
+
 /*
  * 256MB segment
  * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
@@ -660,9 +664,9 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
 
 	if (ssize == MMU_SEGSIZE_256M)
 		return vsid_scramble((context << ESID_BITS)
-				     | (ea >> SID_SHIFT), 256M);
+				     | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M);
 	return vsid_scramble((context << ESID_BITS_1T)
-			     | (ea >> SID_SHIFT_1T), 1T);
+			     | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T);
 }
 
 /*
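
The get_vsid() change above masks the effective segment ID so that, for effective addresses whose ESID is wider than ESID_BITS, the excess bits no longer spill into the context field of the proto-VSID. A small standalone illustration — SID_SHIFT = 28 is an assumption here (the 256MB segment shift, not shown in this hunk), and the values are made up:

#include <stdint.h>
#include <stdio.h>

#define ESID_BITS	18
#define ESID_BITS_MASK	((1UL << ESID_BITS) - 1)
#define SID_SHIFT	28	/* assumed: log2 of a 256MB segment */

int main(void)
{
	uint64_t context = 1;
	uint64_t ea = 0xffffd0000000ULL;	/* ESID wider than 18 bits */
	uint64_t spilled = (context << ESID_BITS) | (ea >> SID_SHIFT);
	uint64_t masked  = (context << ESID_BITS) |
			   ((ea >> SID_SHIFT) & ESID_BITS_MASK);

	/* Without the mask the high ESID bits overwrite the context
	 * field of the proto-VSID: 0xffffd vs 0x7fffd below. */
	printf("old: %#llx  new: %#llx\n",
	       (unsigned long long)spilled, (unsigned long long)masked);
	return 0;
}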
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 8afb0e00f7d9..d73e9dfa5237 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -44,10 +44,20 @@ struct patb_entry {
 };
 extern struct patb_entry *partition_tb;
 
+/* Bits in patb0 field */
 #define PATB_HR		(1UL << 63)
-#define PATB_GR		(1UL << 63)
 #define RPDB_MASK	0x0ffffffffffff00fUL
 #define RPDB_SHIFT	(1UL << 8)
+#define RTS1_SHIFT	61		/* top 2 bits of radix tree size */
+#define RTS1_MASK	(3UL << RTS1_SHIFT)
+#define RTS2_SHIFT	5		/* bottom 3 bits of radix tree size */
+#define RTS2_MASK	(7UL << RTS2_SHIFT)
+#define RPDS_MASK	0x1f		/* root page dir. size field */
+
+/* Bits in patb1 field */
+#define PATB_GR		(1UL << 63)	/* guest uses radix; must match HR */
+#define PRTS_MASK	0x1f		/* process table size field */
+
 /*
  * Limit process table to PAGE_SIZE table. This
  * also limit the max pid we can support.
@@ -138,5 +148,11 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 extern int (*register_process_table)(unsigned long base, unsigned long page_size,
 				     unsigned long tbl_size);
 
+#ifdef CONFIG_PPC_PSERIES
+extern void radix_init_pseries(void);
+#else
+static inline void radix_init_pseries(void) { };
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
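
One non-obvious detail in the new patb0 bit definitions above is that the 5-bit radix tree size (RTS) is split: its top 2 bits live at RTS1 and its bottom 3 bits at RTS2, per the comments in the hunk. A minimal sketch of packing such a split field, using only the masks defined above — the helper name and the unsplit "rts" input encoding are assumptions for illustration:

static inline unsigned long patb0_set_rts(unsigned long patb0,
					  unsigned long rts)
{
	patb0 &= ~(RTS1_MASK | RTS2_MASK);
	patb0 |= ((rts >> 3) << RTS1_SHIFT) & RTS1_MASK;  /* top 2 bits */
	patb0 |= ((rts & 0x7) << RTS2_SHIFT) & RTS2_MASK; /* bottom 3 bits */
	return patb0;
}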
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index 9db83b4e017d..8708a0239a56 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -47,7 +47,12 @@ static inline int hugepd_ok(hugepd_t hpd)
 	return hash__hugepd_ok(hpd);
 }
 #define is_hugepd(hpd)		(hugepd_ok(hpd))
+
+#else /* !CONFIG_HUGETLB_PAGE */
+static inline int pmd_huge(pmd_t pmd) { return 0; }
+static inline int pud_huge(pud_t pud) { return 0; }
 #endif /* CONFIG_HUGETLB_PAGE */
+
 #endif /* __ASSEMBLY__ */
 
 #endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index 0d2845b44763..2ce4209399ed 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -35,10 +35,6 @@ static inline int pgd_huge(pgd_t pgd)
 }
 #define pgd_huge pgd_huge
 
-#ifdef CONFIG_DEBUG_VM
-extern int hugepd_ok(hugepd_t hpd);
-#define is_hugepd(hpd)		(hugepd_ok(hpd))
-#else
 /*
  * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
  * need to setup hugepage directory for them. Our pte and page directory format
@@ -49,8 +45,10 @@ static inline int hugepd_ok(hugepd_t hpd)
 	return 0;
 }
 #define is_hugepd(pdep)			0
-#endif /* CONFIG_DEBUG_VM */
 
+#else /* !CONFIG_HUGETLB_PAGE */
+static inline int pmd_huge(pmd_t pmd) { return 0; }
+static inline int pud_huge(pud_t pud) { return 0; }
 #endif /* CONFIG_HUGETLB_PAGE */
 
 static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 5905f0ff57d1..fef738229a68 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -371,6 +371,23 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	return __pte(old);
 }
 
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long addr,
+					    pte_t *ptep, int full)
+{
+	if (full && radix_enabled()) {
+		/*
+		 * Let's skip the DD1 style pte update here. We know that
+		 * this is a full mm pte clear and hence can be sure there is
+		 * no parallel set_pte.
+		 */
+		return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+	}
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 			     pte_t * ptep)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index b4d1302387a3..9e0bb7cd6e22 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -139,30 +139,43 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
 
 		unsigned long new_pte;
 
-		old_pte = __radix_pte_update(ptep, ~0, 0);
+		old_pte = __radix_pte_update(ptep, ~0ul, 0);
 		/*
 		 * new value of pte
 		 */
 		new_pte = (old_pte | set) & ~clr;
-		/*
-		 * If we are trying to clear the pte, we can skip
-		 * the below sequence and batch the tlb flush. The
-		 * tlb flush batching is done by mmu gather code
-		 */
-		if (new_pte) {
-			asm volatile("ptesync" : : : "memory");
-			radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr);
+		radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr);
+		if (new_pte)
 			__radix_pte_update(ptep, 0, new_pte);
-		}
 	} else
 		old_pte = __radix_pte_update(ptep, clr, set);
-	asm volatile("ptesync" : : : "memory");
 	if (!huge)
 		assert_pte_locked(mm, addr);
 
 	return old_pte;
 }
 
+static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
+						   unsigned long addr,
+						   pte_t *ptep, int full)
+{
+	unsigned long old_pte;
+
+	if (full) {
+		/*
+		 * If we are trying to clear the pte, we can skip
+		 * the DD1 pte update sequence and batch the tlb flush. The
+		 * tlb flush batching is done by mmu gather code. We
+		 * still keep the cmp_xchg update to make sure we get
+		 * correct R/C bit which might be updated via Nest MMU.
+		 */
+		old_pte = __radix_pte_update(ptep, ~0ul, 0);
+	} else
+		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
+
+	return __pte(old_pte);
+}
+
 /*
  * Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to invalidate tlb.
@@ -180,7 +193,6 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
 	unsigned long old_pte, new_pte;
 
 	old_pte = __radix_pte_update(ptep, ~0, 0);
-	asm volatile("ptesync" : : : "memory");
 	/*
 	 * new value of pte
 	 */
@@ -291,5 +303,10 @@ static inline unsigned long radix__get_tree_size(void)
 	}
 	return rts_field;
 }
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int radix__create_section_mapping(unsigned long start, unsigned long end);
+int radix__remove_section_mapping(unsigned long start, unsigned long end);
+#endif /* CONFIG_MEMORY_HOTPLUG */
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 7657aa897a38..5a90292afbad 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -30,15 +30,22 @@
 #define IFETCH_ALIGN_BYTES	(1 << IFETCH_ALIGN_SHIFT)
 
 #if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+
+struct ppc_cache_info {
+	u32 size;
+	u32 line_size;
+	u32 block_size;	/* L1 only */
+	u32 log_block_size;
+	u32 blocks_per_page;
+	u32 sets;
+	u32 assoc;
+};
+
 struct ppc64_caches {
-	u32	dsize;			/* L1 d-cache size */
-	u32	dline_size;		/* L1 d-cache line size */
-	u32	log_dline_size;
-	u32	dlines_per_page;
-	u32	isize;			/* L1 i-cache size */
-	u32	iline_size;		/* L1 i-cache line size */
-	u32	log_iline_size;
-	u32	ilines_per_page;
+	struct ppc_cache_info l1d;
+	struct ppc_cache_info l1i;
+	struct ppc_cache_info l2;
+	struct ppc_cache_info l3;
 };
 
 extern struct ppc64_caches ppc64_caches;
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 1e8fceb308a5..4e63787dc3be 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -53,17 +53,29 @@ static inline __sum16 csum_fold(__wsum sum)
 	return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
 }
 
+static inline u32 from64to32(u64 x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (u32)x;
+}
+
 static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 					__u8 proto, __wsum sum)
 {
 #ifdef __powerpc64__
-	unsigned long s = (__force u32)sum;
+	u64 s = (__force u32)sum;
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
 	s += proto + len;
-	s += (s >> 32);
-	return (__force __wsum) s;
+#else
+	s += (proto + len) << 8;
+#endif
+	return (__force __wsum) from64to32(s);
 #else
 	__asm__("\n\
 	addc %0,%0,%1 \n\
@@ -123,8 +135,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
 
 	for (i = 0; i < ihl - 1; i++, ptr++)
 		s += *ptr;
-	s += (s >> 32);
-	return (__force __wsum)s;
+	return (__force __wsum)from64to32(s);
 #else
 	__wsum sum, tmp;
 
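
The new from64to32() helper folds the 64-bit accumulator twice because the first fold can itself carry out of bit 31; the old single `s += (s >> 32)` followed by a truncating cast lost that carry. A standalone userspace check of the worst case — the helper is copied from the hunk above, everything else is illustrative:

#include <assert.h>
#include <stdint.h>

/* Userspace mirror of the from64to32() helper added above. */
static uint32_t from64to32(uint64_t x)
{
	x = (x & 0xffffffff) + (x >> 32);	/* first fold: 32 bits + carries */
	x = (x & 0xffffffff) + (x >> 32);	/* second fold absorbs the carry-out */
	return (uint32_t)x;
}

int main(void)
{
	/* Worst case: the first fold itself carries out of bit 31.
	 * A single fold would truncate to 0xfffffffe and lose it. */
	assert(from64to32(0xffffffffffffffffULL) == 0xffffffffU);
	return 0;
}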
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 2015b072422c..8ab937771068 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,7 @@
 #define BRANCH_SET_LINK	0x1
 #define BRANCH_ABSOLUTE	0x2
 
+bool is_offset_in_branch_range(long offset);
 unsigned int create_branch(const unsigned int *addr,
 			   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
@@ -34,6 +35,7 @@ int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
 unsigned long branch_target(const unsigned int *instr);
 unsigned int translate_branch(const unsigned int *dest,
 			      const unsigned int *src);
+extern bool is_conditional_branch(unsigned int instr);
 #ifdef CONFIG_PPC_BOOK3E_64
 void __patch_exception(int exc, unsigned long addr);
 #define patch_exception(exc, name) do { \
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 3919332965af..fd321eb423cb 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -10,18 +10,62 @@
10#define PNV_CORE_IDLE_LOCK_BIT 0x100 10#define PNV_CORE_IDLE_LOCK_BIT 0x100
11#define PNV_CORE_IDLE_THREAD_BITS 0x0FF 11#define PNV_CORE_IDLE_THREAD_BITS 0x0FF
12 12
13/*
14 * ============================ NOTE =================================
15 * The older firmware populates only the RL field in the psscr_val and
16 * sets the psscr_mask to 0xf. On such a firmware, the kernel sets the
17 * remaining PSSCR fields to default values as follows:
18 *
19 * - ESL and EC bits are set to 1, so wakeup from any stop state will be
20 * at vector 0x100.
21 *
22 * - MTL and PSLL are set to the maximum allowed value as per the ISA,
23 * i.e. 15.
24 *
25 * - The Transition Rate, TR, is set to the maximum value, 3.
26 */
27#define PSSCR_HV_DEFAULT_VAL (PSSCR_ESL | PSSCR_EC | \
28 PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
29 PSSCR_MTL_MASK)
30
31#define PSSCR_HV_DEFAULT_MASK (PSSCR_ESL | PSSCR_EC | \
32 PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
33 PSSCR_MTL_MASK | PSSCR_RL_MASK)
34#define PSSCR_EC_SHIFT 20
35#define PSSCR_ESL_SHIFT 21
36#define GET_PSSCR_EC(x) (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT)
37#define GET_PSSCR_ESL(x) (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT)
38#define GET_PSSCR_RL(x) ((x) & PSSCR_RL_MASK)
39
40#define ERR_EC_ESL_MISMATCH -1
41#define ERR_DEEP_STATE_ESL_MISMATCH -2
42
13#ifndef __ASSEMBLY__ 43#ifndef __ASSEMBLY__
14extern u32 pnv_fastsleep_workaround_at_entry[]; 44extern u32 pnv_fastsleep_workaround_at_entry[];
15extern u32 pnv_fastsleep_workaround_at_exit[]; 45extern u32 pnv_fastsleep_workaround_at_exit[];
16 46
17extern u64 pnv_first_deep_stop_state; 47extern u64 pnv_first_deep_stop_state;
48
49int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
50static inline void report_invalid_psscr_val(u64 psscr_val, int err)
51{
52 switch (err) {
53 case ERR_EC_ESL_MISMATCH:
54 pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal",
55 psscr_val);
56 break;
57 case ERR_DEEP_STATE_ESL_MISMATCH:
58 pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state",
59 psscr_val);
60 }
61}
18#endif 62#endif
19 63
20#endif 64#endif
21 65
22/* Idle state entry routines */ 66/* Idle state entry routines */
23#ifdef CONFIG_PPC_P7_NAP 67#ifdef CONFIG_PPC_P7_NAP
24#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ 68#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
25 /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ 69 /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
26 std r0,0(r1); \ 70 std r0,0(r1); \
27 ptesync; \ 71 ptesync; \
@@ -29,6 +73,9 @@ extern u64 pnv_first_deep_stop_state;
291: cmpd cr0,r0,r0; \ 731: cmpd cr0,r0,r0; \
30 bne 1b; \ 74 bne 1b; \
31 IDLE_INST; \ 75 IDLE_INST; \
76
77#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
78 IDLE_STATE_ENTER_SEQ(IDLE_INST) \
32 b . 79 b .
33#endif /* CONFIG_PPC_P7_NAP */ 80#endif /* CONFIG_PPC_P7_NAP */
34 81
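
validate_psscr_val_mask() and report_invalid_psscr_val() split validation from reporting, so each caller decides how to react. The EC/ESL consistency rule follows directly from the accessors defined above; a simplified sketch of just the mismatch check (the full validator also handles the deep-state case and the firmware-supplied mask):

/* sketch: EC and ESL must be set together for stop to behave */
static int check_psscr_ec_esl(u64 psscr_val)
{
	if (GET_PSSCR_EC(psscr_val) != GET_PSSCR_ESL(psscr_val))
		return ERR_EC_ESL_MISMATCH;
	return 0;
}

A caller would then pass any nonzero result straight to report_invalid_psscr_val() along with the offending value.
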
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index ee46ffef608e..93b9b84568e8 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -136,4 +136,46 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
136 136
137#endif /* CONFIG_SPU_BASE */ 137#endif /* CONFIG_SPU_BASE */
138 138
139#ifdef CONFIG_PPC64
140
141#define get_cache_geometry(level) \
142 (ppc64_caches.level.assoc << 16 | ppc64_caches.level.line_size)
143
144#define ARCH_DLINFO_CACHE_GEOMETRY \
145 NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \
146 NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \
147 NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \
148 NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \
149 NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \
150 NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \
151 NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \
152 NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, get_cache_geometry(l3))
153
154#else
155#define ARCH_DLINFO_CACHE_GEOMETRY
156#endif
157
158/*
159 * The requirements here are:
160 * - keep the final alignment of sp (sp & 0xf)
161 * - make sure the 32-bit value at the first 16 byte aligned position of
162 * AUXV is greater than 16 for glibc compatibility.
163 * AT_IGNOREPPC is used for that.
164 * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
165 * even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
166 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
167 */
168#define ARCH_DLINFO \
169do { \
170 /* Handle glibc compatibility. */ \
171 NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
172 NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
173 /* Cache size items */ \
174 NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \
175 NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \
176 NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \
177 VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \
178 ARCH_DLINFO_CACHE_GEOMETRY; \
179} while (0)
180
139#endif /* _ASM_POWERPC_ELF_H */ 181#endif /* _ASM_POWERPC_ELF_H */
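
get_cache_geometry() packs the associativity into the upper 16 bits and the block size in bytes into the lower 16, so each AT_*_CACHEGEOMETRY entry carries both in one word. A hypothetical userspace consumer via getauxval(); the AT_L1D_CACHEGEOMETRY value is taken from the new uapi auxvec.h and guarded in case the libc headers predate it:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef AT_L1D_CACHEGEOMETRY
#define AT_L1D_CACHEGEOMETRY 43	/* from the updated uapi/asm/auxvec.h */
#endif

int main(void)
{
	unsigned long geom = getauxval(AT_L1D_CACHEGEOMETRY);

	/* upper half: associativity; lower half: line size in bytes */
	printf("L1D: %lu-way, %lu-byte lines\n", geom >> 16, geom & 0xffff);
	return 0;
}
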
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a3eee661297..14752eee3d0c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -97,6 +97,15 @@
97 ld reg,PACAKBASE(r13); \ 97 ld reg,PACAKBASE(r13); \
98 ori reg,reg,(ABS_ADDR(label))@l; 98 ori reg,reg,(ABS_ADDR(label))@l;
99 99
100/*
101 * Branches from unrelocated code (e.g., interrupts) to labels outside
102 * head-y require >64K offsets.
103 */
104#define __LOAD_FAR_HANDLER(reg, label) \
105 ld reg,PACAKBASE(r13); \
106 ori reg,reg,(ABS_ADDR(label))@l; \
107 addis reg,reg,(ABS_ADDR(label))@h;
108
100/* Exception register prefixes */ 109/* Exception register prefixes */
101#define EXC_HV H 110#define EXC_HV H
102#define EXC_STD 111#define EXC_STD
@@ -227,13 +236,49 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
227 mtctr reg; \ 236 mtctr reg; \
228 bctr 237 bctr
229 238
239#define BRANCH_LINK_TO_FAR(reg, label) \
240 __LOAD_FAR_HANDLER(reg, label); \
241 mtctr reg; \
242 bctrl
243
244/*
245 * KVM requires __LOAD_FAR_HANDLER.
246 *
247 * __BRANCH_TO_KVM_EXIT branches are also a special case because they
248 * explicitly use r9 then reload it from PACA before branching. Hence
249 * the double-underscore.
250 */
251#define __BRANCH_TO_KVM_EXIT(area, label) \
252 mfctr r9; \
253 std r9,HSTATE_SCRATCH1(r13); \
254 __LOAD_FAR_HANDLER(r9, label); \
255 mtctr r9; \
256 ld r9,area+EX_R9(r13); \
257 bctr
258
259#define BRANCH_TO_KVM(reg, label) \
260 __LOAD_FAR_HANDLER(reg, label); \
261 mtctr reg; \
262 bctr
263
230#else 264#else
231#define BRANCH_TO_COMMON(reg, label) \ 265#define BRANCH_TO_COMMON(reg, label) \
232 b label 266 b label
233 267
268#define BRANCH_LINK_TO_FAR(reg, label) \
269 bl label
270
271#define BRANCH_TO_KVM(reg, label) \
272 b label
273
274#define __BRANCH_TO_KVM_EXIT(area, label) \
275 ld r9,area+EX_R9(r13); \
276 b label
277
234#endif 278#endif
235 279
236#define __KVM_HANDLER_PROLOG(area, n) \ 280
281#define __KVM_HANDLER(area, h, n) \
237 BEGIN_FTR_SECTION_NESTED(947) \ 282 BEGIN_FTR_SECTION_NESTED(947) \
238 ld r10,area+EX_CFAR(r13); \ 283 ld r10,area+EX_CFAR(r13); \
239 std r10,HSTATE_CFAR(r13); \ 284 std r10,HSTATE_CFAR(r13); \
@@ -243,30 +288,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
243 std r10,HSTATE_PPR(r13); \ 288 std r10,HSTATE_PPR(r13); \
244 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ 289 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
245 ld r10,area+EX_R10(r13); \ 290 ld r10,area+EX_R10(r13); \
246 stw r9,HSTATE_SCRATCH1(r13); \
247 ld r9,area+EX_R9(r13); \
248 std r12,HSTATE_SCRATCH0(r13); \ 291 std r12,HSTATE_SCRATCH0(r13); \
249 292 sldi r12,r9,32; \
250#define __KVM_HANDLER(area, h, n) \ 293 ori r12,r12,(n); \
251 __KVM_HANDLER_PROLOG(area, n) \ 294 /* This reloads r9 before branching to kvmppc_interrupt */ \
252 li r12,n; \ 295 __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt)
253 b kvmppc_interrupt
254 296
255#define __KVM_HANDLER_SKIP(area, h, n) \ 297#define __KVM_HANDLER_SKIP(area, h, n) \
256 cmpwi r10,KVM_GUEST_MODE_SKIP; \ 298 cmpwi r10,KVM_GUEST_MODE_SKIP; \
257 ld r10,area+EX_R10(r13); \
258 beq 89f; \ 299 beq 89f; \
259 stw r9,HSTATE_SCRATCH1(r13); \
260 BEGIN_FTR_SECTION_NESTED(948) \ 300 BEGIN_FTR_SECTION_NESTED(948) \
261 ld r9,area+EX_PPR(r13); \ 301 ld r10,area+EX_PPR(r13); \
262 std r9,HSTATE_PPR(r13); \ 302 std r10,HSTATE_PPR(r13); \
263 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ 303 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
264 ld r9,area+EX_R9(r13); \ 304 ld r10,area+EX_R10(r13); \
265 std r12,HSTATE_SCRATCH0(r13); \ 305 std r12,HSTATE_SCRATCH0(r13); \
266 li r12,n; \ 306 sldi r12,r9,32; \
267 b kvmppc_interrupt; \ 307 ori r12,r12,(n); \
308 /* This reloads r9 before branching to kvmppc_interrupt */ \
309 __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt); \
26889: mtocrf 0x80,r9; \ 31089: mtocrf 0x80,r9; \
269 ld r9,area+EX_R9(r13); \ 311 ld r9,area+EX_R9(r13); \
312 ld r10,area+EX_R10(r13); \
270 b kvmppc_skip_##h##interrupt 313 b kvmppc_skip_##h##interrupt
271 314
272#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 315#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
@@ -393,12 +436,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
393 EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD) 436 EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD)
394 437
395#define STD_RELON_EXCEPTION_HV(loc, vec, label) \ 438#define STD_RELON_EXCEPTION_HV(loc, vec, label) \
396 /* No guest interrupts come through here */ \
397 SET_SCRATCH0(r13); /* save r13 */ \ 439 SET_SCRATCH0(r13); /* save r13 */ \
398 EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec); 440 EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \
441 EXC_HV, KVMTEST_HV, vec);
399 442
400#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ 443#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
401 EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ 444 EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \
402 EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) 445 EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
403 446
404/* This associates vector numbers with bits in paca->irq_happened */ 447/* This associates vector numbers with bits in paca->irq_happened */
@@ -475,10 +518,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
475 518
476#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \ 519#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
477 _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ 520 _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
478 EXC_HV, SOFTEN_NOTEST_HV) 521 EXC_HV, SOFTEN_TEST_HV)
479 522
480#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ 523#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
481 EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \ 524 EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
482 EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) 525 EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
483 526
484/* 527/*
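
__LOAD_FAR_HANDLER composes the 32-bit handler offset in two immediates: ori merges the low half onto the 64K-aligned kernel base, then addis adds the high half. Because ori rather than addi supplies the low 16 bits, there is no negative-low-half carry to compensate, so a plain @h (not @ha) suffices. The same composition as integer arithmetic, assuming the offset stays well below 2 GB so the sign extension of addis never bites (a model, not kernel code):

/* model of the ori/addis address composition */
static unsigned long far_handler_addr(unsigned long kbase, unsigned long off)
{
	unsigned long reg = kbase | (off & 0xffff);	/* ori  reg,reg,off@l  */
	reg += off & 0xffff0000;			/* addis reg,reg,off@h */
	return reg;
}
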
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 1e0b5a5d660a..8645897472b1 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -42,7 +42,7 @@
42#define FW_FEATURE_SPLPAR ASM_CONST(0x0000000000100000) 42#define FW_FEATURE_SPLPAR ASM_CONST(0x0000000000100000)
43#define FW_FEATURE_LPAR ASM_CONST(0x0000000000400000) 43#define FW_FEATURE_LPAR ASM_CONST(0x0000000000400000)
44#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000) 44#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000)
45/* Free ASM_CONST(0x0000000001000000) */ 45#define FW_FEATURE_HPT_RESIZE ASM_CONST(0x0000000001000000)
46#define FW_FEATURE_CMO ASM_CONST(0x0000000002000000) 46#define FW_FEATURE_CMO ASM_CONST(0x0000000002000000)
47#define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000) 47#define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000)
48#define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000) 48#define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000)
@@ -66,7 +66,8 @@ enum {
66 FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | 66 FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
67 FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | 67 FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
68 FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | 68 FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
69 FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN, 69 FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
70 FW_FEATURE_HPT_RESIZE,
70 FW_FEATURE_PSERIES_ALWAYS = 0, 71 FW_FEATURE_PSERIES_ALWAYS = 0,
71 FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, 72 FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
72 FW_FEATURE_POWERNV_ALWAYS = 0, 73 FW_FEATURE_POWERNV_ALWAYS = 0,
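
Claiming the previously free feature bit and adding it to FW_FEATURE_PSERIES_POSSIBLE means the flag is probed from the device tree like any other; call sites can then gate on it with the usual helper, along these lines (a sketch):

/* sketch: bail out unless the hypervisor advertised HPT resizing */
if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
	return -ENODEV;
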
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index fca7033839a9..5067048daad4 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -38,8 +38,8 @@
38 * li r10,128 38 * li r10,128
39 * mr r11,r10 39 * mr r11,r10
40 40
41 * FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address) 41 * FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address, size)
42 * FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, end_address) 42 * FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, start_address, size)
43 * CLOSE_FIXED_SECTION(section_name) 43 * CLOSE_FIXED_SECTION(section_name)
44 * 44 *
45 * ZERO_FIXED_SECTION can be used to emit zeroed data. 45 * ZERO_FIXED_SECTION can be used to emit zeroed data.
@@ -102,9 +102,15 @@ name:
102#define FIXED_SECTION_ENTRY_BEGIN(sname, name) \ 102#define FIXED_SECTION_ENTRY_BEGIN(sname, name) \
103 __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES) 103 __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES)
104 104
105#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \ 105#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start, size) \
106 USE_FIXED_SECTION(sname); \ 106 USE_FIXED_SECTION(sname); \
107 name##_start = (start); \ 107 name##_start = (start); \
108 .if ((start) % (size) != 0); \
109 .error "Fixed section exception vector misalignment"; \
110 .endif; \
111 .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100); \
112 .error "Fixed section exception vector bad size"; \
113 .endif; \
108 .if (start) < sname##_start; \ 114 .if (start) < sname##_start; \
109 .error "Fixed section underflow"; \ 115 .error "Fixed section underflow"; \
110 .abort; \ 116 .abort; \
@@ -113,16 +119,16 @@ name:
113 .global name; \ 119 .global name; \
114name: 120name:
115 121
116#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, end) \ 122#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, start, size) \
117 .if (end) > sname##_end; \ 123 .if (start) + (size) > sname##_end; \
118 .error "Fixed section overflow"; \ 124 .error "Fixed section overflow"; \
119 .abort; \ 125 .abort; \
120 .endif; \ 126 .endif; \
121 .if (. - name > end - name##_start); \ 127 .if (. - name > (start) + (size) - name##_start); \
122 .error "Fixed entry overflow"; \ 128 .error "Fixed entry overflow"; \
123 .abort; \ 129 .abort; \
124 .endif; \ 130 .endif; \
125 . = ((end) - sname##_start); \ 131 . = ((start) + (size) - sname##_start); \
126 132
127 133
128/* 134/*
@@ -147,12 +153,12 @@ name:
147 * Following are the BOOK3S exception handler helper macros. 153 * Following are the BOOK3S exception handler helper macros.
148 * Handlers come in a number of types, and each type has a number of varieties. 154 * Handlers come in a number of types, and each type has a number of varieties.
149 * 155 *
150 * EXC_REAL_* - real, unrelocated exception vectors 156 * EXC_REAL_* - real, unrelocated exception vectors
151 * EXC_VIRT_* - virt (AIL), unrelocated exception vectors 157 * EXC_VIRT_* - virt (AIL), unrelocated exception vectors
152 * TRAMP_REAL_* - real, unrelocated helpers (virt can call these) 158 * TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
153 * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use) 159 * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use)
154 * TRAMP_KVM - KVM handlers that get put into real, unrelocated 160 * TRAMP_KVM - KVM handlers that get put into real, unrelocated
155 * EXC_COMMON_* - virt, relocated common handlers 161 * EXC_COMMON_* - virt, relocated common handlers
156 * 162 *
157 * The EXC handlers are given a name, and branch to name_common, or the 163 * The EXC handlers are given a name, and branch to name_common, or the
158 * appropriate KVM or masking function. Vector handler varieties are as 164 * appropriate KVM or masking function. Vector handler varieties are as
@@ -191,23 +197,23 @@ name:
191 * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers. 197 * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
192 */ 198 */
193 199
194#define EXC_REAL_BEGIN(name, start, end) \ 200#define EXC_REAL_BEGIN(name, start, size) \
195 FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start) 201 FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
196 202
197#define EXC_REAL_END(name, start, end) \ 203#define EXC_REAL_END(name, start, size) \
198 FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, end) 204 FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
199 205
200#define EXC_VIRT_BEGIN(name, start, end) \ 206#define EXC_VIRT_BEGIN(name, start, size) \
201 FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start) 207 FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
202 208
203#define EXC_VIRT_END(name, start, end) \ 209#define EXC_VIRT_END(name, start, size) \
204 FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end) 210 FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
205 211
206#define EXC_COMMON_BEGIN(name) \ 212#define EXC_COMMON_BEGIN(name) \
207 USE_TEXT_SECTION(); \ 213 USE_TEXT_SECTION(); \
208 .balign IFETCH_ALIGN_BYTES; \ 214 .balign IFETCH_ALIGN_BYTES; \
209 .global name; \ 215 .global name; \
210 DEFINE_FIXED_SYMBOL(name); \ 216 DEFINE_FIXED_SYMBOL(name); \
211name: 217name:
212 218
213#define TRAMP_REAL_BEGIN(name) \ 219#define TRAMP_REAL_BEGIN(name) \
@@ -217,147 +223,147 @@ name:
217 FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name) 223 FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
218 224
219#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 225#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
220#define TRAMP_KVM_BEGIN(name) \ 226#define TRAMP_KVM_BEGIN(name) \
221 TRAMP_REAL_BEGIN(name) 227 TRAMP_VIRT_BEGIN(name)
222#else 228#else
223#define TRAMP_KVM_BEGIN(name) 229#define TRAMP_KVM_BEGIN(name)
224#endif 230#endif
225 231
226#define EXC_REAL_NONE(start, end) \ 232#define EXC_REAL_NONE(start, size) \
227 FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start); \ 233 FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
228 FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, end) 234 FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
229 235
230#define EXC_VIRT_NONE(start, end) \ 236#define EXC_VIRT_NONE(start, size) \
231 FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start); \ 237 FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
232 FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, end); 238 FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size);
233 239
234 240
235#define EXC_REAL(name, start, end) \ 241#define EXC_REAL(name, start, size) \
236 EXC_REAL_BEGIN(name, start, end); \ 242 EXC_REAL_BEGIN(name, start, size); \
237 STD_EXCEPTION_PSERIES(start, name##_common); \ 243 STD_EXCEPTION_PSERIES(start, name##_common); \
238 EXC_REAL_END(name, start, end); 244 EXC_REAL_END(name, start, size);
239 245
240#define EXC_VIRT(name, start, end, realvec) \ 246#define EXC_VIRT(name, start, size, realvec) \
241 EXC_VIRT_BEGIN(name, start, end); \ 247 EXC_VIRT_BEGIN(name, start, size); \
242 STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ 248 STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
243 EXC_VIRT_END(name, start, end); 249 EXC_VIRT_END(name, start, size);
244 250
245#define EXC_REAL_MASKABLE(name, start, end) \ 251#define EXC_REAL_MASKABLE(name, start, size) \
246 EXC_REAL_BEGIN(name, start, end); \ 252 EXC_REAL_BEGIN(name, start, size); \
247 MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \ 253 MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \
248 EXC_REAL_END(name, start, end); 254 EXC_REAL_END(name, start, size);
249 255
250#define EXC_VIRT_MASKABLE(name, start, end, realvec) \ 256#define EXC_VIRT_MASKABLE(name, start, size, realvec) \
251 EXC_VIRT_BEGIN(name, start, end); \ 257 EXC_VIRT_BEGIN(name, start, size); \
252 MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ 258 MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
253 EXC_VIRT_END(name, start, end); 259 EXC_VIRT_END(name, start, size);
254 260
255#define EXC_REAL_HV(name, start, end) \ 261#define EXC_REAL_HV(name, start, size) \
256 EXC_REAL_BEGIN(name, start, end); \ 262 EXC_REAL_BEGIN(name, start, size); \
257 STD_EXCEPTION_HV(start, start, name##_common); \ 263 STD_EXCEPTION_HV(start, start, name##_common); \
258 EXC_REAL_END(name, start, end); 264 EXC_REAL_END(name, start, size);
259 265
260#define EXC_VIRT_HV(name, start, end, realvec) \ 266#define EXC_VIRT_HV(name, start, size, realvec) \
261 EXC_VIRT_BEGIN(name, start, end); \ 267 EXC_VIRT_BEGIN(name, start, size); \
262 STD_RELON_EXCEPTION_HV(start, realvec, name##_common); \ 268 STD_RELON_EXCEPTION_HV(start, realvec, name##_common); \
263 EXC_VIRT_END(name, start, end); 269 EXC_VIRT_END(name, start, size);
264 270
265#define __EXC_REAL_OOL(name, start, end) \ 271#define __EXC_REAL_OOL(name, start, size) \
266 EXC_REAL_BEGIN(name, start, end); \ 272 EXC_REAL_BEGIN(name, start, size); \
267 __OOL_EXCEPTION(start, label, tramp_real_##name); \ 273 __OOL_EXCEPTION(start, label, tramp_real_##name); \
268 EXC_REAL_END(name, start, end); 274 EXC_REAL_END(name, start, size);
269 275
270#define __TRAMP_REAL_REAL_OOL(name, vec) \ 276#define __TRAMP_REAL_OOL(name, vec) \
271 TRAMP_REAL_BEGIN(tramp_real_##name); \ 277 TRAMP_REAL_BEGIN(tramp_real_##name); \
272 STD_EXCEPTION_PSERIES_OOL(vec, name##_common); \ 278 STD_EXCEPTION_PSERIES_OOL(vec, name##_common); \
273 279
274#define EXC_REAL_OOL(name, start, end) \ 280#define EXC_REAL_OOL(name, start, size) \
275 __EXC_REAL_OOL(name, start, end); \ 281 __EXC_REAL_OOL(name, start, size); \
276 __TRAMP_REAL_REAL_OOL(name, start); 282 __TRAMP_REAL_OOL(name, start);
277 283
278#define __EXC_REAL_OOL_MASKABLE(name, start, end) \ 284#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
279 __EXC_REAL_OOL(name, start, end); 285 __EXC_REAL_OOL(name, start, size);
280 286
281#define __TRAMP_REAL_REAL_OOL_MASKABLE(name, vec) \ 287#define __TRAMP_REAL_OOL_MASKABLE(name, vec) \
282 TRAMP_REAL_BEGIN(tramp_real_##name); \ 288 TRAMP_REAL_BEGIN(tramp_real_##name); \
283 MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \ 289 MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \
284 290
285#define EXC_REAL_OOL_MASKABLE(name, start, end) \ 291#define EXC_REAL_OOL_MASKABLE(name, start, size) \
286 __EXC_REAL_OOL_MASKABLE(name, start, end); \ 292 __EXC_REAL_OOL_MASKABLE(name, start, size); \
287 __TRAMP_REAL_REAL_OOL_MASKABLE(name, start); 293 __TRAMP_REAL_OOL_MASKABLE(name, start);
288 294
289#define __EXC_REAL_OOL_HV_DIRECT(name, start, end, handler) \ 295#define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \
290 EXC_REAL_BEGIN(name, start, end); \ 296 EXC_REAL_BEGIN(name, start, size); \
291 __OOL_EXCEPTION(start, label, handler); \ 297 __OOL_EXCEPTION(start, label, handler); \
292 EXC_REAL_END(name, start, end); 298 EXC_REAL_END(name, start, size);
293 299
294#define __EXC_REAL_OOL_HV(name, start, end) \ 300#define __EXC_REAL_OOL_HV(name, start, size) \
295 __EXC_REAL_OOL(name, start, end); 301 __EXC_REAL_OOL(name, start, size);
296 302
297#define __TRAMP_REAL_REAL_OOL_HV(name, vec) \ 303#define __TRAMP_REAL_OOL_HV(name, vec) \
298 TRAMP_REAL_BEGIN(tramp_real_##name); \ 304 TRAMP_REAL_BEGIN(tramp_real_##name); \
299 STD_EXCEPTION_HV_OOL(vec, name##_common); \ 305 STD_EXCEPTION_HV_OOL(vec, name##_common); \
300 306
301#define EXC_REAL_OOL_HV(name, start, end) \ 307#define EXC_REAL_OOL_HV(name, start, size) \
302 __EXC_REAL_OOL_HV(name, start, end); \ 308 __EXC_REAL_OOL_HV(name, start, size); \
303 __TRAMP_REAL_REAL_OOL_HV(name, start); 309 __TRAMP_REAL_OOL_HV(name, start);
304 310
305#define __EXC_REAL_OOL_MASKABLE_HV(name, start, end) \ 311#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
306 __EXC_REAL_OOL(name, start, end); 312 __EXC_REAL_OOL(name, start, size);
307 313
308#define __TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, vec) \ 314#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec) \
309 TRAMP_REAL_BEGIN(tramp_real_##name); \ 315 TRAMP_REAL_BEGIN(tramp_real_##name); \
310 MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \ 316 MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \
311 317
312#define EXC_REAL_OOL_MASKABLE_HV(name, start, end) \ 318#define EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
313 __EXC_REAL_OOL_MASKABLE_HV(name, start, end); \ 319 __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
314 __TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, start); 320 __TRAMP_REAL_OOL_MASKABLE_HV(name, start);
315 321
316#define __EXC_VIRT_OOL(name, start, end) \ 322#define __EXC_VIRT_OOL(name, start, size) \
317 EXC_VIRT_BEGIN(name, start, end); \ 323 EXC_VIRT_BEGIN(name, start, size); \
318 __OOL_EXCEPTION(start, label, tramp_virt_##name); \ 324 __OOL_EXCEPTION(start, label, tramp_virt_##name); \
319 EXC_VIRT_END(name, start, end); 325 EXC_VIRT_END(name, start, size);
320 326
321#define __TRAMP_REAL_VIRT_OOL(name, realvec) \ 327#define __TRAMP_VIRT_OOL(name, realvec) \
322 TRAMP_VIRT_BEGIN(tramp_virt_##name); \ 328 TRAMP_VIRT_BEGIN(tramp_virt_##name); \
323 STD_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ 329 STD_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
324 330
325#define EXC_VIRT_OOL(name, start, end, realvec) \ 331#define EXC_VIRT_OOL(name, start, size, realvec) \
326 __EXC_VIRT_OOL(name, start, end); \ 332 __EXC_VIRT_OOL(name, start, size); \
327 __TRAMP_REAL_VIRT_OOL(name, realvec); 333 __TRAMP_VIRT_OOL(name, realvec);
328 334
329#define __EXC_VIRT_OOL_MASKABLE(name, start, end) \ 335#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
330 __EXC_VIRT_OOL(name, start, end); 336 __EXC_VIRT_OOL(name, start, size);
331 337
332#define __TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec) \ 338#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec) \
333 TRAMP_VIRT_BEGIN(tramp_virt_##name); \ 339 TRAMP_VIRT_BEGIN(tramp_virt_##name); \
334 MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ 340 MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
335 341
336#define EXC_VIRT_OOL_MASKABLE(name, start, end, realvec) \ 342#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec) \
337 __EXC_VIRT_OOL_MASKABLE(name, start, end); \ 343 __EXC_VIRT_OOL_MASKABLE(name, start, size); \
338 __TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec); 344 __TRAMP_VIRT_OOL_MASKABLE(name, realvec);
339 345
340#define __EXC_VIRT_OOL_HV(name, start, end) \ 346#define __EXC_VIRT_OOL_HV(name, start, size) \
341 __EXC_VIRT_OOL(name, start, end); 347 __EXC_VIRT_OOL(name, start, size);
342 348
343#define __TRAMP_REAL_VIRT_OOL_HV(name, realvec) \ 349#define __TRAMP_VIRT_OOL_HV(name, realvec) \
344 TRAMP_VIRT_BEGIN(tramp_virt_##name); \ 350 TRAMP_VIRT_BEGIN(tramp_virt_##name); \
345 STD_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ 351 STD_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
346 352
347#define EXC_VIRT_OOL_HV(name, start, end, realvec) \ 353#define EXC_VIRT_OOL_HV(name, start, size, realvec) \
348 __EXC_VIRT_OOL_HV(name, start, end); \ 354 __EXC_VIRT_OOL_HV(name, start, size); \
349 __TRAMP_REAL_VIRT_OOL_HV(name, realvec); 355 __TRAMP_VIRT_OOL_HV(name, realvec);
350 356
351#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, end) \ 357#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
352 __EXC_VIRT_OOL(name, start, end); 358 __EXC_VIRT_OOL(name, start, size);
353 359
354#define __TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec) \ 360#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec) \
355 TRAMP_VIRT_BEGIN(tramp_virt_##name); \ 361 TRAMP_VIRT_BEGIN(tramp_virt_##name); \
356 MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ 362 MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
357 363
358#define EXC_VIRT_OOL_MASKABLE_HV(name, start, end, realvec) \ 364#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec) \
359 __EXC_VIRT_OOL_MASKABLE_HV(name, start, end); \ 365 __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
360 __TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec); 366 __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec);
361 367
362#define TRAMP_KVM(area, n) \ 368#define TRAMP_KVM(area, n) \
363 TRAMP_KVM_BEGIN(do_kvm_##n); \ 369 TRAMP_KVM_BEGIN(do_kvm_##n); \
@@ -378,16 +384,16 @@ name:
378 TRAMP_KVM_BEGIN(do_kvm_H##n); \ 384 TRAMP_KVM_BEGIN(do_kvm_H##n); \
379 KVM_HANDLER_SKIP(area, EXC_HV, n + 0x2); \ 385 KVM_HANDLER_SKIP(area, EXC_HV, n + 0x2); \
380 386
381#define EXC_COMMON(name, realvec, hdlr) \ 387#define EXC_COMMON(name, realvec, hdlr) \
382 EXC_COMMON_BEGIN(name); \ 388 EXC_COMMON_BEGIN(name); \
383 STD_EXCEPTION_COMMON(realvec, name, hdlr); \ 389 STD_EXCEPTION_COMMON(realvec, name, hdlr); \
384 390
385#define EXC_COMMON_ASYNC(name, realvec, hdlr) \ 391#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
386 EXC_COMMON_BEGIN(name); \ 392 EXC_COMMON_BEGIN(name); \
387 STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \ 393 STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \
388 394
389#define EXC_COMMON_HV(name, realvec, hdlr) \ 395#define EXC_COMMON_HV(name, realvec, hdlr) \
390 EXC_COMMON_BEGIN(name); \ 396 EXC_COMMON_BEGIN(name); \
391 STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \ 397 STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \
392 398
393#endif /* _ASM_POWERPC_HEAD_64_H */ 399#endif /* _ASM_POWERPC_HEAD_64_H */
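
Passing (start, size) instead of (start, end) lets the assembler verify at build time that every exception stub starts on a multiple of its own size and that only the three architected vector sizes appear. Restated as C, the .if directives assert roughly this (a model, not kernel code):

#include <stdbool.h>

/* model of the FIXED_SECTION_ENTRY_*_LOCATION build-time checks */
static bool vector_placement_ok(unsigned long start, unsigned long size,
				unsigned long sec_start, unsigned long sec_end)
{
	if (size != 0x20 && size != 0x80 && size != 0x100)
		return false;			/* bad vector size */
	if (start % size)
		return false;			/* misaligned vector */
	return start >= sec_start && start + size <= sec_end;
}
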
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 77ff1ba99d1f..3cc12a86ef5d 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -276,6 +276,9 @@
276#define H_GET_MPP_X 0x314 276#define H_GET_MPP_X 0x314
277#define H_SET_MODE 0x31C 277#define H_SET_MODE 0x31C
278#define H_CLEAR_HPT 0x358 278#define H_CLEAR_HPT 0x358
279#define H_RESIZE_HPT_PREPARE 0x36C
280#define H_RESIZE_HPT_COMMIT 0x370
281#define H_REGISTER_PROC_TBL 0x37C
279#define H_SIGNAL_SYS_RESET 0x380 282#define H_SIGNAL_SYS_RESET 0x380
280#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET 283#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET
281 284
@@ -313,6 +316,16 @@
313#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 316#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
314/* >= 0 values are CPU number */ 317/* >= 0 values are CPU number */
315 318
319/* Flag values used in H_REGISTER_PROC_TBL hcall */
320#define PROC_TABLE_OP_MASK 0x18
321#define PROC_TABLE_DEREG 0x10
322#define PROC_TABLE_NEW 0x18
323#define PROC_TABLE_TYPE_MASK 0x06
324#define PROC_TABLE_HPT_SLB 0x00
325#define PROC_TABLE_HPT_PT 0x02
326#define PROC_TABLE_RADIX 0x04
327#define PROC_TABLE_GTSE 0x01
328
316#ifndef __ASSEMBLY__ 329#ifndef __ASSEMBLY__
317 330
318/** 331/**
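
The PROC_TABLE_* values combine an operation, a table type, and the GTSE bit into the single flags argument of H_REGISTER_PROC_TBL. A sketch of registering a new radix process table with guest translation shootdown enabled; base, page_size, and table_size are placeholder names for whatever the caller has at hand:

/* sketch: register a new radix process table, guest tlbie allowed */
unsigned long flags = PROC_TABLE_NEW | PROC_TABLE_RADIX | PROC_TABLE_GTSE;
long rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
			     page_size, table_size);
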
diff --git a/arch/powerpc/include/asm/isa-bridge.h b/arch/powerpc/include/asm/isa-bridge.h
new file mode 100644
index 000000000000..a3a7c1d63a7c
--- /dev/null
+++ b/arch/powerpc/include/asm/isa-bridge.h
@@ -0,0 +1,29 @@
1#ifndef __ISA_BRIDGE_H
2#define __ISA_BRIDGE_H
3
4#ifdef CONFIG_PPC64
5
6extern void isa_bridge_find_early(struct pci_controller *hose);
7extern void isa_bridge_init_non_pci(struct device_node *np);
8
9static inline int isa_vaddr_is_ioport(void __iomem *address)
10{
11 /* Check if address hits the reserved legacy IO range */
12 unsigned long ea = (unsigned long)address;
13 return ea >= ISA_IO_BASE && ea < ISA_IO_END;
14}
15
16#else
17
18static inline int isa_vaddr_is_ioport(void __iomem *address)
19{
20 /* No specific ISA handling on ppc32 at this stage, it
21 * all goes through PCI
22 */
23 return 0;
24}
25
26#endif
27
28#endif /* __ISA_BRIDGE_H */
29
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 97b8c1f83453..d821835ade86 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -29,6 +29,7 @@
29#include <linux/types.h> 29#include <linux/types.h>
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/percpu.h> 31#include <linux/percpu.h>
32#include <linux/module.h>
32#include <asm/probes.h> 33#include <asm/probes.h>
33#include <asm/code-patching.h> 34#include <asm/code-patching.h>
34 35
@@ -39,7 +40,23 @@ struct pt_regs;
39struct kprobe; 40struct kprobe;
40 41
41typedef ppc_opcode_t kprobe_opcode_t; 42typedef ppc_opcode_t kprobe_opcode_t;
42#define MAX_INSN_SIZE 1 43
44extern kprobe_opcode_t optinsn_slot;
45
46/* Optinsn template address */
47extern kprobe_opcode_t optprobe_template_entry[];
48extern kprobe_opcode_t optprobe_template_op_address[];
49extern kprobe_opcode_t optprobe_template_call_handler[];
50extern kprobe_opcode_t optprobe_template_insn[];
51extern kprobe_opcode_t optprobe_template_call_emulate[];
52extern kprobe_opcode_t optprobe_template_ret[];
53extern kprobe_opcode_t optprobe_template_end[];
54
55/* Fixed instruction size for powerpc */
56#define MAX_INSN_SIZE 1
57#define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */
58#define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry)
59#define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */
43 60
44#ifdef PPC64_ELF_ABI_v2 61#ifdef PPC64_ELF_ABI_v2
45/* PPC64 ABIv2 needs local entry point */ 62/* PPC64 ABIv2 needs local entry point */
@@ -61,7 +78,7 @@ typedef ppc_opcode_t kprobe_opcode_t;
61#define kprobe_lookup_name(name, addr) \ 78#define kprobe_lookup_name(name, addr) \
62{ \ 79{ \
63 char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN]; \ 80 char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN]; \
64 char *modsym; \ 81 const char *modsym; \
65 bool dot_appended = false; \ 82 bool dot_appended = false; \
66 if ((modsym = strchr(name, ':')) != NULL) { \ 83 if ((modsym = strchr(name, ':')) != NULL) { \
67 modsym++; \ 84 modsym++; \
@@ -125,6 +142,12 @@ struct kprobe_ctlblk {
125 struct prev_kprobe prev_kprobe; 142 struct prev_kprobe prev_kprobe;
126}; 143};
127 144
145struct arch_optimized_insn {
146 kprobe_opcode_t copied_insn[1];
147 /* detour buffer */
148 kprobe_opcode_t *insn;
149};
150
128extern int kprobe_exceptions_notify(struct notifier_block *self, 151extern int kprobe_exceptions_notify(struct notifier_block *self,
129 unsigned long val, void *data); 152 unsigned long val, void *data);
130extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); 153extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
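
Because every powerpc instruction is 4 bytes, a probed instruction occupies exactly one kprobe_opcode_t and the detour patch (RELATIVEJUMP_SIZE) is a single word. None of this changes how probes are registered; a minimal module using the generic API, which optprobes can now transparently accelerate (the target symbol is just an example):

#include <linux/kprobes.h>
#include <linux/module.h>

static int pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("probe hit at %pS\n", (void *)regs->nip);
	return 0;
}

static struct kprobe kp = {
	.symbol_name = "_do_fork",	/* example target */
	.pre_handler = pre,
};

static int __init probe_init(void)
{
	return register_kprobe(&kp);
}

static void __exit probe_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");
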
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 5cf306ae0ac3..2bf35017ffc0 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -170,6 +170,8 @@ extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
170 unsigned long status); 170 unsigned long status);
171extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, 171extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
172 unsigned long slb_v, unsigned long valid); 172 unsigned long slb_v, unsigned long valid);
173extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
174 unsigned long gpa, gva_t ea, int is_store);
173 175
174extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); 176extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
175extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); 177extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
@@ -182,6 +184,25 @@ extern void kvmppc_mmu_hpte_sysexit(void);
182extern int kvmppc_mmu_hv_init(void); 184extern int kvmppc_mmu_hv_init(void);
183extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); 185extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
184 186
187extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
188 struct kvm_vcpu *vcpu,
189 unsigned long ea, unsigned long dsisr);
190extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
191 struct kvmppc_pte *gpte, bool data, bool iswrite);
192extern int kvmppc_init_vm_radix(struct kvm *kvm);
193extern void kvmppc_free_radix(struct kvm *kvm);
194extern int kvmppc_radix_init(void);
195extern void kvmppc_radix_exit(void);
196extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
197 unsigned long gfn);
198extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
199 unsigned long gfn);
200extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
201 unsigned long gfn);
202extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
203 struct kvm_memory_slot *memslot, unsigned long *map);
204extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
205
185/* XXX remove this export when load_last_inst() is generic */ 206/* XXX remove this export when load_last_inst() is generic */
186extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); 207extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
187extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); 208extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
@@ -211,8 +232,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
211extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, 232extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
212 unsigned long pte_index, unsigned long avpn, 233 unsigned long pte_index, unsigned long avpn,
213 unsigned long *hpret); 234 unsigned long *hpret);
214extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, 235extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
215 struct kvm_memory_slot *memslot, unsigned long *map); 236 struct kvm_memory_slot *memslot, unsigned long *map);
237extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
238 struct kvm_memory_slot *memslot,
239 unsigned long *map);
216extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, 240extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
217 unsigned long mask); 241 unsigned long mask);
218extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); 242extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 848292176908..0db010cc4e65 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -36,6 +36,12 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
36#endif 36#endif
37 37
38#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 38#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
39
40static inline bool kvm_is_radix(struct kvm *kvm)
41{
42 return kvm->arch.radix;
43}
44
39#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ 45#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
40#endif 46#endif
41 47
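
kvm_is_radix() is the switch that selects between the hash and radix halves of the API declared above; the renamed kvmppc_hv_get_dirty_log_hpt() pairs with the new _radix variant roughly like this (illustrative dispatch, condensed from what the dirty-log ioctl path is expected to do):

/* illustrative: pick the dirty-log walker for the guest MMU mode */
static long get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
			  unsigned long *map)
{
	if (kvm_is_radix(kvm))
		return kvmppc_hv_get_dirty_log_radix(kvm, memslot, map);
	return kvmppc_hv_get_dirty_log_hpt(kvm, memslot, map);
}
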
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e59b172666cd..b2dbeac3f450 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -263,7 +263,11 @@ struct kvm_arch {
263 unsigned long hpt_mask; 263 unsigned long hpt_mask;
264 atomic_t hpte_mod_interest; 264 atomic_t hpte_mod_interest;
265 cpumask_t need_tlb_flush; 265 cpumask_t need_tlb_flush;
266 cpumask_t cpu_in_guest;
266 int hpt_cma_alloc; 267 int hpt_cma_alloc;
268 u8 radix;
269 pgd_t *pgtable;
270 u64 process_table;
267 struct dentry *debugfs_dir; 271 struct dentry *debugfs_dir;
268 struct dentry *htab_dentry; 272 struct dentry *htab_dentry;
269#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 273#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -603,6 +607,7 @@ struct kvm_vcpu_arch {
603 ulong fault_dar; 607 ulong fault_dar;
604 u32 fault_dsisr; 608 u32 fault_dsisr;
605 unsigned long intr_msr; 609 unsigned long intr_msr;
610 ulong fault_gpa; /* guest real address of page fault (POWER9) */
606#endif 611#endif
607 612
608#ifdef CONFIG_BOOKE 613#ifdef CONFIG_BOOKE
@@ -657,6 +662,7 @@ struct kvm_vcpu_arch {
657 int state; 662 int state;
658 int ptid; 663 int ptid;
659 int thread_cpu; 664 int thread_cpu;
665 int prev_cpu;
660 bool timer_running; 666 bool timer_running;
661 wait_queue_head_t cpu_run; 667 wait_queue_head_t cpu_run;
662 668
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2da67bf1f2ec..48c760f89590 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -291,6 +291,8 @@ struct kvmppc_ops {
291 struct irq_bypass_producer *); 291 struct irq_bypass_producer *);
292 void (*irq_bypass_del_producer)(struct irq_bypass_consumer *, 292 void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
293 struct irq_bypass_producer *); 293 struct irq_bypass_producer *);
294 int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
295 int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
294}; 296};
295 297
296extern struct kvmppc_ops *kvmppc_hv_ops; 298extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 233a7e8cc8e3..065e762fae85 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -136,6 +136,7 @@ enum {
136 MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | 136 MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
137 MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | 137 MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
138 MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | 138 MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
139 MMU_FTR_KERNEL_RO |
139#ifdef CONFIG_PPC_RADIX_MMU 140#ifdef CONFIG_PPC_RADIX_MMU
140 MMU_FTR_TYPE_RADIX | 141 MMU_FTR_TYPE_RADIX |
141#endif 142#endif
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 0e2e57bcab50..a0aa285869b5 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -167,7 +167,8 @@
167#define OPAL_INT_EOI 124 167#define OPAL_INT_EOI 124
168#define OPAL_INT_SET_MFRR 125 168#define OPAL_INT_SET_MFRR 125
169#define OPAL_PCI_TCE_KILL 126 169#define OPAL_PCI_TCE_KILL 126
170#define OPAL_LAST 126 170#define OPAL_NMMU_SET_PTCR 127
171#define OPAL_LAST 127
171 172
172/* Device tree flags */ 173/* Device tree flags */
173 174
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5c7db0f1a708..1ff03a6da76e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -67,7 +67,6 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
67int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func, 67int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
68 uint64_t offset, uint32_t data); 68 uint64_t offset, uint32_t data);
69int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority); 69int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
70int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
71int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority); 70int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
72int64_t opal_register_exception_handler(uint64_t opal_exception, 71int64_t opal_register_exception_handler(uint64_t opal_exception,
73 uint64_t handler_address, 72 uint64_t handler_address,
@@ -220,18 +219,13 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
220int64_t opal_pci_poll2(uint64_t id, uint64_t data); 219int64_t opal_pci_poll2(uint64_t id, uint64_t data);
221 220
222int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); 221int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
223int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
224int64_t opal_int_set_cppr(uint8_t cppr); 222int64_t opal_int_set_cppr(uint8_t cppr);
225int64_t opal_int_eoi(uint32_t xirr); 223int64_t opal_int_eoi(uint32_t xirr);
226int64_t opal_rm_int_eoi(uint32_t xirr);
227int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); 224int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
228int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
229int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, 225int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
230 uint32_t pe_num, uint32_t tce_size, 226 uint32_t pe_num, uint32_t tce_size,
231 uint64_t dma_addr, uint32_t npages); 227 uint64_t dma_addr, uint32_t npages);
232int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, 228int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
233 uint32_t pe_num, uint32_t tce_size,
234 uint64_t dma_addr, uint32_t npages);
235 229
236/* Internal functions */ 230/* Internal functions */
237extern int early_init_dt_scan_opal(unsigned long node, const char *uname, 231extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index dd5f0712afa2..3e83d2a20b6f 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -47,14 +47,14 @@ static inline void clear_page(void *addr)
47 unsigned long iterations; 47 unsigned long iterations;
48 unsigned long onex, twox, fourx, eightx; 48 unsigned long onex, twox, fourx, eightx;
49 49
50 iterations = ppc64_caches.dlines_per_page / 8; 50 iterations = ppc64_caches.l1d.blocks_per_page / 8;
51 51
52 /* 52 /*
53 * Some versions of gcc use multiply instructions to 53 * Some versions of gcc use multiply instructions to
54 * calculate the offsets so let's give it a hand to 54 * calculate the offsets so let's give it a hand to
55 * do better. 55 * do better.
56 */ 56 */
57 onex = ppc64_caches.dline_size; 57 onex = ppc64_caches.l1d.block_size;
58 twox = onex << 1; 58 twox = onex << 1;
59 fourx = onex << 2; 59 fourx = onex << 2;
60 eightx = onex << 3; 60 eightx = onex << 3;
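
The renamed fields feed the same 8-way unrolled loop: blocks_per_page says how many cache blocks cover a page, and onex through eightx are precomputed byte offsets so gcc does not synthesise multiplies. A C model of the structure; the kernel zeroes each block with dcbz in inline assembly rather than memset:

#include <string.h>

/* model: zero a page eight cache blocks per iteration */
static void clear_page_model(char *addr, unsigned long block_size,
			     unsigned long blocks_per_page)
{
	unsigned long onex = block_size, eightx = block_size << 3;
	unsigned long i, iterations = blocks_per_page / 8;

	for (i = 0; i < iterations; i++, addr += eightx) {
		memset(addr, 0, onex);		/* dcbz equivalent */
		memset(addr + onex, 0, onex);
		memset(addr + 2 * onex, 0, onex);
		memset(addr + 3 * onex, 0, onex);
		memset(addr + 4 * onex, 0, onex);
		memset(addr + 5 * onex, 0, onex);
		memset(addr + 6 * onex, 0, onex);
		memset(addr + 7 * onex, 0, onex);
	}
}
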
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index c0309c59bed8..56c67d3f0108 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -174,14 +174,6 @@ extern int pci_device_from_OF_node(struct device_node *node,
174 u8 *bus, u8 *devfn); 174 u8 *bus, u8 *devfn);
175extern void pci_create_OF_bus_map(void); 175extern void pci_create_OF_bus_map(void);
176 176
177static inline int isa_vaddr_is_ioport(void __iomem *address)
178{
179 /* No specific ISA handling on ppc32 at this stage, it
180 * all goes through PCI
181 */
182 return 0;
183}
184
185#else /* CONFIG_PPC64 */ 177#else /* CONFIG_PPC64 */
186 178
187/* 179/*
@@ -269,16 +261,6 @@ extern void pci_hp_remove_devices(struct pci_bus *bus);
269/** Discover new pci devices under this bus, and add them */ 261/** Discover new pci devices under this bus, and add them */
270extern void pci_hp_add_devices(struct pci_bus *bus); 262extern void pci_hp_add_devices(struct pci_bus *bus);
271 263
272
273extern void isa_bridge_find_early(struct pci_controller *hose);
274
275static inline int isa_vaddr_is_ioport(void __iomem *address)
276{
277 /* Check if address hits the reserved legacy IO range */
278 unsigned long ea = (unsigned long)address;
279 return ea >= ISA_IO_BASE && ea < ISA_IO_END;
280}
281
282extern int pcibios_unmap_io_space(struct pci_bus *bus); 264extern int pcibios_unmap_io_space(struct pci_bus *bus);
283extern int pcibios_map_io_space(struct pci_bus *bus); 265extern int pcibios_map_io_space(struct pci_bus *bus);
284 266
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 0bcc75e295e3..c7b164836bc3 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -210,6 +210,18 @@ static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
210 return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn); 210 return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn);
211} 211}
212 212
213static inline long plpar_resize_hpt_prepare(unsigned long flags,
214 unsigned long shift)
215{
216 return plpar_hcall_norets(H_RESIZE_HPT_PREPARE, flags, shift);
217}
218
219static inline long plpar_resize_hpt_commit(unsigned long flags,
220 unsigned long shift)
221{
222 return plpar_hcall_norets(H_RESIZE_HPT_COMMIT, flags, shift);
223}
224
213static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba, 225static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba,
214 unsigned long *tce_ret) 226 unsigned long *tce_ret)
215{ 227{
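
Resizing is a two-phase protocol: H_RESIZE_HPT_PREPARE asks the hypervisor to build a table of 2^shift bytes (polling while it works), and H_RESIZE_HPT_COMMIT switches over. A condensed sketch of the calling convention, assuming the usual H_LONG_BUSY handling for slow hcalls; the real code also sleeps between retries and quiesces the machine around the commit:

/* sketch: two-phase HPT resize, new size given as log2 (shift) */
static int resize_hpt(unsigned long shift)
{
	long rc;

	do {
		rc = plpar_resize_hpt_prepare(0, shift);
	} while (H_IS_LONG_BUSY(rc));	/* hypervisor still building it */

	if (rc != H_SUCCESS)
		return -EIO;

	rc = plpar_resize_hpt_commit(0, shift);
	return rc == H_SUCCESS ? 0 : -EIO;
}
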
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
new file mode 100644
index 000000000000..0e9c2402dd20
--- /dev/null
+++ b/arch/powerpc/include/asm/powernv.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright 2017 IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#ifndef _ASM_POWERNV_H
11#define _ASM_POWERNV_H
12
13#ifdef CONFIG_PPC_POWERNV
14extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
15#else
16static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
17#endif
18
19#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index c4ced1d01d57..d99bd442aacb 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -306,6 +306,7 @@
306#define __PPC_WC(w) (((w) & 0x3) << 21) 306#define __PPC_WC(w) (((w) & 0x3) << 21)
307#define __PPC_WS(w) (((w) & 0x1f) << 11) 307#define __PPC_WS(w) (((w) & 0x1f) << 11)
308#define __PPC_SH(s) __PPC_WS(s) 308#define __PPC_SH(s) __PPC_WS(s)
309#define __PPC_SH64(s) (__PPC_SH(s) | (((s) & 0x20) >> 4))
309#define __PPC_MB(s) (((s) & 0x1f) << 6) 310#define __PPC_MB(s) (((s) & 0x1f) << 6)
310#define __PPC_ME(s) (((s) & 0x1f) << 1) 311#define __PPC_ME(s) (((s) & 0x1f) << 1)
311#define __PPC_MB64(s) (__PPC_MB(s) | ((s) & 0x20)) 312#define __PPC_MB64(s) (__PPC_MB(s) | ((s) & 0x20))
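
The 64-bit rotate instructions take a 6-bit shift count, but the MD instruction format splits it: bits 0-4 occupy the regular SH field while bit 5 lands in bit 1 of the instruction word. __PPC_SH64() mirrors what __PPC_MB64() already does for the mask-begin field. A quick standalone check of the arithmetic:

#include <stdio.h>

#define __PPC_WS(w)	(((w) & 0x1f) << 11)
#define __PPC_SH(s)	__PPC_WS(s)
#define __PPC_SH64(s)	(__PPC_SH(s) | (((s) & 0x20) >> 4))

int main(void)
{
	/* shift = 36 = 0b100100: low five bits go to SH (4 << 11),
	 * the sixth bit becomes bit 1 of the word */
	printf("%#x\n", __PPC_SH64(36));	/* prints 0x2002 */
	return 0;
}
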
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 1ba814436c73..21e0b52685b5 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -454,7 +454,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */
454extern unsigned long power7_nap(int check_irq); 454extern unsigned long power7_nap(int check_irq);
455extern unsigned long power7_sleep(void); 455extern unsigned long power7_sleep(void);
456extern unsigned long power7_winkle(void); 456extern unsigned long power7_winkle(void);
457extern unsigned long power9_idle_stop(unsigned long stop_level); 457extern unsigned long power9_idle_stop(unsigned long stop_psscr_val,
458 unsigned long stop_psscr_mask);
458 459
459extern void flush_instruction_cache(void); 460extern void flush_instruction_cache(void);
460extern void hard_reset_now(void); 461extern void hard_reset_now(void);
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 5e57705b4759..2c8001cc93b6 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -121,6 +121,8 @@ struct of_drconf_cell {
121#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */ 121#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */
122#define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */ 122#define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */
123 123
124#define OV1_PPC_3_00 0x80 /* set if we support PowerPC 3.00 */
125
124/* Option vector 2: Open Firmware options supported */ 126/* Option vector 2: Open Firmware options supported */
125#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */ 127#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */
126 128
@@ -151,10 +153,18 @@ struct of_drconf_cell {
151#define OV5_XCMO 0x0440 /* Page Coalescing */ 153#define OV5_XCMO 0x0440 /* Page Coalescing */
152#define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */ 154#define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */
153#define OV5_PRRN 0x0540 /* Platform Resource Reassignment */ 155#define OV5_PRRN 0x0540 /* Platform Resource Reassignment */
154#define OV5_PFO_HW_RNG 0x0E80 /* PFO Random Number Generator */ 156#define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */
155#define OV5_PFO_HW_842 0x0E40 /* PFO Compression Accelerator */ 157#define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */
156#define OV5_PFO_HW_ENCR 0x0E20 /* PFO Encryption Accelerator */ 158#define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */
157#define OV5_SUB_PROCESSORS 0x0F01 /* 1,2,or 4 Sub-Processors supported */ 159#define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */
160#define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */
161#define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */
162#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */
163#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */
164#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */
165#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */
166#define OV5_MMU_SLB 0x1800 /* always use SLB */
167#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */
158 168
159/* Option Vector 6: IBM PAPR hints */ 169/* Option Vector 6: IBM PAPR hints */
160#define OV6_LINUX 0x02 /* Linux is our OS */ 170#define OV6_LINUX 0x02 /* Linux is our OS */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index dff79798903d..cb02d32db147 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -274,10 +274,14 @@
274#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ 274#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
275#define DSISR_NOHPTE 0x40000000 /* no translation found */ 275#define DSISR_NOHPTE 0x40000000 /* no translation found */
276#define DSISR_PROTFAULT 0x08000000 /* protection fault */ 276#define DSISR_PROTFAULT 0x08000000 /* protection fault */
277#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */
277#define DSISR_ISSTORE 0x02000000 /* access was a store */ 278#define DSISR_ISSTORE 0x02000000 /* access was a store */
278#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ 279#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
279#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ 280#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */
280#define DSISR_KEYFAULT 0x00200000 /* Key fault */ 281#define DSISR_KEYFAULT 0x00200000 /* Key fault */
282#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */
283#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */
284#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */
281#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ 285#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
282#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ 286#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
283#define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */ 287#define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */
@@ -338,7 +342,7 @@
338#define LPCR_DPFD_SH 52 342#define LPCR_DPFD_SH 52
339#define LPCR_DPFD (ASM_CONST(7) << LPCR_DPFD_SH) 343#define LPCR_DPFD (ASM_CONST(7) << LPCR_DPFD_SH)
340#define LPCR_VRMASD_SH 47 344#define LPCR_VRMASD_SH 47
341#define LPCR_VRMASD (ASM_CONST(1) << LPCR_VRMASD_SH) 345#define LPCR_VRMASD (ASM_CONST(0x1f) << LPCR_VRMASD_SH)
342#define LPCR_VRMA_L ASM_CONST(0x0008000000000000) 346#define LPCR_VRMA_L ASM_CONST(0x0008000000000000)
343#define LPCR_VRMA_LP0 ASM_CONST(0x0001000000000000) 347#define LPCR_VRMA_LP0 ASM_CONST(0x0001000000000000)
344#define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000) 348#define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000)
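The DSISR additions above are single-bit status flags, so a handler tests them with a plain bitwise AND. A minimal, hypothetical sketch (not the kernel's actual fault path; bit values copied from the hunk above):

	#include <stdio.h>

	#define DSISR_BADACCESS		0x04000000	/* bad access to CI or G */
	#define DSISR_UNSUPP_MMU	0x00080000	/* unsupported MMU config */
	#define DSISR_SET_RC		0x00040000	/* failed setting of R/C bits */
	#define DSISR_PGDIRFAULT	0x00020000	/* fault on page directory */

	static void describe_dsisr(unsigned long dsisr)
	{
		if (dsisr & DSISR_BADACCESS)
			printf("bad access to a cache-inhibited or guarded page\n");
		if (dsisr & DSISR_UNSUPP_MMU)
			printf("unsupported MMU configuration\n");
		if (dsisr & DSISR_SET_RC)
			printf("failed to set the referenced/changed bits\n");
		if (dsisr & DSISR_PGDIRFAULT)
			printf("fault on a page directory\n");
	}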
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9c23baa10b81..076b89247ab5 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -318,6 +318,7 @@ struct pseries_hp_errorlog {
318 318
319#define PSERIES_HP_ELOG_ACTION_ADD 1 319#define PSERIES_HP_ELOG_ACTION_ADD 1
320#define PSERIES_HP_ELOG_ACTION_REMOVE 2 320#define PSERIES_HP_ELOG_ACTION_REMOVE 2
321#define PSERIES_HP_ELOG_ACTION_READD 3
321 322
322#define PSERIES_HP_ELOG_ID_DRC_NAME 1 323#define PSERIES_HP_ELOG_ID_DRC_NAME 1
323#define PSERIES_HP_ELOG_ID_DRC_INDEX 2 324#define PSERIES_HP_ELOG_ID_DRC_INDEX 2
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index f6fc0ee813d7..c88930c9db7f 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -18,6 +18,13 @@
18#ifdef CONFIG_MEMORY_HOTPLUG 18#ifdef CONFIG_MEMORY_HOTPLUG
19extern int create_section_mapping(unsigned long start, unsigned long end); 19extern int create_section_mapping(unsigned long start, unsigned long end);
20extern int remove_section_mapping(unsigned long start, unsigned long end); 20extern int remove_section_mapping(unsigned long start, unsigned long end);
21
22#ifdef CONFIG_PPC_BOOK3S_64
23extern void resize_hpt_for_hotplug(unsigned long new_mem_size);
24#else
25static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { }
26#endif
27
21#ifdef CONFIG_NUMA 28#ifdef CONFIG_NUMA
22extern int hot_add_scn_to_nid(unsigned long scn_addr); 29extern int hot_add_scn_to_nid(unsigned long scn_addr);
23#else 30#else
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a15d84d59356..0e6add3187bc 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -261,7 +261,7 @@ do { \
261({ \ 261({ \
262 long __gu_err; \ 262 long __gu_err; \
263 unsigned long __gu_val; \ 263 unsigned long __gu_val; \
264 __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ 264 const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
265 __chk_user_ptr(ptr); \ 265 __chk_user_ptr(ptr); \
266 if (!is_kernel_addr((unsigned long)__gu_addr)) \ 266 if (!is_kernel_addr((unsigned long)__gu_addr)) \
267 might_fault(); \ 267 might_fault(); \
@@ -274,7 +274,7 @@ do { \
274({ \ 274({ \
275 long __gu_err = -EFAULT; \ 275 long __gu_err = -EFAULT; \
276 unsigned long __gu_val = 0; \ 276 unsigned long __gu_val = 0; \
277 __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ 277 const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
278 might_fault(); \ 278 might_fault(); \
279 if (access_ok(VERIFY_READ, __gu_addr, (size))) \ 279 if (access_ok(VERIFY_READ, __gu_addr, (size))) \
280 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ 280 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
@@ -286,7 +286,7 @@ do { \
286({ \ 286({ \
287 long __gu_err; \ 287 long __gu_err; \
288 unsigned long __gu_val; \ 288 unsigned long __gu_val; \
289 __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ 289 const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
290 __chk_user_ptr(ptr); \ 290 __chk_user_ptr(ptr); \
291 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ 291 __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
292 (x) = (__force __typeof__(*(ptr)))__gu_val; \ 292 (x) = (__force __typeof__(*(ptr)))__gu_val; \
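The const qualifier added to __gu_addr in these three hunks lets __get_user() take a pointer that is itself const-qualified without a discarded-qualifier warning. A sketch of the now-clean usage (hypothetical kernel-side caller):

	#include <linux/uaccess.h>

	/* The user pointer is const; with the change above, the internal
	 * __gu_addr assignment no longer drops the qualifier. */
	static int read_flag(const int __user *uptr)
	{
		int val;
		int err = __get_user(val, uptr);

		return err ? err : val;
	}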
diff --git a/arch/powerpc/include/uapi/asm/auxvec.h b/arch/powerpc/include/uapi/asm/auxvec.h
index ce17d2c9eb4e..be6e94ecec42 100644
--- a/arch/powerpc/include/uapi/asm/auxvec.h
+++ b/arch/powerpc/include/uapi/asm/auxvec.h
@@ -16,6 +16,37 @@
16 */ 16 */
17#define AT_SYSINFO_EHDR 33 17#define AT_SYSINFO_EHDR 33
18 18
19#define AT_VECTOR_SIZE_ARCH 6 /* entries in ARCH_DLINFO */ 19/*
20 * AT_*CACHEBSIZE above represent the cache *block* size which is
21 * the size that is affected by the cache management instructions.
22 *
23 * It doesn't necessarily match the cache *line* size which is
24 * more of a performance tuning hint. Additionally the latter can
25 * be different for the different cache levels.
26 *
27 * The set of entries below represents more extensive information
28 * about the caches, in the form of two entries per cache type,
29 * one entry containing the cache size in bytes, and the other
30 * containing the cache line size in bytes in the bottom 16 bits
31 * and the cache associativity in the next 16 bits.
32 *
33 * The associativity is such that if N is the 16-bit value, the
34 * cache is N-way set associative. A value of 0xffff means fully
35 * associative, a value of 1 means directly mapped.
36 *
37 * For all these fields, a value of 0 means that the information
38 * is not known.
39 */
40
41#define AT_L1I_CACHESIZE 40
42#define AT_L1I_CACHEGEOMETRY 41
43#define AT_L1D_CACHESIZE 42
44#define AT_L1D_CACHEGEOMETRY 43
45#define AT_L2_CACHESIZE 44
46#define AT_L2_CACHEGEOMETRY 45
47#define AT_L3_CACHESIZE 46
48#define AT_L3_CACHEGEOMETRY 47
49
50#define AT_VECTOR_SIZE_ARCH 14 /* entries in ARCH_DLINFO */
20 51
21#endif 52#endif
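A userspace sketch of consuming the new entries, assuming a libc that provides getauxval() and falling back to local definitions of the constants where headers predate them; per the comment above, 0 means the kernel did not report the value:

	#include <stdio.h>
	#include <sys/auxv.h>

	#ifndef AT_L1D_CACHESIZE
	#define AT_L1D_CACHESIZE	42
	#define AT_L1D_CACHEGEOMETRY	43
	#endif

	int main(void)
	{
		unsigned long size  = getauxval(AT_L1D_CACHESIZE);
		unsigned long geom  = getauxval(AT_L1D_CACHEGEOMETRY);
		unsigned long line  = geom & 0xffff;		/* line size in bytes */
		unsigned long assoc = (geom >> 16) & 0xffff;	/* 0xffff = fully assoc. */

		printf("L1D: %lu bytes, %lu-byte lines, ", size, line);
		if (assoc == 0xffff)
			printf("fully associative\n");
		else
			printf("%lu-way set associative\n", assoc);
		return 0;
	}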
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 3a9e44c45c78..b2c6fdd5ac30 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -162,29 +162,6 @@ typedef elf_vrreg_t elf_vrregset_t32[ELF_NVRREG32];
162typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; 162typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];
163#endif 163#endif
164 164
165
166/*
167 * The requirements here are:
168 * - keep the final alignment of sp (sp & 0xf)
169 * - make sure the 32-bit value at the first 16 byte aligned position of
170 * AUXV is greater than 16 for glibc compatibility.
171 * AT_IGNOREPPC is used for that.
172 * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
173 * even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
174 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
175 */
176#define ARCH_DLINFO \
177do { \
178 /* Handle glibc compatibility. */ \
179 NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
180 NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
181 /* Cache size items */ \
182 NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \
183 NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \
184 NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \
185 VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \
186} while (0)
187
188/* PowerPC64 relocations defined by the ABIs */ 165/* PowerPC64 relocations defined by the ABIs */
189#define R_PPC64_NONE R_PPC_NONE 166#define R_PPC64_NONE R_PPC_NONE
190#define R_PPC64_ADDR32 R_PPC_ADDR32 /* 32bit absolute address. */ 167#define R_PPC64_ADDR32 R_PPC_ADDR32 /* 32bit absolute address. */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 3603b6f51b11..cc0908b6c2a0 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
413 __u16 n_invalid; 413 __u16 n_invalid;
414}; 414};
415 415
416/* For KVM_PPC_CONFIGURE_V3_MMU */
417struct kvm_ppc_mmuv3_cfg {
418 __u64 flags;
419 __u64 process_table; /* second doubleword of partition table entry */
420};
421
422/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
423#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
424#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
425
426/* For KVM_PPC_GET_RMMU_INFO */
427struct kvm_ppc_rmmu_info {
428 struct kvm_ppc_radix_geom {
429 __u8 page_shift;
430 __u8 level_bits[4];
431 __u8 pad[3];
432 } geometries[8];
433 __u32 ap_encodings[8];
434};
435
416/* Per-vcpu XICS interrupt controller state */ 436/* Per-vcpu XICS interrupt controller state */
417#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) 437#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
418 438
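A hedged sketch of how a VMM might use the new structure, assuming the usual KVM VM-fd ioctl convention and uapi headers that define KVM_PPC_CONFIGURE_V3_MMU; error handling elided:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Ask KVM to run a POWER9 guest in radix mode with guest translation
	 * shootdown enabled. process_table is the second doubleword of the
	 * partition table entry, per the struct comment above. */
	static int enable_radix(int vm_fd, __u64 process_table)
	{
		struct kvm_ppc_mmuv3_cfg cfg = {
			.flags		= KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE,
			.process_table	= process_table,
		};

		return ioctl(vm_fd, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
	}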
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f4c2b52e58b3..811f441a125f 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -15,7 +15,7 @@ CFLAGS_btext.o += -fPIC
15endif 15endif
16 16
17CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 17CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
18CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 18CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
19CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 19CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
20CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 20CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
21 21
@@ -96,6 +96,7 @@ obj-$(CONFIG_KGDB) += kgdb.o
96obj-$(CONFIG_BOOTX_TEXT) += btext.o 96obj-$(CONFIG_BOOTX_TEXT) += btext.o
97obj-$(CONFIG_SMP) += smp.o 97obj-$(CONFIG_SMP) += smp.o
98obj-$(CONFIG_KPROBES) += kprobes.o 98obj-$(CONFIG_KPROBES) += kprobes.o
99obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
99obj-$(CONFIG_UPROBES) += uprobes.o 100obj-$(CONFIG_UPROBES) += uprobes.o
100obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o 101obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
101obj-$(CONFIG_STACKTRACE) += stacktrace.o 102obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 8d58c61908f7..cbc7c42cdb74 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -204,7 +204,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
204 int i, size; 204 int i, size;
205 205
206#ifdef __powerpc64__ 206#ifdef __powerpc64__
207 size = ppc64_caches.dline_size; 207 size = ppc64_caches.l1d.block_size;
208#else 208#else
209 size = L1_CACHE_BYTES; 209 size = L1_CACHE_BYTES;
210#endif 210#endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 9e8e771f8acb..f25239b3a06f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -160,12 +160,12 @@ int main(void)
160 DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); 160 DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
161 161
162#ifdef CONFIG_PPC64 162#ifdef CONFIG_PPC64
163 DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); 163 DEFINE(DCACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1d.block_size));
164 DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); 164 DEFINE(DCACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1d.log_block_size));
165 DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); 165 DEFINE(DCACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1d.blocks_per_page));
166 DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); 166 DEFINE(ICACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1i.block_size));
167 DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); 167 DEFINE(ICACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1i.log_block_size));
168 DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); 168 DEFINE(ICACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1i.blocks_per_page));
169 /* paca */ 169 /* paca */
170 DEFINE(PACA_SIZE, sizeof(struct paca_struct)); 170 DEFINE(PACA_SIZE, sizeof(struct paca_struct));
171 DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); 171 DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
@@ -495,6 +495,7 @@ int main(void)
495 DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); 495 DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
496 DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); 496 DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
497 DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); 497 DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
498 DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix));
498 DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); 499 DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
499 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); 500 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
500 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); 501 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
@@ -534,6 +535,7 @@ int main(void)
534 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); 535 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
535 DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); 536 DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
536 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); 537 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
538 DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa));
537 DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); 539 DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
538 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 540 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
539 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); 541 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d39d6118c6e9..857bf7c5b946 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -93,7 +93,7 @@ USE_FIXED_SECTION(real_vectors)
93__start_interrupts: 93__start_interrupts:
94 94
95/* No virt vectors corresponding with 0x0..0x100 */ 95/* No virt vectors corresponding with 0x0..0x100 */
96EXC_VIRT_NONE(0x4000, 0x4100) 96EXC_VIRT_NONE(0x4000, 0x100)
97 97
98 98
99#ifdef CONFIG_PPC_P7_NAP 99#ifdef CONFIG_PPC_P7_NAP
@@ -114,15 +114,15 @@ EXC_VIRT_NONE(0x4000, 0x4100)
114#define IDLETEST NOTEST 114#define IDLETEST NOTEST
115#endif 115#endif
116 116
117EXC_REAL_BEGIN(system_reset, 0x100, 0x200) 117EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
118 SET_SCRATCH0(r13) 118 SET_SCRATCH0(r13)
119 GET_PACA(r13) 119 GET_PACA(r13)
120 clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ 120 clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
121 EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, 121 EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
122 IDLETEST, 0x100) 122 IDLETEST, 0x100)
123 123
124EXC_REAL_END(system_reset, 0x100, 0x200) 124EXC_REAL_END(system_reset, 0x100, 0x100)
125EXC_VIRT_NONE(0x4100, 0x4200) 125EXC_VIRT_NONE(0x4100, 0x100)
126 126
127#ifdef CONFIG_PPC_P7_NAP 127#ifdef CONFIG_PPC_P7_NAP
128EXC_COMMON_BEGIN(system_reset_idle_common) 128EXC_COMMON_BEGIN(system_reset_idle_common)
@@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
142 lbz r0,HSTATE_HWTHREAD_REQ(r13) 142 lbz r0,HSTATE_HWTHREAD_REQ(r13)
143 cmpwi r0,0 143 cmpwi r0,0
144 beq 1f 144 beq 1f
145 b kvm_start_guest 145 BRANCH_TO_KVM(r10, kvm_start_guest)
1461: 1461:
147#endif 147#endif
148 148
@@ -166,7 +166,7 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi)
166#endif /* CONFIG_PPC_PSERIES */ 166#endif /* CONFIG_PPC_PSERIES */
167 167
168 168
169EXC_REAL_BEGIN(machine_check, 0x200, 0x300) 169EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
170 /* This is moved out of line as it can be patched by FW, but 170 /* This is moved out of line as it can be patched by FW, but
171 * some code path might still want to branch into the original 171 * some code path might still want to branch into the original
172 * vector 172 * vector
@@ -186,8 +186,8 @@ BEGIN_FTR_SECTION
186FTR_SECTION_ELSE 186FTR_SECTION_ELSE
187 b machine_check_pSeries_0 187 b machine_check_pSeries_0
188ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) 188ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
189EXC_REAL_END(machine_check, 0x200, 0x300) 189EXC_REAL_END(machine_check, 0x200, 0x100)
190EXC_VIRT_NONE(0x4200, 0x4300) 190EXC_VIRT_NONE(0x4200, 0x100)
191TRAMP_REAL_BEGIN(machine_check_powernv_early) 191TRAMP_REAL_BEGIN(machine_check_powernv_early)
192BEGIN_FTR_SECTION 192BEGIN_FTR_SECTION
193 EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) 193 EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
@@ -381,12 +381,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
381 lbz r3,PACA_THREAD_IDLE_STATE(r13) 381 lbz r3,PACA_THREAD_IDLE_STATE(r13)
382 cmpwi r3,PNV_THREAD_NAP 382 cmpwi r3,PNV_THREAD_NAP
383 bgt 10f 383 bgt 10f
384 IDLE_STATE_ENTER_SEQ(PPC_NAP) 384 IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
385 /* No return */ 385 /* No return */
38610: 38610:
387 cmpwi r3,PNV_THREAD_SLEEP 387 cmpwi r3,PNV_THREAD_SLEEP
388 bgt 2f 388 bgt 2f
389 IDLE_STATE_ENTER_SEQ(PPC_SLEEP) 389 IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
390 /* No return */ 390 /* No return */
391 391
3922: 3922:
@@ -400,7 +400,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
400 */ 400 */
401 ori r13,r13,1 401 ori r13,r13,1
402 SET_PACA(r13) 402 SET_PACA(r13)
403 IDLE_STATE_ENTER_SEQ(PPC_WINKLE) 403 IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
404 /* No return */ 404 /* No return */
4054: 4054:
406#endif 406#endif
@@ -483,8 +483,8 @@ EXC_COMMON_BEGIN(unrecover_mce)
483 b 1b 483 b 1b
484 484
485 485
486EXC_REAL(data_access, 0x300, 0x380) 486EXC_REAL(data_access, 0x300, 0x80)
487EXC_VIRT(data_access, 0x4300, 0x4380, 0x300) 487EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
488TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) 488TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
489 489
490EXC_COMMON_BEGIN(data_access_common) 490EXC_COMMON_BEGIN(data_access_common)
@@ -512,7 +512,7 @@ MMU_FTR_SECTION_ELSE
512ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) 512ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
513 513
514 514
515EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400) 515EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
516 SET_SCRATCH0(r13) 516 SET_SCRATCH0(r13)
517 EXCEPTION_PROLOG_0(PACA_EXSLB) 517 EXCEPTION_PROLOG_0(PACA_EXSLB)
518 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) 518 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
@@ -533,9 +533,9 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400)
533 mtctr r10 533 mtctr r10
534 bctr 534 bctr
535#endif 535#endif
536EXC_REAL_END(data_access_slb, 0x380, 0x400) 536EXC_REAL_END(data_access_slb, 0x380, 0x80)
537 537
538EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400) 538EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
539 SET_SCRATCH0(r13) 539 SET_SCRATCH0(r13)
540 EXCEPTION_PROLOG_0(PACA_EXSLB) 540 EXCEPTION_PROLOG_0(PACA_EXSLB)
541 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) 541 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
@@ -556,12 +556,12 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400)
556 mtctr r10 556 mtctr r10
557 bctr 557 bctr
558#endif 558#endif
559EXC_VIRT_END(data_access_slb, 0x4380, 0x4400) 559EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
560TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) 560TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
561 561
562 562
563EXC_REAL(instruction_access, 0x400, 0x480) 563EXC_REAL(instruction_access, 0x400, 0x80)
564EXC_VIRT(instruction_access, 0x4400, 0x4480, 0x400) 564EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
565TRAMP_KVM(PACA_EXGEN, 0x400) 565TRAMP_KVM(PACA_EXGEN, 0x400)
566 566
567EXC_COMMON_BEGIN(instruction_access_common) 567EXC_COMMON_BEGIN(instruction_access_common)
@@ -580,7 +580,7 @@ MMU_FTR_SECTION_ELSE
580ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) 580ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
581 581
582 582
583EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500) 583EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
584 SET_SCRATCH0(r13) 584 SET_SCRATCH0(r13)
585 EXCEPTION_PROLOG_0(PACA_EXSLB) 585 EXCEPTION_PROLOG_0(PACA_EXSLB)
586 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) 586 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
@@ -596,9 +596,9 @@ EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500)
596 mtctr r10 596 mtctr r10
597 bctr 597 bctr
598#endif 598#endif
599EXC_REAL_END(instruction_access_slb, 0x480, 0x500) 599EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
600 600
601EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500) 601EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
602 SET_SCRATCH0(r13) 602 SET_SCRATCH0(r13)
603 EXCEPTION_PROLOG_0(PACA_EXSLB) 603 EXCEPTION_PROLOG_0(PACA_EXSLB)
604 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) 604 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
@@ -614,7 +614,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500)
614 mtctr r10 614 mtctr r10
615 bctr 615 bctr
616#endif 616#endif
617EXC_VIRT_END(instruction_access_slb, 0x4480, 0x4500) 617EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
618TRAMP_KVM(PACA_EXSLB, 0x480) 618TRAMP_KVM(PACA_EXSLB, 0x480)
619 619
620 620
@@ -711,23 +711,19 @@ EXC_COMMON_BEGIN(bad_addr_slb)
711 bl slb_miss_bad_addr 711 bl slb_miss_bad_addr
712 b ret_from_except 712 b ret_from_except
713 713
714EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x600) 714EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
715 .globl hardware_interrupt_hv; 715 .globl hardware_interrupt_hv;
716hardware_interrupt_hv: 716hardware_interrupt_hv:
717 BEGIN_FTR_SECTION 717 BEGIN_FTR_SECTION
718 _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, 718 _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
719 EXC_HV, SOFTEN_TEST_HV) 719 EXC_HV, SOFTEN_TEST_HV)
720do_kvm_H0x500:
721 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
722 FTR_SECTION_ELSE 720 FTR_SECTION_ELSE
723 _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, 721 _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
724 EXC_STD, SOFTEN_TEST_PR) 722 EXC_STD, SOFTEN_TEST_PR)
725do_kvm_0x500:
726 KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
727 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) 723 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
728EXC_REAL_END(hardware_interrupt, 0x500, 0x600) 724EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
729 725
730EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600) 726EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
731 .globl hardware_interrupt_relon_hv; 727 .globl hardware_interrupt_relon_hv;
732hardware_interrupt_relon_hv: 728hardware_interrupt_relon_hv:
733 BEGIN_FTR_SECTION 729 BEGIN_FTR_SECTION
@@ -735,13 +731,15 @@ hardware_interrupt_relon_hv:
735 FTR_SECTION_ELSE 731 FTR_SECTION_ELSE
736 _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) 732 _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
737 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) 733 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
738EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) 734EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
739 735
736TRAMP_KVM(PACA_EXGEN, 0x500)
737TRAMP_KVM_HV(PACA_EXGEN, 0x500)
740EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) 738EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
741 739
742 740
743EXC_REAL(alignment, 0x600, 0x700) 741EXC_REAL(alignment, 0x600, 0x100)
744EXC_VIRT(alignment, 0x4600, 0x4700, 0x600) 742EXC_VIRT(alignment, 0x4600, 0x100, 0x600)
745TRAMP_KVM(PACA_EXGEN, 0x600) 743TRAMP_KVM(PACA_EXGEN, 0x600)
746EXC_COMMON_BEGIN(alignment_common) 744EXC_COMMON_BEGIN(alignment_common)
747 mfspr r10,SPRN_DAR 745 mfspr r10,SPRN_DAR
@@ -760,8 +758,8 @@ EXC_COMMON_BEGIN(alignment_common)
760 b ret_from_except 758 b ret_from_except
761 759
762 760
763EXC_REAL(program_check, 0x700, 0x800) 761EXC_REAL(program_check, 0x700, 0x100)
764EXC_VIRT(program_check, 0x4700, 0x4800, 0x700) 762EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
765TRAMP_KVM(PACA_EXGEN, 0x700) 763TRAMP_KVM(PACA_EXGEN, 0x700)
766EXC_COMMON_BEGIN(program_check_common) 764EXC_COMMON_BEGIN(program_check_common)
767 EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) 765 EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
@@ -772,8 +770,8 @@ EXC_COMMON_BEGIN(program_check_common)
772 b ret_from_except 770 b ret_from_except
773 771
774 772
775EXC_REAL(fp_unavailable, 0x800, 0x900) 773EXC_REAL(fp_unavailable, 0x800, 0x100)
776EXC_VIRT(fp_unavailable, 0x4800, 0x4900, 0x800) 774EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800)
777TRAMP_KVM(PACA_EXGEN, 0x800) 775TRAMP_KVM(PACA_EXGEN, 0x800)
778EXC_COMMON_BEGIN(fp_unavailable_common) 776EXC_COMMON_BEGIN(fp_unavailable_common)
779 EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) 777 EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
@@ -805,20 +803,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
805#endif 803#endif
806 804
807 805
808EXC_REAL_MASKABLE(decrementer, 0x900, 0x980) 806EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
809EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900) 807EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
810TRAMP_KVM(PACA_EXGEN, 0x900) 808TRAMP_KVM(PACA_EXGEN, 0x900)
811EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) 809EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
812 810
813 811
814EXC_REAL_HV(hdecrementer, 0x980, 0xa00) 812EXC_REAL_HV(hdecrementer, 0x980, 0x80)
815EXC_VIRT_HV(hdecrementer, 0x4980, 0x4a00, 0x980) 813EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
816TRAMP_KVM_HV(PACA_EXGEN, 0x980) 814TRAMP_KVM_HV(PACA_EXGEN, 0x980)
817EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) 815EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
818 816
819 817
820EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0xb00) 818EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100)
821EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x4b00, 0xa00) 819EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00)
822TRAMP_KVM(PACA_EXGEN, 0xa00) 820TRAMP_KVM(PACA_EXGEN, 0xa00)
823#ifdef CONFIG_PPC_DOORBELL 821#ifdef CONFIG_PPC_DOORBELL
824EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) 822EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
@@ -827,11 +825,36 @@ EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
827#endif 825#endif
828 826
829 827
830EXC_REAL(trap_0b, 0xb00, 0xc00) 828EXC_REAL(trap_0b, 0xb00, 0x100)
831EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) 829EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
832TRAMP_KVM(PACA_EXGEN, 0xb00) 830TRAMP_KVM(PACA_EXGEN, 0xb00)
833EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) 831EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
834 832
833#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
834 /*
835 * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
836 * that support it) before changing to HMT_MEDIUM. That allows the KVM
837 * code to save that value into the guest state (it is the guest's PPR
838 * value). Otherwise just change to HMT_MEDIUM as userspace has
839 * already saved the PPR.
840 */
841#define SYSCALL_KVMTEST \
842 SET_SCRATCH0(r13); \
843 GET_PACA(r13); \
844 std r9,PACA_EXGEN+EX_R9(r13); \
845 OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
846 HMT_MEDIUM; \
847 std r10,PACA_EXGEN+EX_R10(r13); \
848 OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \
849 mfcr r9; \
850 KVMTEST_PR(0xc00); \
851 GET_SCRATCH0(r13)
852
853#else
854#define SYSCALL_KVMTEST \
855 HMT_MEDIUM
856#endif
857
835#define LOAD_SYSCALL_HANDLER(reg) \ 858#define LOAD_SYSCALL_HANDLER(reg) \
836 __LOAD_HANDLER(reg, system_call_common) 859 __LOAD_HANDLER(reg, system_call_common)
837 860
@@ -884,50 +907,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
884 b system_call_common ; 907 b system_call_common ;
885#endif 908#endif
886 909
887EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) 910EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
888 /* 911 SYSCALL_KVMTEST
889 * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
890 * that support it) before changing to HMT_MEDIUM. That allows the KVM
891 * code to save that value into the guest state (it is the guest's PPR
892 * value). Otherwise just change to HMT_MEDIUM as userspace has
893 * already saved the PPR.
894 */
895#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
896 SET_SCRATCH0(r13)
897 GET_PACA(r13)
898 std r9,PACA_EXGEN+EX_R9(r13)
899 OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
900 HMT_MEDIUM;
901 std r10,PACA_EXGEN+EX_R10(r13)
902 OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
903 mfcr r9
904 KVMTEST_PR(0xc00)
905 GET_SCRATCH0(r13)
906#else
907 HMT_MEDIUM;
908#endif
909 SYSCALL_PSERIES_1 912 SYSCALL_PSERIES_1
910 SYSCALL_PSERIES_2_RFID 913 SYSCALL_PSERIES_2_RFID
911 SYSCALL_PSERIES_3 914 SYSCALL_PSERIES_3
912EXC_REAL_END(system_call, 0xc00, 0xd00) 915EXC_REAL_END(system_call, 0xc00, 0x100)
913 916
914EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) 917EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
915 HMT_MEDIUM 918 SYSCALL_KVMTEST
916 SYSCALL_PSERIES_1 919 SYSCALL_PSERIES_1
917 SYSCALL_PSERIES_2_DIRECT 920 SYSCALL_PSERIES_2_DIRECT
918 SYSCALL_PSERIES_3 921 SYSCALL_PSERIES_3
919EXC_VIRT_END(system_call, 0x4c00, 0x4d00) 922EXC_VIRT_END(system_call, 0x4c00, 0x100)
920 923
921TRAMP_KVM(PACA_EXGEN, 0xc00) 924TRAMP_KVM(PACA_EXGEN, 0xc00)
922 925
923 926
924EXC_REAL(single_step, 0xd00, 0xe00) 927EXC_REAL(single_step, 0xd00, 0x100)
925EXC_VIRT(single_step, 0x4d00, 0x4e00, 0xd00) 928EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00)
926TRAMP_KVM(PACA_EXGEN, 0xd00) 929TRAMP_KVM(PACA_EXGEN, 0xd00)
927EXC_COMMON(single_step_common, 0xd00, single_step_exception) 930EXC_COMMON(single_step_common, 0xd00, single_step_exception)
928 931
929EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) 932EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
930EXC_VIRT_NONE(0x4e00, 0x4e20) 933EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
931TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) 934TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
932EXC_COMMON_BEGIN(h_data_storage_common) 935EXC_COMMON_BEGIN(h_data_storage_common)
933 mfspr r10,SPRN_HDAR 936 mfspr r10,SPRN_HDAR
@@ -942,14 +945,14 @@ EXC_COMMON_BEGIN(h_data_storage_common)
942 b ret_from_except 945 b ret_from_except
943 946
944 947
945EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) 948EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
946EXC_VIRT_NONE(0x4e20, 0x4e40) 949EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
947TRAMP_KVM_HV(PACA_EXGEN, 0xe20) 950TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
948EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) 951EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
949 952
950 953
951EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0xe60) 954EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20)
952EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x4e60, 0xe40) 955EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40)
953TRAMP_KVM_HV(PACA_EXGEN, 0xe40) 956TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
954EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) 957EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
955 958
@@ -959,9 +962,9 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
959 * first, and then eventually from there to the trampoline to get into virtual 962 * first, and then eventually from there to the trampoline to get into virtual
960 * mode. 963 * mode.
961 */ 964 */
962__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0xe80, hmi_exception_early) 965__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
963__TRAMP_REAL_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) 966__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
964EXC_VIRT_NONE(0x4e60, 0x4e80) 967EXC_VIRT_NONE(0x4e60, 0x20)
965TRAMP_KVM_HV(PACA_EXGEN, 0xe60) 968TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
966TRAMP_REAL_BEGIN(hmi_exception_early) 969TRAMP_REAL_BEGIN(hmi_exception_early)
967 EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) 970 EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
@@ -979,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
979 EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) 982 EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
980 EXCEPTION_PROLOG_COMMON_3(0xe60) 983 EXCEPTION_PROLOG_COMMON_3(0xe60)
981 addi r3,r1,STACK_FRAME_OVERHEAD 984 addi r3,r1,STACK_FRAME_OVERHEAD
982 bl hmi_exception_realmode 985 BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode)
983 /* Windup the stack. */ 986 /* Windup the stack. */
984 /* Move original HSRR0 and HSRR1 into the respective regs */ 987 /* Move original HSRR0 and HSRR1 into the respective regs */
985 ld r9,_MSR(r1) 988 ld r9,_MSR(r1)
@@ -1015,8 +1018,8 @@ hmi_exception_after_realmode:
1015EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception) 1018EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
1016 1019
1017 1020
1018EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0xea0) 1021EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
1019EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x4ea0, 0xe80) 1022EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
1020TRAMP_KVM_HV(PACA_EXGEN, 0xe80) 1023TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
1021#ifdef CONFIG_PPC_DOORBELL 1024#ifdef CONFIG_PPC_DOORBELL
1022EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) 1025EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
@@ -1025,24 +1028,26 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
1025#endif 1028#endif
1026 1029
1027 1030
1028EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0xec0) 1031EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20)
1029EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x4ec0, 0xea0) 1032EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0)
1030TRAMP_KVM_HV(PACA_EXGEN, 0xea0) 1033TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
1031EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) 1034EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
1032 1035
1033 1036
1034EXC_REAL_NONE(0xec0, 0xf00) 1037EXC_REAL_NONE(0xec0, 0x20)
1035EXC_VIRT_NONE(0x4ec0, 0x4f00) 1038EXC_VIRT_NONE(0x4ec0, 0x20)
1039EXC_REAL_NONE(0xee0, 0x20)
1040EXC_VIRT_NONE(0x4ee0, 0x20)
1036 1041
1037 1042
1038EXC_REAL_OOL(performance_monitor, 0xf00, 0xf20) 1043EXC_REAL_OOL(performance_monitor, 0xf00, 0x20)
1039EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x4f20, 0xf00) 1044EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00)
1040TRAMP_KVM(PACA_EXGEN, 0xf00) 1045TRAMP_KVM(PACA_EXGEN, 0xf00)
1041EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) 1046EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
1042 1047
1043 1048
1044EXC_REAL_OOL(altivec_unavailable, 0xf20, 0xf40) 1049EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20)
1045EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x4f40, 0xf20) 1050EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20)
1046TRAMP_KVM(PACA_EXGEN, 0xf20) 1051TRAMP_KVM(PACA_EXGEN, 0xf20)
1047EXC_COMMON_BEGIN(altivec_unavailable_common) 1052EXC_COMMON_BEGIN(altivec_unavailable_common)
1048 EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) 1053 EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
@@ -1078,8 +1083,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
1078 b ret_from_except 1083 b ret_from_except
1079 1084
1080 1085
1081EXC_REAL_OOL(vsx_unavailable, 0xf40, 0xf60) 1086EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
1082EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x4f60, 0xf40) 1087EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40)
1083TRAMP_KVM(PACA_EXGEN, 0xf40) 1088TRAMP_KVM(PACA_EXGEN, 0xf40)
1084EXC_COMMON_BEGIN(vsx_unavailable_common) 1089EXC_COMMON_BEGIN(vsx_unavailable_common)
1085 EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) 1090 EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
@@ -1114,41 +1119,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
1114 b ret_from_except 1119 b ret_from_except
1115 1120
1116 1121
1117EXC_REAL_OOL(facility_unavailable, 0xf60, 0xf80) 1122EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
1118EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x4f80, 0xf60) 1123EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60)
1119TRAMP_KVM(PACA_EXGEN, 0xf60) 1124TRAMP_KVM(PACA_EXGEN, 0xf60)
1120EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception) 1125EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
1121 1126
1122 1127
1123EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0xfa0) 1128EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20)
1124EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x4fa0, 0xf80) 1129EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80)
1125TRAMP_KVM_HV(PACA_EXGEN, 0xf80) 1130TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
1126EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception) 1131EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
1127 1132
1128 1133
1129EXC_REAL_NONE(0xfa0, 0x1200) 1134EXC_REAL_NONE(0xfa0, 0x20)
1130EXC_VIRT_NONE(0x4fa0, 0x5200) 1135EXC_VIRT_NONE(0x4fa0, 0x20)
1136EXC_REAL_NONE(0xfc0, 0x20)
1137EXC_VIRT_NONE(0x4fc0, 0x20)
1138EXC_REAL_NONE(0xfe0, 0x20)
1139EXC_VIRT_NONE(0x4fe0, 0x20)
1140
1141EXC_REAL_NONE(0x1000, 0x100)
1142EXC_VIRT_NONE(0x5000, 0x100)
1143EXC_REAL_NONE(0x1100, 0x100)
1144EXC_VIRT_NONE(0x5100, 0x100)
1131 1145
1132#ifdef CONFIG_CBE_RAS 1146#ifdef CONFIG_CBE_RAS
1133EXC_REAL_HV(cbe_system_error, 0x1200, 0x1300) 1147EXC_REAL_HV(cbe_system_error, 0x1200, 0x100)
1134EXC_VIRT_NONE(0x5200, 0x5300) 1148EXC_VIRT_NONE(0x5200, 0x100)
1135TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200) 1149TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
1136EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) 1150EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
1137#else /* CONFIG_CBE_RAS */ 1151#else /* CONFIG_CBE_RAS */
1138EXC_REAL_NONE(0x1200, 0x1300) 1152EXC_REAL_NONE(0x1200, 0x100)
1139EXC_VIRT_NONE(0x5200, 0x5300) 1153EXC_VIRT_NONE(0x5200, 0x100)
1140#endif 1154#endif
1141 1155
1142 1156
1143EXC_REAL(instruction_breakpoint, 0x1300, 0x1400) 1157EXC_REAL(instruction_breakpoint, 0x1300, 0x100)
1144EXC_VIRT(instruction_breakpoint, 0x5300, 0x5400, 0x1300) 1158EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300)
1145TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300) 1159TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300)
1146EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception) 1160EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
1147 1161
1148EXC_REAL_NONE(0x1400, 0x1500) 1162EXC_REAL_NONE(0x1400, 0x100)
1149EXC_VIRT_NONE(0x5400, 0x5500) 1163EXC_VIRT_NONE(0x5400, 0x100)
1150 1164
1151EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) 1165EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
1152 mtspr SPRN_SPRG_HSCRATCH0,r13 1166 mtspr SPRN_SPRG_HSCRATCH0,r13
1153 EXCEPTION_PROLOG_0(PACA_EXGEN) 1167 EXCEPTION_PROLOG_0(PACA_EXGEN)
1154 EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) 1168 EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
@@ -1163,14 +1177,14 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600)
1163 1177
1164 KVMTEST_PR(0x1500) 1178 KVMTEST_PR(0x1500)
1165 EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) 1179 EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
1166EXC_REAL_END(denorm_exception_hv, 0x1500, 0x1600) 1180EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
1167 1181
1168#ifdef CONFIG_PPC_DENORMALISATION 1182#ifdef CONFIG_PPC_DENORMALISATION
1169EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x5600) 1183EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
1170 b exc_real_0x1500_denorm_exception_hv 1184 b exc_real_0x1500_denorm_exception_hv
1171EXC_VIRT_END(denorm_exception, 0x5500, 0x5600) 1185EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
1172#else 1186#else
1173EXC_VIRT_NONE(0x5500, 0x5600) 1187EXC_VIRT_NONE(0x5500, 0x100)
1174#endif 1188#endif
1175 1189
1176TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) 1190TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500)
@@ -1243,18 +1257,18 @@ EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception)
1243 1257
1244 1258
1245#ifdef CONFIG_CBE_RAS 1259#ifdef CONFIG_CBE_RAS
1246EXC_REAL_HV(cbe_maintenance, 0x1600, 0x1700) 1260EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100)
1247EXC_VIRT_NONE(0x5600, 0x5700) 1261EXC_VIRT_NONE(0x5600, 0x100)
1248TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600) 1262TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600)
1249EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) 1263EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
1250#else /* CONFIG_CBE_RAS */ 1264#else /* CONFIG_CBE_RAS */
1251EXC_REAL_NONE(0x1600, 0x1700) 1265EXC_REAL_NONE(0x1600, 0x100)
1252EXC_VIRT_NONE(0x5600, 0x5700) 1266EXC_VIRT_NONE(0x5600, 0x100)
1253#endif 1267#endif
1254 1268
1255 1269
1256EXC_REAL(altivec_assist, 0x1700, 0x1800) 1270EXC_REAL(altivec_assist, 0x1700, 0x100)
1257EXC_VIRT(altivec_assist, 0x5700, 0x5800, 0x1700) 1271EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700)
1258TRAMP_KVM(PACA_EXGEN, 0x1700) 1272TRAMP_KVM(PACA_EXGEN, 0x1700)
1259#ifdef CONFIG_ALTIVEC 1273#ifdef CONFIG_ALTIVEC
1260EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) 1274EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
@@ -1264,13 +1278,13 @@ EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
1264 1278
1265 1279
1266#ifdef CONFIG_CBE_RAS 1280#ifdef CONFIG_CBE_RAS
1267EXC_REAL_HV(cbe_thermal, 0x1800, 0x1900) 1281EXC_REAL_HV(cbe_thermal, 0x1800, 0x100)
1268EXC_VIRT_NONE(0x5800, 0x5900) 1282EXC_VIRT_NONE(0x5800, 0x100)
1269TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800) 1283TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800)
1270EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) 1284EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
1271#else /* CONFIG_CBE_RAS */ 1285#else /* CONFIG_CBE_RAS */
1272EXC_REAL_NONE(0x1800, 0x1900) 1286EXC_REAL_NONE(0x1800, 0x100)
1273EXC_VIRT_NONE(0x5800, 0x5900) 1287EXC_VIRT_NONE(0x5800, 0x100)
1274#endif 1288#endif
1275 1289
1276 1290
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8f0c7c5d93f2..8ff0dd4e77a7 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -406,12 +406,35 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
406void crash_fadump(struct pt_regs *regs, const char *str) 406void crash_fadump(struct pt_regs *regs, const char *str)
407{ 407{
408 struct fadump_crash_info_header *fdh = NULL; 408 struct fadump_crash_info_header *fdh = NULL;
409 int old_cpu, this_cpu;
409 410
410 if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) 411 if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
411 return; 412 return;
412 413
414 /*
415 * old_cpu == -1 means this is the first CPU which has come here,
416 * go ahead and trigger fadump.
417 *
418 * old_cpu != -1 means some other CPU is already on its way
419 * to trigger fadump, just keep looping here.
420 */
421 this_cpu = smp_processor_id();
422 old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
423
424 if (old_cpu != -1) {
425 /*
426 * We can't loop here indefinitely. Wait as long as fadump
427 * is in force. If we race with fadump un-registration this
428 * loop will break and then we go down to normal panic path
429 * and reboot. If fadump is in force the first crashing
430 * cpu will definitely trigger fadump.
431 */
432 while (fw_dump.dump_registered)
433 cpu_relax();
434 return;
435 }
436
413 fdh = __va(fw_dump.fadumphdr_addr); 437 fdh = __va(fw_dump.fadumphdr_addr);
414 crashing_cpu = smp_processor_id();
415 fdh->crashing_cpu = crashing_cpu; 438 fdh->crashing_cpu = crashing_cpu;
416 crash_save_vmcoreinfo(); 439 crash_save_vmcoreinfo();
417 440
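The crashing-CPU election above is a standard compare-and-swap pattern: exactly one CPU swings crashing_cpu from -1 to its own id and proceeds to trigger fadump, while every other CPU sees the winner and spins. A userspace analogue, using GCC atomics as a stand-in for the kernel's cmpxchg():

	static int crashing_cpu = -1;

	/* Returns nonzero for exactly one caller; everyone else loses the
	 * race and should wait for the winner to take the dump. */
	static int try_become_crasher(int this_cpu)
	{
		int expected = -1;

		return __atomic_compare_exchange_n(&crashing_cpu, &expected,
						   this_cpu, 0,
						   __ATOMIC_SEQ_CST,
						   __ATOMIC_SEQ_CST);
	}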
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 4d3aa05e28be..53cc9270aac8 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -228,8 +228,10 @@ int hw_breakpoint_handler(struct die_args *args)
228 rcu_read_lock(); 228 rcu_read_lock();
229 229
230 bp = __this_cpu_read(bp_per_reg); 230 bp = __this_cpu_read(bp_per_reg);
231 if (!bp) 231 if (!bp) {
232 rc = NOTIFY_DONE;
232 goto out; 233 goto out;
234 }
233 info = counter_arch_bp(bp); 235 info = counter_arch_bp(bp);
234 236
235 /* 237 /*
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 72dac0b58061..5f61cc0349c0 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -40,9 +40,7 @@
40#define _WORC GPR11 40#define _WORC GPR11
41#define _PTCR GPR12 41#define _PTCR GPR12
42 42
43#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ 43#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16
44 PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
45 PSSCR_MTL_MASK
46 44
47 .text 45 .text
48 46
@@ -205,7 +203,7 @@ pnv_enter_arch207_idle_mode:
205 stb r3,PACA_THREAD_IDLE_STATE(r13) 203 stb r3,PACA_THREAD_IDLE_STATE(r13)
206 cmpwi cr3,r3,PNV_THREAD_SLEEP 204 cmpwi cr3,r3,PNV_THREAD_SLEEP
207 bge cr3,2f 205 bge cr3,2f
208 IDLE_STATE_ENTER_SEQ(PPC_NAP) 206 IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
209 /* No return */ 207 /* No return */
2102: 2082:
211 /* Sleep or winkle */ 209 /* Sleep or winkle */
@@ -239,7 +237,7 @@ pnv_fastsleep_workaround_at_entry:
239 237
240common_enter: /* common code for all the threads entering sleep or winkle */ 238common_enter: /* common code for all the threads entering sleep or winkle */
241 bgt cr3,enter_winkle 239 bgt cr3,enter_winkle
242 IDLE_STATE_ENTER_SEQ(PPC_SLEEP) 240 IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
243 241
244fastsleep_workaround_at_entry: 242fastsleep_workaround_at_entry:
245 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT 243 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
@@ -250,7 +248,7 @@ fastsleep_workaround_at_entry:
250 /* Fast sleep workaround */ 248 /* Fast sleep workaround */
251 li r3,1 249 li r3,1
252 li r4,1 250 li r4,1
253 bl opal_rm_config_cpu_idle_state 251 bl opal_config_cpu_idle_state
254 252
255 /* Clear Lock bit */ 253 /* Clear Lock bit */
256 li r0,0 254 li r0,0
@@ -261,10 +259,10 @@ fastsleep_workaround_at_entry:
261enter_winkle: 259enter_winkle:
262 bl save_sprs_to_stack 260 bl save_sprs_to_stack
263 261
264 IDLE_STATE_ENTER_SEQ(PPC_WINKLE) 262 IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
265 263
266/* 264/*
267 * r3 - requested stop state 265 * r3 - PSSCR value corresponding to the requested stop state.
268 */ 266 */
269power_enter_stop: 267power_enter_stop:
270#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 268#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -274,13 +272,22 @@ power_enter_stop:
274 stb r4,HSTATE_HWTHREAD_STATE(r13) 272 stb r4,HSTATE_HWTHREAD_STATE(r13)
275#endif 273#endif
276/* 274/*
275 * Check if we are executing the lite variant with ESL=EC=0
276 */
277 andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
278 clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
279 bne 1f
280 IDLE_STATE_ENTER_SEQ(PPC_STOP)
281 li r3,0 /* Since we didn't lose state, return 0 */
282 b pnv_wakeup_noloss
283/*
277 * Check if the requested state is a deep idle state. 284 * Check if the requested state is a deep idle state.
278 */ 285 */
279 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) 2861: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
280 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) 287 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
281 cmpd r3,r4 288 cmpd r3,r4
282 bge 2f 289 bge 2f
283 IDLE_STATE_ENTER_SEQ(PPC_STOP) 290 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
2842: 2912:
285/* 292/*
286 * Entering deep idle state. 293 * Entering deep idle state.
@@ -302,7 +309,7 @@ lwarx_loop_stop:
302 309
303 bl save_sprs_to_stack 310 bl save_sprs_to_stack
304 311
305 IDLE_STATE_ENTER_SEQ(PPC_STOP) 312 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
306 313
307_GLOBAL(power7_idle) 314_GLOBAL(power7_idle)
308 /* Now check if user or arch enabled NAP mode */ 315 /* Now check if user or arch enabled NAP mode */
@@ -353,16 +360,17 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
353 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 360 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
35420: nop; 36120: nop;
355 362
356
357/* 363/*
358 * r3 - requested stop state 364 * r3 - The PSSCR value corresponding to the stop state.
365 * r4 - The PSSCR mask corresponding to the stop state.
359 */ 366 */
360_GLOBAL(power9_idle_stop) 367_GLOBAL(power9_idle_stop)
361 LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) 368 mfspr r5,SPRN_PSSCR
362 or r4,r4,r3 369 andc r5,r5,r4
363 mtspr SPRN_PSSCR, r4 370 or r3,r3,r5
364 li r4, 1 371 mtspr SPRN_PSSCR,r3
365 LOAD_REG_ADDR(r5,power_enter_stop) 372 LOAD_REG_ADDR(r5,power_enter_stop)
373 li r4,1
366 b pnv_powersave_common 374 b pnv_powersave_common
367 /* No return */ 375 /* No return */
368/* 376/*
@@ -544,7 +552,7 @@ timebase_resync:
544 */ 552 */
545 ble cr3,clear_lock 553 ble cr3,clear_lock
546 /* Time base re-sync */ 554 /* Time base re-sync */
547 bl opal_rm_resync_timebase; 555 bl opal_resync_timebase;
548 /* 556 /*
549 * If waking up from sleep, per core state is not lost, skip to 557 * If waking up from sleep, per core state is not lost, skip to
550 * clear_lock. 558 * clear_lock.
@@ -633,7 +641,7 @@ hypervisor_state_restored:
633fastsleep_workaround_at_exit: 641fastsleep_workaround_at_exit:
634 li r3,1 642 li r3,1
635 li r4,0 643 li r4,0
636 bl opal_rm_config_cpu_idle_state 644 bl opal_config_cpu_idle_state
637 b timebase_resync 645 b timebase_resync
638 646
639/* 647/*
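The new power9_idle_stop sequence is a read-modify-write of PSSCR: read the register, clear the fields covered by the stop state's mask, then OR in the requested value. In C terms (a sketch of the mfspr/andc/or/mtspr sequence; the value is assumed to already lie within the mask):

	static unsigned long compose_psscr(unsigned long cur_psscr,
					   unsigned long psscr_val,
					   unsigned long psscr_mask)
	{
		/* Keep unrelated PSSCR fields, insert the stop state's value. */
		return (cur_psscr & ~psscr_mask) | psscr_val;
	}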
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 3963f0b68d52..a1854d1ded8b 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -8,6 +8,7 @@
8#include <linux/export.h> 8#include <linux/export.h>
9#include <asm/io.h> 9#include <asm/io.h>
10#include <asm/pci-bridge.h> 10#include <asm/pci-bridge.h>
11#include <asm/isa-bridge.h>
11 12
12/* 13/*
13 * Here comes the ppc64 implementation of the IOMAP 14 * Here comes the ppc64 implementation of the IOMAP
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index ae1316106e2b..bb6f8993412e 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -29,6 +29,7 @@
29#include <asm/pci-bridge.h> 29#include <asm/pci-bridge.h>
30#include <asm/machdep.h> 30#include <asm/machdep.h>
31#include <asm/ppc-pci.h> 31#include <asm/ppc-pci.h>
32#include <asm/isa-bridge.h>
32 33
33unsigned long isa_io_base; /* NULL if no ISA bus */ 34unsigned long isa_io_base; /* NULL if no ISA bus */
34EXPORT_SYMBOL(isa_io_base); 35EXPORT_SYMBOL(isa_io_base);
@@ -167,6 +168,97 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
167} 168}
168 169
169/** 170/**
171 * isa_bridge_init_non_pci - Find and map the ISA IO space for a
172 * direct (non-PCI) LPC bridge, before main PCI
173 * discovery. This is optionally called by the arch
174 * code when adding PCI PHBs to get early access to ISA IO ports
175 */
176void __init isa_bridge_init_non_pci(struct device_node *np)
177{
178 const __be32 *ranges, *pbasep = NULL;
179 int rlen, i, rs;
180 u32 na, ns, pna;
181 u64 cbase, pbase, size = 0;
182
183 /* If we already have an ISA bridge, bail off */
184 if (isa_bridge_devnode != NULL)
185 return;
186
187 pna = of_n_addr_cells(np);
188 if (of_property_read_u32(np, "#address-cells", &na) ||
189 of_property_read_u32(np, "#size-cells", &ns)) {
190 pr_warn("ISA: Non-PCI bridge %s is missing address format\n",
191 np->full_name);
192 return;
193 }
194
195 /* Check it's a supported address format */
196 if (na != 2 || ns != 1) {
197 pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n",
198 np->full_name);
199 return;
200 }
201 rs = na + ns + pna;
202
203 /* Grab the ranges property */
204 ranges = of_get_property(np, "ranges", &rlen);
205 if (ranges == NULL || rlen < rs) {
206 pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n",
207 np->full_name);
208 return;
209 }
210
211 /* Parse it. We are only looking for IO space */
212 for (i = 0; (i + rs - 1) < rlen; i += rs) {
213 if (be32_to_cpup(ranges + i) != 1)
214 continue;
215 cbase = be32_to_cpup(ranges + i + 1);
216 size = of_read_number(ranges + i + na + pna, ns);
217 pbasep = ranges + i + na;
218 break;
219 }
220
221 /* Got something ? */
222 if (!size || !pbasep) {
223 pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n",
224 np->full_name);
225 return;
226 }
227
228 /* Align size and make sure it's cropped to 64K */
229 size = PAGE_ALIGN(size);
230 if (size > 0x10000)
231 size = 0x10000;
232
233 /* Map pbase */
234 pbase = of_translate_address(np, pbasep);
235 if (pbase == OF_BAD_ADDR) {
236 pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n",
237 np->full_name);
238 return;
239 }
240
241 /* We need page alignment */
242 if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
243 pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n",
244 np->full_name);
245 return;
246 }
247
248 /* Got it */
249 isa_bridge_devnode = np;
250
251 /* Set the global ISA io base to indicate we have an ISA bridge
252 * and map it
253 */
254 isa_io_base = ISA_IO_BASE;
255 __ioremap_at(pbase, (void *)ISA_IO_BASE,
256 size, pgprot_val(pgprot_noncached(__pgprot(0))));
257
258 pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name);
259}
260
261/**
170 * isa_bridge_find_late - Find and map the ISA IO space upon discovery of 262 * isa_bridge_find_late - Find and map the ISA IO space upon discovery of
171 * a new ISA bridge 263 * a new ISA bridge
172 */ 264 */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 735ff3d3f77d..fce05a38851c 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -285,6 +285,7 @@ asm(".global kretprobe_trampoline\n"
285 ".type kretprobe_trampoline, @function\n" 285 ".type kretprobe_trampoline, @function\n"
286 "kretprobe_trampoline:\n" 286 "kretprobe_trampoline:\n"
287 "nop\n" 287 "nop\n"
288 "blr\n"
288 ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); 289 ".size kretprobe_trampoline, .-kretprobe_trampoline\n");
289 290
290/* 291/*
@@ -337,6 +338,13 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
337 338
338 kretprobe_assert(ri, orig_ret_address, trampoline_address); 339 kretprobe_assert(ri, orig_ret_address, trampoline_address);
339 regs->nip = orig_ret_address; 340 regs->nip = orig_ret_address;
341 /*
342 * Make LR point to the orig_ret_address.
343 * When the 'nop' inside the kretprobe_trampoline
344 * is optimized, we can do a 'blr' after executing the
345 * detour buffer code.
346 */
347 regs->link = orig_ret_address;
340 348
341 reset_current_kprobe(); 349 reset_current_kprobe();
342 kretprobe_hash_unlock(current, &flags); 350 kretprobe_hash_unlock(current, &flags);
@@ -467,15 +475,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
467 return 0; 475 return 0;
468} 476}
469 477
470/*
471 * Wrapper routine to for handling exceptions.
472 */
473int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
474 unsigned long val, void *data)
475{
476 return NOTIFY_DONE;
477}
478
479unsigned long arch_deref_entry_point(void *entry) 478unsigned long arch_deref_entry_point(void *entry)
480{ 479{
481 return ppc_global_function_entry(entry); 480 return ppc_global_function_entry(entry);
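For context, the trampoline and regs->link handling above services ordinary kretprobes. A minimal module sketch using the standard kprobes API (the probed symbol is illustrative only):

	#include <linux/kprobes.h>
	#include <linux/module.h>

	static int my_ret_handler(struct kretprobe_instance *ri,
				  struct pt_regs *regs)
	{
		pr_info("probed function returned %ld\n", regs_return_value(regs));
		return 0;
	}

	static struct kretprobe my_kretprobe = {
		.handler	= my_ret_handler,
		.kp.symbol_name	= "do_sys_open",	/* illustrative target */
	};

	static int __init kret_init(void)
	{
		return register_kretprobe(&my_kretprobe);
	}

	static void __exit kret_exit(void)
	{
		unregister_kretprobe(&my_kretprobe);
	}

	module_init(kret_init);
	module_exit(kret_exit);
	MODULE_LICENSE("GPL");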
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index bc525ea0dc09..0694d20f85b6 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -233,7 +233,8 @@ static int __init add_legacy_isa_port(struct device_node *np,
233 * 233 *
234 * Note: Don't even try on P8 lpc, we know it's not directly mapped 234 * Note: Don't even try on P8 lpc, we know it's not directly mapped
235 */ 235 */
236 if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) { 236 if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") ||
237 of_get_property(isa_brg, "ranges", NULL)) {
237 taddr = of_translate_address(np, reg); 238 taddr = of_translate_address(np, reg);
238 if (taddr == OF_BAD_ADDR) 239 if (taddr == OF_BAD_ADDR)
239 taddr = 0; 240 taddr = 0;
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 32be2a844947..ae179cb1bb3c 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -80,12 +80,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
80 * each other. 80 * each other.
81 */ 81 */
82 ld r10,PPC64_CACHES@toc(r2) 82 ld r10,PPC64_CACHES@toc(r2)
83 lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ 83 lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */
84 addi r5,r7,-1 84 addi r5,r7,-1
85 andc r6,r3,r5 /* round low to line bdy */ 85 andc r6,r3,r5 /* round low to line bdy */
86 subf r8,r6,r4 /* compute length */ 86 subf r8,r6,r4 /* compute length */
87 add r8,r8,r5 /* ensure we get enough */ 87 add r8,r8,r5 /* ensure we get enough */
88 lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ 88 lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */
89 srw. r8,r8,r9 /* compute line count */ 89 srw. r8,r8,r9 /* compute line count */
90 beqlr /* nothing to do? */ 90 beqlr /* nothing to do? */
91 mtctr r8 91 mtctr r8
@@ -96,12 +96,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
96 96
97/* Now invalidate the instruction cache */ 97/* Now invalidate the instruction cache */
98 98
99 lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ 99 lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */
100 addi r5,r7,-1 100 addi r5,r7,-1
101 andc r6,r3,r5 /* round low to line bdy */ 101 andc r6,r3,r5 /* round low to line bdy */
102 subf r8,r6,r4 /* compute length */ 102 subf r8,r6,r4 /* compute length */
103 add r8,r8,r5 103 add r8,r8,r5
104 lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ 104 lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */
105 srw. r8,r8,r9 /* compute line count */ 105 srw. r8,r8,r9 /* compute line count */
106 beqlr /* nothing to do? */ 106 beqlr /* nothing to do? */
107 mtctr r8 107 mtctr r8
@@ -128,12 +128,12 @@ _GLOBAL(flush_dcache_range)
128 * Different systems have different cache line sizes 128 * Different systems have different cache line sizes
129 */ 129 */
130 ld r10,PPC64_CACHES@toc(r2) 130 ld r10,PPC64_CACHES@toc(r2)
131 lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ 131 lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
132 addi r5,r7,-1 132 addi r5,r7,-1
133 andc r6,r3,r5 /* round low to line bdy */ 133 andc r6,r3,r5 /* round low to line bdy */
134 subf r8,r6,r4 /* compute length */ 134 subf r8,r6,r4 /* compute length */
135 add r8,r8,r5 /* ensure we get enough */ 135 add r8,r8,r5 /* ensure we get enough */
136 lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ 136 lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
137 srw. r8,r8,r9 /* compute line count */ 137 srw. r8,r8,r9 /* compute line count */
138 beqlr /* nothing to do? */ 138 beqlr /* nothing to do? */
139 mtctr r8 139 mtctr r8
@@ -156,12 +156,12 @@ EXPORT_SYMBOL(flush_dcache_range)
156 */ 156 */
157_GLOBAL(flush_dcache_phys_range) 157_GLOBAL(flush_dcache_phys_range)
158 ld r10,PPC64_CACHES@toc(r2) 158 ld r10,PPC64_CACHES@toc(r2)
159 lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ 159 lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
160 addi r5,r7,-1 160 addi r5,r7,-1
161 andc r6,r3,r5 /* round low to line bdy */ 161 andc r6,r3,r5 /* round low to line bdy */
162 subf r8,r6,r4 /* compute length */ 162 subf r8,r6,r4 /* compute length */
163 add r8,r8,r5 /* ensure we get enough */ 163 add r8,r8,r5 /* ensure we get enough */
164 lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ 164 lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
165 srw. r8,r8,r9 /* compute line count */ 165 srw. r8,r8,r9 /* compute line count */
166 beqlr /* nothing to do? */ 166 beqlr /* nothing to do? */
167 mfmsr r5 /* Disable MMU Data Relocation */ 167 mfmsr r5 /* Disable MMU Data Relocation */
@@ -184,12 +184,12 @@ _GLOBAL(flush_dcache_phys_range)
184 184
185_GLOBAL(flush_inval_dcache_range) 185_GLOBAL(flush_inval_dcache_range)
186 ld r10,PPC64_CACHES@toc(r2) 186 ld r10,PPC64_CACHES@toc(r2)
187 lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ 187 lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
188 addi r5,r7,-1 188 addi r5,r7,-1
189 andc r6,r3,r5 /* round low to line bdy */ 189 andc r6,r3,r5 /* round low to line bdy */
190 subf r8,r6,r4 /* compute length */ 190 subf r8,r6,r4 /* compute length */
191 add r8,r8,r5 /* ensure we get enough */ 191 add r8,r8,r5 /* ensure we get enough */
192 lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ 192 lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
193 srw. r8,r8,r9 /* compute line count */ 193 srw. r8,r8,r9 /* compute line count */
194 beqlr /* nothing to do? */ 194 beqlr /* nothing to do? */
195 sync 195 sync
@@ -225,8 +225,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
225/* Flush the dcache */ 225/* Flush the dcache */
226 ld r7,PPC64_CACHES@toc(r2) 226 ld r7,PPC64_CACHES@toc(r2)
227 clrrdi r3,r3,PAGE_SHIFT /* Page align */ 227 clrrdi r3,r3,PAGE_SHIFT /* Page align */
228 lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ 228 lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */
229 lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ 229 lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */
230 mr r6,r3 230 mr r6,r3
231 mtctr r4 231 mtctr r4
2320: dcbst 0,r6 2320: dcbst 0,r6
@@ -236,8 +236,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
236 236
237/* Now invalidate the icache */ 237/* Now invalidate the icache */
238 238
239 lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ 239 lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */
240 lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ 240 lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */
241 mtctr r4 241 mtctr r4
2421: icbi 0,r3 2421: icbi 0,r3
243 add r3,r3,r5 243 add r3,r3,r5
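The LINESIZE to BLOCKSIZE renames in this file reflect that these loops operate at the granularity of the cache block, which is what dcbst/icbi act on and which need not equal the reported line size. A C sketch of the block-count computation the assembly performs (the function name is illustrative):

	static inline unsigned long cache_op_count(unsigned long start,
						   unsigned long stop,
						   unsigned long block_size,
						   unsigned int log_block_size)
	{
		/* round 'start' down to a block boundary (andc r6,r3,r5) */
		unsigned long aligned = start & ~(block_size - 1);
		/* length, rounded up to cover a partial trailing block */
		unsigned long len = (stop - aligned) + (block_size - 1);

		return len >> log_block_size;	/* loop count loaded into CTR */
	}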
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000000000000..2282bf4e63cd
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,347 @@
1/*
2 * Code for Kernel probes Jump optimization.
3 *
4 * Copyright 2017, Anju T, IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/kprobes.h>
13#include <linux/jump_label.h>
14#include <linux/types.h>
15#include <linux/slab.h>
16#include <linux/list.h>
17#include <asm/kprobes.h>
18#include <asm/ptrace.h>
19#include <asm/cacheflush.h>
20#include <asm/code-patching.h>
21#include <asm/sstep.h>
22#include <asm/ppc-opcode.h>
23
24#define TMPL_CALL_HDLR_IDX \
25 (optprobe_template_call_handler - optprobe_template_entry)
26#define TMPL_EMULATE_IDX \
27 (optprobe_template_call_emulate - optprobe_template_entry)
28#define TMPL_RET_IDX \
29 (optprobe_template_ret - optprobe_template_entry)
30#define TMPL_OP_IDX \
31 (optprobe_template_op_address - optprobe_template_entry)
32#define TMPL_INSN_IDX \
33 (optprobe_template_insn - optprobe_template_entry)
34#define TMPL_END_IDX \
35 (optprobe_template_end - optprobe_template_entry)
36
37DEFINE_INSN_CACHE_OPS(ppc_optinsn);
38
39static bool insn_page_in_use;
40
41static void *__ppc_alloc_insn_page(void)
42{
43 if (insn_page_in_use)
44 return NULL;
45 insn_page_in_use = true;
46 return &optinsn_slot;
47}
48
49static void __ppc_free_insn_page(void *page __maybe_unused)
50{
51 insn_page_in_use = false;
52}
53
54struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
55 .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
56 .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
57 /* insn_size initialized later */
58 .alloc = __ppc_alloc_insn_page,
59 .free = __ppc_free_insn_page,
60 .nr_garbage = 0,
61};
62
63/*
64 * Check if we can optimize this probe. Returns NIP post-emulation if this can
65 * be optimized and 0 otherwise.
66 */
67static unsigned long can_optimize(struct kprobe *p)
68{
69 struct pt_regs regs;
70 struct instruction_op op;
71 unsigned long nip = 0;
72
73 /*
74	 * The kprobe placed on kretprobe_trampoline at boot time
75	 * is a 'nop' instruction, which can always be emulated,
76	 * so further checks can be skipped.
77 */
78 if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
79 return (unsigned long)p->addr + sizeof(kprobe_opcode_t);
80
81 /*
82 * We only support optimizing kernel addresses, but not
83 * module addresses.
84 *
85 * FIXME: Optimize kprobes placed in module addresses.
86 */
87 if (!is_kernel_addr((unsigned long)p->addr))
88 return 0;
89
90 memset(&regs, 0, sizeof(struct pt_regs));
91 regs.nip = (unsigned long)p->addr;
92 regs.trap = 0x0;
93 regs.msr = MSR_KERNEL;
94
95 /*
96	 * Kprobes placed on conditional branch instructions are
97	 * not optimized, as we can't predict the nip beforehand
98	 * with dummy pt_regs and cannot ensure that the branch
99	 * back from the detour buffer falls within range (i.e. 32MB).
100	 * A branch back from the trampoline is set up in the detour
101	 * buffer to the nip returned by analyse_instr() here.
102	 *
103	 * So, ensure that the instruction is not a conditional
104	 * branch, and that it can be emulated.
105 */
106 if (!is_conditional_branch(*p->ainsn.insn) &&
107 analyse_instr(&op, &regs, *p->ainsn.insn))
108 nip = regs.nip;
109
110 return nip;
111}
112
113static void optimized_callback(struct optimized_kprobe *op,
114 struct pt_regs *regs)
115{
116 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
117 unsigned long flags;
118
119 /* This is possible if op is under delayed unoptimizing */
120 if (kprobe_disabled(&op->kp))
121 return;
122
123 local_irq_save(flags);
124 hard_irq_disable();
125
126 if (kprobe_running()) {
127 kprobes_inc_nmissed_count(&op->kp);
128 } else {
129 __this_cpu_write(current_kprobe, &op->kp);
130 regs->nip = (unsigned long)op->kp.addr;
131 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
132 opt_pre_handler(&op->kp, regs);
133 __this_cpu_write(current_kprobe, NULL);
134 }
135
136 /*
137 * No need for an explicit __hard_irq_enable() here.
138 * local_irq_restore() will re-enable interrupts,
139 * if they were hard disabled.
140 */
141 local_irq_restore(flags);
142}
143NOKPROBE_SYMBOL(optimized_callback);
144
145void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
146{
147 if (op->optinsn.insn) {
148 free_ppc_optinsn_slot(op->optinsn.insn, 1);
149 op->optinsn.insn = NULL;
150 }
151}
152
153/*
154 * emulate_step() expects the instruction to be emulated
155 * as its second parameter, so load register 'r4' with
156 * the instruction.
157 */
158void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
159{
160 /* addis r4,0,(insn)@h */
161 *addr++ = PPC_INST_ADDIS | ___PPC_RT(4) |
162 ((val >> 16) & 0xffff);
163
164 /* ori r4,r4,(insn)@l */
165 *addr = PPC_INST_ORI | ___PPC_RA(4) | ___PPC_RS(4) |
166 (val & 0xffff);
167}
168
169/*
170 * Generate instructions to load provided immediate 64-bit value
171 * to register 'r3' and patch these instructions at 'addr'.
172 */
173void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr)
174{
175 /* lis r3,(op)@highest */
176 *addr++ = PPC_INST_ADDIS | ___PPC_RT(3) |
177 ((val >> 48) & 0xffff);
178
179 /* ori r3,r3,(op)@higher */
180 *addr++ = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) |
181 ((val >> 32) & 0xffff);
182
183 /* rldicr r3,r3,32,31 */
184 *addr++ = PPC_INST_RLDICR | ___PPC_RA(3) | ___PPC_RS(3) |
185 __PPC_SH64(32) | __PPC_ME64(31);
186
187 /* oris r3,r3,(op)@h */
188 *addr++ = PPC_INST_ORIS | ___PPC_RA(3) | ___PPC_RS(3) |
189 ((val >> 16) & 0xffff);
190
191 /* ori r3,r3,(op)@l */
192 *addr = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) |
193 (val & 0xffff);
194}
195
196int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
197{
198 kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step;
199 kprobe_opcode_t *op_callback_addr, *emulate_step_addr;
200 long b_offset;
201 unsigned long nip;
202
203 kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
204
205 nip = can_optimize(p);
206 if (!nip)
207 return -EILSEQ;
208
209 /* Allocate instruction slot for detour buffer */
210 buff = get_ppc_optinsn_slot();
211 if (!buff)
212 return -ENOMEM;
213
214 /*
215 * OPTPROBE uses 'b' instruction to branch to optinsn.insn.
216 *
217 * The target address has to be relatively close by, to permit use
218 * of a branch instruction on powerpc, because the address is specified
219 * in an immediate field in the instruction opcode itself, i.e. 24 bits
220 * of the opcode specify the displacement. Therefore the address should
221 * be within 32MB on either side of the current instruction.
222 */
223 b_offset = (unsigned long)buff - (unsigned long)p->addr;
224 if (!is_offset_in_branch_range(b_offset))
225 goto error;
226
227 /* Check if the return address is also within 32MB range */
228 b_offset = (unsigned long)(buff + TMPL_RET_IDX) -
229 (unsigned long)nip;
230 if (!is_offset_in_branch_range(b_offset))
231 goto error;
232
233 /* Setup template */
234 memcpy(buff, optprobe_template_entry,
235 TMPL_END_IDX * sizeof(kprobe_opcode_t));
236
237 /*
238 * Fixup the template with instructions to:
239 * 1. load the address of the actual probepoint
240 */
241 patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX);
242
243 /*
244 * 2. branch to optimized_callback() and emulate_step()
245 */
246 kprobe_lookup_name("optimized_callback", op_callback_addr);
247 kprobe_lookup_name("emulate_step", emulate_step_addr);
248 if (!op_callback_addr || !emulate_step_addr) {
249 WARN(1, "kprobe_lookup_name() failed\n");
250 goto error;
251 }
252
253 branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX,
254 (unsigned long)op_callback_addr,
255 BRANCH_SET_LINK);
256
257 branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX,
258 (unsigned long)emulate_step_addr,
259 BRANCH_SET_LINK);
260
261 if (!branch_op_callback || !branch_emulate_step)
262 goto error;
263
264 buff[TMPL_CALL_HDLR_IDX] = branch_op_callback;
265 buff[TMPL_EMULATE_IDX] = branch_emulate_step;
266
267 /*
268 * 3. load instruction to be emulated into relevant register, and
269 */
270 patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX);
271
272 /*
273 * 4. branch back from trampoline
274 */
275 buff[TMPL_RET_IDX] = create_branch((unsigned int *)buff + TMPL_RET_IDX,
276 (unsigned long)nip, 0);
277
278 flush_icache_range((unsigned long)buff,
279 (unsigned long)(&buff[TMPL_END_IDX]));
280
281 op->optinsn.insn = buff;
282
283 return 0;
284
285error:
286 free_ppc_optinsn_slot(buff, 0);
287 return -ERANGE;
288
289}
290
291int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
292{
293 return optinsn->insn != NULL;
294}
295
296/*
297 * On powerpc, an optprobe always replaces a single instruction
298 * (4 bytes aligned and 4 bytes long), so it is impossible to
299 * encounter another kprobe in this address range. Always return 0.
300 */
301int arch_check_optimized_kprobe(struct optimized_kprobe *op)
302{
303 return 0;
304}
305
306void arch_optimize_kprobes(struct list_head *oplist)
307{
308 struct optimized_kprobe *op;
309 struct optimized_kprobe *tmp;
310
311 list_for_each_entry_safe(op, tmp, oplist, list) {
312 /*
313 * Backup instructions which will be replaced
314 * by jump address
315 */
316 memcpy(op->optinsn.copied_insn, op->kp.addr,
317 RELATIVEJUMP_SIZE);
318 patch_instruction(op->kp.addr,
319 create_branch((unsigned int *)op->kp.addr,
320 (unsigned long)op->optinsn.insn, 0));
321 list_del_init(&op->list);
322 }
323}
324
325void arch_unoptimize_kprobe(struct optimized_kprobe *op)
326{
327 arch_arm_kprobe(&op->kp);
328}
329
330void arch_unoptimize_kprobes(struct list_head *oplist,
331 struct list_head *done_list)
332{
333 struct optimized_kprobe *op;
334 struct optimized_kprobe *tmp;
335
336 list_for_each_entry_safe(op, tmp, oplist, list) {
337 arch_unoptimize_kprobe(op);
338 list_move(&op->list, done_list);
339 }
340}
341
342int arch_within_optimized_kprobe(struct optimized_kprobe *op,
343 unsigned long addr)
344{
345 return ((unsigned long)op->kp.addr <= addr &&
346 (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
347}
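Both range checks in arch_prepare_optimized_kprobe() rely on the reach of a powerpc I-form branch. A restatement of that test under the standard encoding (the real check lives in is_offset_in_branch_range(), added by a companion patch in this series):

	/* Sketch: a 'b' encodes a 26-bit signed, word-aligned displacement. */
	static bool offset_fits_relative_branch(long offset)
	{
		return offset >= -0x2000000 && offset <= 0x1fffffc &&
		       (offset & 0x3) == 0;	/* +/- 32MB, 4-byte aligned */
	}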
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 000000000000..53e429b5a29d
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,135 @@
1/*
2 * Code to prepare detour buffer for optprobes in Kernel.
3 *
4 * Copyright 2017, Anju T, IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <asm/ppc_asm.h>
13#include <asm/ptrace.h>
14#include <asm/asm-offsets.h>
15
16#define OPT_SLOT_SIZE 65536
17
18 .balign 4
19
20 /*
21 * Reserve an area from which to allocate slots for detour buffers.
22 * This is part of the .text section (rather than the vmalloc area),
23 * as it needs to be within 32MB of the probed address.
24 */
25 .global optinsn_slot
26optinsn_slot:
27 .space OPT_SLOT_SIZE
28
29 /*
30 * Optprobe template:
31 * This template gets copied into one of the slots in optinsn_slot
32 * and gets fixed up with real optprobe structures et al.
33 */
34 .global optprobe_template_entry
35optprobe_template_entry:
36 /* Create an in-memory pt_regs */
37 stdu r1,-INT_FRAME_SIZE(r1)
38 SAVE_GPR(0,r1)
39 /* Save the previous SP into stack */
40 addi r0,r1,INT_FRAME_SIZE
41 std r0,GPR1(r1)
42 SAVE_10GPRS(2,r1)
43 SAVE_10GPRS(12,r1)
44 SAVE_10GPRS(22,r1)
45 /* Save SPRS */
46 mfmsr r5
47 std r5,_MSR(r1)
48 li r5,0x700
49 std r5,_TRAP(r1)
50 li r5,0
51 std r5,ORIG_GPR3(r1)
52 std r5,RESULT(r1)
53 mfctr r5
54 std r5,_CTR(r1)
55 mflr r5
56 std r5,_LINK(r1)
57 mfspr r5,SPRN_XER
58 std r5,_XER(r1)
59 mfcr r5
60 std r5,_CCR(r1)
61 lbz r5,PACASOFTIRQEN(r13)
62 std r5,SOFTE(r1)
63 mfdar r5
64 std r5,_DAR(r1)
65 mfdsisr r5
66 std r5,_DSISR(r1)
67
68 .global optprobe_template_op_address
69optprobe_template_op_address:
70 /*
71 * Parameters to optimized_callback():
72 * 1. optimized_kprobe structure in r3
73 */
74 nop
75 nop
76 nop
77 nop
78 nop
79 /* 2. pt_regs pointer in r4 */
80 addi r4,r1,STACK_FRAME_OVERHEAD
81
82 .global optprobe_template_call_handler
83optprobe_template_call_handler:
84 /* Branch to optimized_callback() */
85 nop
86
87 /*
88 * Parameters for instruction emulation:
89 * 1. Pass SP in register r3.
90 */
91 addi r3,r1,STACK_FRAME_OVERHEAD
92
93 .global optprobe_template_insn
94optprobe_template_insn:
95	/* 2. Pass instruction to be emulated in r4 */
96 nop
97 nop
98
99 .global optprobe_template_call_emulate
100optprobe_template_call_emulate:
101 /* Branch to emulate_step() */
102 nop
103
104 /*
105 * All done.
106 * Now, restore the registers...
107 */
108 ld r5,_MSR(r1)
109 mtmsr r5
110 ld r5,_CTR(r1)
111 mtctr r5
112 ld r5,_LINK(r1)
113 mtlr r5
114 ld r5,_XER(r1)
115 mtxer r5
116 ld r5,_CCR(r1)
117 mtcr r5
118 ld r5,_DAR(r1)
119 mtdar r5
120 ld r5,_DSISR(r1)
121 mtdsisr r5
122 REST_GPR(0,r1)
123 REST_10GPRS(2,r1)
124 REST_10GPRS(12,r1)
125 REST_10GPRS(22,r1)
126 /* Restore the previous SP */
127 addi r1,r1,INT_FRAME_SIZE
128
129 .global optprobe_template_ret
130optprobe_template_ret:
131 /* ... and jump back from trampoline */
132 nop
133
134 .global optprobe_template_end
135optprobe_template_end:
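Taken together with optprobes.c, a fixed-up detour buffer behaves roughly like the following C pseudo-flow. This is a sketch, not generated code: save_regs()/restore_regs() are hypothetical stand-ins for the prologue/epilogue above, and 'op' and 'instr' stand for the values patched in via r3 and r4:

	/* Sketch of one detour buffer's control flow. */
	static void detour_buffer(void)
	{
		struct pt_regs regs;	/* built on the stack by the prologue */

		save_regs(&regs);		/* hypothetical: stdu + SAVE_*GPRS + SPRs */
		optimized_callback(op, &regs);	/* 'op' loaded by patch_imm64_load_insns() */
		emulate_step(&regs, instr);	/* 'instr' loaded by patch_imm32_load_insns() */
		restore_regs(&regs);		/* hypothetical: REST_*GPRS + SPRs */
		/* then 'b' back to the nip computed by can_optimize() */
	}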
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ac83eb04a8b8..616de028f7f8 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -649,6 +649,7 @@ static void __init early_cmdline_parse(void)
649struct option_vector1 { 649struct option_vector1 {
650 u8 byte1; 650 u8 byte1;
651 u8 arch_versions; 651 u8 arch_versions;
652 u8 arch_versions3;
652} __packed; 653} __packed;
653 654
654struct option_vector2 { 655struct option_vector2 {
@@ -691,6 +692,9 @@ struct option_vector5 {
691 u8 reserved2; 692 u8 reserved2;
692 __be16 reserved3; 693 __be16 reserved3;
693 u8 subprocessors; 694 u8 subprocessors;
695 u8 byte22;
696 u8 intarch;
697 u8 mmu;
694} __packed; 698} __packed;
695 699
696struct option_vector6 { 700struct option_vector6 {
@@ -700,7 +704,7 @@ struct option_vector6 {
700} __packed; 704} __packed;
701 705
702struct ibm_arch_vec { 706struct ibm_arch_vec {
703 struct { u32 mask, val; } pvrs[10]; 707 struct { u32 mask, val; } pvrs[12];
704 708
705 u8 num_vectors; 709 u8 num_vectors;
706 710
@@ -750,6 +754,14 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
750 .val = cpu_to_be32(0x004d0000), 754 .val = cpu_to_be32(0x004d0000),
751 }, 755 },
752 { 756 {
757 .mask = cpu_to_be32(0xffff0000), /* POWER9 */
758 .val = cpu_to_be32(0x004e0000),
759 },
760 {
761 .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */
762 .val = cpu_to_be32(0x0f000005),
763 },
764 {
753 .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ 765 .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
754 .val = cpu_to_be32(0x0f000004), 766 .val = cpu_to_be32(0x0f000004),
755 }, 767 },
@@ -774,6 +786,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
774 .byte1 = 0, 786 .byte1 = 0,
775 .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | 787 .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
776 OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, 788 OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
789 .arch_versions3 = OV1_PPC_3_00,
777 }, 790 },
778 791
779 .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), 792 .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
@@ -826,7 +839,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
826 0, 839 0,
827#endif 840#endif
828 .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), 841 .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
829 .bin_opts = 0, 842 .bin_opts = OV5_FEAT(OV5_RESIZE_HPT),
830 .micro_checkpoint = 0, 843 .micro_checkpoint = 0,
831 .reserved0 = 0, 844 .reserved0 = 0,
832 .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */ 845 .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */
@@ -836,6 +849,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
836 .reserved2 = 0, 849 .reserved2 = 0,
837 .reserved3 = 0, 850 .reserved3 = 0,
838 .subprocessors = 1, 851 .subprocessors = 1,
852 .intarch = 0,
853 .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) |
854 OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE),
839 }, 855 },
840 856
841 /* option vector 6: IBM PAPR hints */ 857 /* option vector 6: IBM PAPR hints */
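For context on the pvrs[] additions above: firmware selects an architecture vector by scanning the table for the first mask/val pair matching the processor's PVR, so the two new entries make POWER9 and generic ISA v3.00 processors match. The selection logic amounts to (illustrative only):

	/* Sketch of PVR matching against the extended pvrs[] table. */
	static bool pvr_entry_matches(u32 pvr, u32 mask, u32 val)
	{
		/* e.g. POWER9: (pvr & 0xffff0000) == 0x004e0000 */
		return (pvr & mask) == val;
	}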
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 112cc3b2ee1a..b8a4987f58cf 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1145,31 +1145,29 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
1145void __init rtas_initialize(void) 1145void __init rtas_initialize(void)
1146{ 1146{
1147 unsigned long rtas_region = RTAS_INSTANTIATE_MAX; 1147 unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
1148 u32 base, size, entry;
1149 int no_base, no_size, no_entry;
1148 1150
1149 /* Get RTAS dev node and fill up our "rtas" structure with infos 1151 /* Get RTAS dev node and fill up our "rtas" structure with infos
1150 * about it. 1152 * about it.
1151 */ 1153 */
1152 rtas.dev = of_find_node_by_name(NULL, "rtas"); 1154 rtas.dev = of_find_node_by_name(NULL, "rtas");
1153 if (rtas.dev) {
1154 const __be32 *basep, *entryp, *sizep;
1155
1156 basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
1157 sizep = of_get_property(rtas.dev, "rtas-size", NULL);
1158 if (basep != NULL && sizep != NULL) {
1159 rtas.base = __be32_to_cpu(*basep);
1160 rtas.size = __be32_to_cpu(*sizep);
1161 entryp = of_get_property(rtas.dev,
1162 "linux,rtas-entry", NULL);
1163 if (entryp == NULL) /* Ugh */
1164 rtas.entry = rtas.base;
1165 else
1166 rtas.entry = __be32_to_cpu(*entryp);
1167 } else
1168 rtas.dev = NULL;
1169 }
1170 if (!rtas.dev) 1155 if (!rtas.dev)
1171 return; 1156 return;
1172 1157
1158 no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base);
1159 no_size = of_property_read_u32(rtas.dev, "rtas-size", &size);
1160 if (no_base || no_size) {
1161 of_node_put(rtas.dev);
1162 rtas.dev = NULL;
1163 return;
1164 }
1165
1166 rtas.base = base;
1167 rtas.size = size;
1168 no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
1169 rtas.entry = no_entry ? rtas.base : entry;
1170
1173 /* If RTAS was found, allocate the RMO buffer for it and look for 1171 /* If RTAS was found, allocate the RMO buffer for it and look for
1174 * the stop-self token if any 1172 * the stop-self token if any
1175 */ 1173 */
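The rewrite above leans on the of_property_read_u32() contract: it returns 0 on success and a negative errno when the property is absent or malformed, which is why no_base/no_size/no_entry read as "non-zero means missing". A minimal usage sketch along the same lines:

	/* Illustrative: read one RTAS property the way the new code does. */
	static int read_rtas_u32(struct device_node *np, const char *prop, u32 *out)
	{
		int err = of_property_read_u32(np, prop, out);

		if (err)	/* -EINVAL, -ENODATA or -EOVERFLOW */
			pr_debug("rtas: property %s unusable (%d)\n", prop, err);
		return err;
	}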
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 2bf1f9b5b34b..3650732639ed 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -21,6 +21,7 @@
21#include <linux/cpu.h> 21#include <linux/cpu.h>
22#include <linux/workqueue.h> 22#include <linux/workqueue.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/topology.h>
24 25
25#include <linux/uaccess.h> 26#include <linux/uaccess.h>
26#include <asm/io.h> 27#include <asm/io.h>
@@ -282,6 +283,7 @@ static void prrn_work_fn(struct work_struct *work)
282 * the RTAS event. 283 * the RTAS event.
283 */ 284 */
284 pseries_devicetree_update(-prrn_update_scope); 285 pseries_devicetree_update(-prrn_update_scope);
286 arch_update_cpu_topology();
285} 287}
286 288
287static DECLARE_WORK(prrn_work, prrn_work_fn); 289static DECLARE_WORK(prrn_work, prrn_work_fn);
@@ -434,7 +436,10 @@ static void do_event_scan(void)
434 } 436 }
435 437
436 if (error == 0) { 438 if (error == 0) {
437 pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); 439 if (rtas_error_type((struct rtas_error_log *)logdata) !=
440 RTAS_TYPE_PRRN)
441 pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG,
442 0);
438 handle_rtas_event((struct rtas_error_log *)logdata); 443 handle_rtas_event((struct rtas_error_log *)logdata);
439 } 444 }
440 445
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index f516ac508ae3..4697da895133 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -87,6 +87,15 @@ EXPORT_SYMBOL(machine_id);
87int boot_cpuid = -1; 87int boot_cpuid = -1;
88EXPORT_SYMBOL_GPL(boot_cpuid); 88EXPORT_SYMBOL_GPL(boot_cpuid);
89 89
90/*
91 * These are used in binfmt_elf.c to put aux entries on the stack
92 * for each elf executable being started.
93 */
94int dcache_bsize;
95int icache_bsize;
96int ucache_bsize;
97
98
90unsigned long klimit = (unsigned long) _end; 99unsigned long klimit = (unsigned long) _end;
91 100
92/* 101/*
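dcache_bsize and friends move to common code because both the 32-bit and 64-bit setup paths export them to userland through the ELF aux vector. A sketch of how they surface, assuming the usual ARCH_DLINFO plumbing in the powerpc elf.h:

	/* Illustrative: aux entries carrying cache block sizes to userland. */
	NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize);
	NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize);
	NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize);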
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 7fcf1f7f01c1..2f88f6cf1a42 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -59,14 +59,6 @@ EXPORT_SYMBOL(DMA_MODE_READ);
59EXPORT_SYMBOL(DMA_MODE_WRITE); 59EXPORT_SYMBOL(DMA_MODE_WRITE);
60 60
61/* 61/*
62 * These are used in binfmt_elf.c to put aux entries on the stack
63 * for each elf executable being started.
64 */
65int dcache_bsize;
66int icache_bsize;
67int ucache_bsize;
68
69/*
70 * We're called here very early in the boot. 62 * We're called here very early in the boot.
71 * 63 *
72 * Note that the kernel may be running at an address which is different 64 * Note that the kernel may be running at an address which is different
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6824157e4d2e..b9855f1b290a 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -77,25 +77,18 @@
77int spinning_secondaries; 77int spinning_secondaries;
78u64 ppc64_pft_size; 78u64 ppc64_pft_size;
79 79
80/* Pick defaults since we might want to patch instructions
81 * before we've read this from the device tree.
82 */
83struct ppc64_caches ppc64_caches = { 80struct ppc64_caches ppc64_caches = {
84 .dline_size = 0x40, 81 .l1d = {
85 .log_dline_size = 6, 82 .block_size = 0x40,
86 .iline_size = 0x40, 83 .log_block_size = 6,
87 .log_iline_size = 6 84 },
85 .l1i = {
86 .block_size = 0x40,
87 .log_block_size = 6
88 },
88}; 89};
89EXPORT_SYMBOL_GPL(ppc64_caches); 90EXPORT_SYMBOL_GPL(ppc64_caches);
90 91
91/*
92 * These are used in binfmt_elf.c to put aux entries on the stack
93 * for each elf executable being started.
94 */
95int dcache_bsize;
96int icache_bsize;
97int ucache_bsize;
98
99#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) 92#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
100void __init setup_tlb_core_data(void) 93void __init setup_tlb_core_data(void)
101{ 94{
@@ -408,74 +401,135 @@ void smp_release_cpus(void)
408 * cache information about the CPU that will be used by cache flush 401 * cache information about the CPU that will be used by cache flush
409 * routines and/or provided to userland 402 * routines and/or provided to userland
410 */ 403 */
404
405static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
406 u32 bsize, u32 sets)
407{
408 info->size = size;
409 info->sets = sets;
410 info->line_size = lsize;
411 info->block_size = bsize;
412 info->log_block_size = __ilog2(bsize);
413 info->blocks_per_page = PAGE_SIZE / bsize;
414
415 if (sets == 0)
416 info->assoc = 0xffff;
417 else
418 info->assoc = size / (sets * lsize);
419}
420
421static bool __init parse_cache_info(struct device_node *np,
422 bool icache,
423 struct ppc_cache_info *info)
424{
425 static const char *ipropnames[] __initdata = {
426 "i-cache-size",
427 "i-cache-sets",
428 "i-cache-block-size",
429 "i-cache-line-size",
430 };
431 static const char *dpropnames[] __initdata = {
432 "d-cache-size",
433 "d-cache-sets",
434 "d-cache-block-size",
435 "d-cache-line-size",
436 };
437 const char **propnames = icache ? ipropnames : dpropnames;
438 const __be32 *sizep, *lsizep, *bsizep, *setsp;
439 u32 size, lsize, bsize, sets;
440 bool success = true;
441
442 size = 0;
443 sets = -1u;
444 lsize = bsize = cur_cpu_spec->dcache_bsize;
445 sizep = of_get_property(np, propnames[0], NULL);
446 if (sizep != NULL)
447 size = be32_to_cpu(*sizep);
448 setsp = of_get_property(np, propnames[1], NULL);
449 if (setsp != NULL)
450 sets = be32_to_cpu(*setsp);
451 bsizep = of_get_property(np, propnames[2], NULL);
452 lsizep = of_get_property(np, propnames[3], NULL);
453 if (bsizep == NULL)
454 bsizep = lsizep;
455 if (lsizep != NULL)
456 lsize = be32_to_cpu(*lsizep);
457 if (bsizep != NULL)
458 bsize = be32_to_cpu(*bsizep);
459 if (sizep == NULL || bsizep == NULL || lsizep == NULL)
460 success = false;
461
462 /*
463 * OF is weird ... it represents fully associative caches
464 * as "1 way" which doesn't make much sense and doesn't
465 * leave room for direct mapped. We'll assume that 0
466 * in OF means direct mapped for that reason.
467 */
468 if (sets == 1)
469 sets = 0;
470 else if (sets == 0)
471 sets = 1;
472
473 init_cache_info(info, size, lsize, bsize, sets);
474
475 return success;
476}
477
411void __init initialize_cache_info(void) 478void __init initialize_cache_info(void)
412{ 479{
413 struct device_node *np; 480 struct device_node *cpu = NULL, *l2, *l3 = NULL;
414 unsigned long num_cpus = 0; 481 u32 pvr;
415 482
416 DBG(" -> initialize_cache_info()\n"); 483 DBG(" -> initialize_cache_info()\n");
417 484
418 for_each_node_by_type(np, "cpu") { 485 /*
419 num_cpus += 1; 486 * All shipping POWER8 machines have a firmware bug that
487 * puts incorrect information in the device-tree. This will
488 * be (hopefully) fixed for future chips but for now hard-code
489 * the values if we are running on one of these.
490 */
491 pvr = PVR_VER(mfspr(SPRN_PVR));
492 if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
493 pvr == PVR_POWER8NVL) {
494 /* size lsize blk sets */
495 init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32);
496 init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64);
497 init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512);
498 init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192);
499 } else
500 cpu = of_find_node_by_type(NULL, "cpu");
501
502 /*
503 * We're assuming *all* of the CPUs have the same
504 * d-cache and i-cache sizes... -Peter
505 */
506 if (cpu) {
507 if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
508 DBG("Argh, can't find dcache properties !\n");
509
510 if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
511 DBG("Argh, can't find icache properties !\n");
420 512
421 /* 513 /*
422 * We're assuming *all* of the CPUs have the same 514 * Try to find the L2 and L3 if any. Assume they are
423 * d-cache and i-cache sizes... -Peter 515 * unified and use the D-side properties.
424 */ 516 */
425 if (num_cpus == 1) { 517 l2 = of_find_next_cache_node(cpu);
426 const __be32 *sizep, *lsizep; 518 of_node_put(cpu);
427 u32 size, lsize; 519 if (l2) {
428 520 parse_cache_info(l2, false, &ppc64_caches.l2);
429 size = 0; 521 l3 = of_find_next_cache_node(l2);
430 lsize = cur_cpu_spec->dcache_bsize; 522 of_node_put(l2);
431 sizep = of_get_property(np, "d-cache-size", NULL); 523 }
432 if (sizep != NULL) 524 if (l3) {
433 size = be32_to_cpu(*sizep); 525 parse_cache_info(l3, false, &ppc64_caches.l3);
434 lsizep = of_get_property(np, "d-cache-block-size", 526 of_node_put(l3);
435 NULL);
436 /* fallback if block size missing */
437 if (lsizep == NULL)
438 lsizep = of_get_property(np,
439 "d-cache-line-size",
440 NULL);
441 if (lsizep != NULL)
442 lsize = be32_to_cpu(*lsizep);
443 if (sizep == NULL || lsizep == NULL)
444 DBG("Argh, can't find dcache properties ! "
445 "sizep: %p, lsizep: %p\n", sizep, lsizep);
446
447 ppc64_caches.dsize = size;
448 ppc64_caches.dline_size = lsize;
449 ppc64_caches.log_dline_size = __ilog2(lsize);
450 ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
451
452 size = 0;
453 lsize = cur_cpu_spec->icache_bsize;
454 sizep = of_get_property(np, "i-cache-size", NULL);
455 if (sizep != NULL)
456 size = be32_to_cpu(*sizep);
457 lsizep = of_get_property(np, "i-cache-block-size",
458 NULL);
459 if (lsizep == NULL)
460 lsizep = of_get_property(np,
461 "i-cache-line-size",
462 NULL);
463 if (lsizep != NULL)
464 lsize = be32_to_cpu(*lsizep);
465 if (sizep == NULL || lsizep == NULL)
466 DBG("Argh, can't find icache properties ! "
467 "sizep: %p, lsizep: %p\n", sizep, lsizep);
468
469 ppc64_caches.isize = size;
470 ppc64_caches.iline_size = lsize;
471 ppc64_caches.log_iline_size = __ilog2(lsize);
472 ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
473 } 527 }
474 } 528 }
475 529
476 /* For use by binfmt_elf */ 530 /* For use by binfmt_elf */
477 dcache_bsize = ppc64_caches.dline_size; 531 dcache_bsize = ppc64_caches.l1d.block_size;
478 icache_bsize = ppc64_caches.iline_size; 532 icache_bsize = ppc64_caches.l1i.block_size;
479 533
480 DBG(" <- initialize_cache_info()\n"); 534 DBG(" <- initialize_cache_info()\n");
481} 535}
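As a worked example of init_cache_info() with the hard-coded POWER8 L1D numbers above (size 0x10000, lsize = bsize = 128, sets = 64):

	/*
	 * assoc           = 0x10000 / (64 * 128) = 8      (8-way set associative)
	 * log_block_size  = __ilog2(128)         = 7
	 * blocks_per_page = PAGE_SIZE / 128      = 512    (with 64K pages)
	 */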
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 4111d30badfa..22b01a3962f0 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -736,16 +736,14 @@ static int __init vdso_init(void)
736 if (firmware_has_feature(FW_FEATURE_LPAR)) 736 if (firmware_has_feature(FW_FEATURE_LPAR))
737 vdso_data->platform |= 1; 737 vdso_data->platform |= 1;
738 vdso_data->physicalMemorySize = memblock_phys_mem_size(); 738 vdso_data->physicalMemorySize = memblock_phys_mem_size();
739 vdso_data->dcache_size = ppc64_caches.dsize; 739 vdso_data->dcache_size = ppc64_caches.l1d.size;
740 vdso_data->dcache_line_size = ppc64_caches.dline_size; 740 vdso_data->dcache_line_size = ppc64_caches.l1d.line_size;
741 vdso_data->icache_size = ppc64_caches.isize; 741 vdso_data->icache_size = ppc64_caches.l1i.size;
742 vdso_data->icache_line_size = ppc64_caches.iline_size; 742 vdso_data->icache_line_size = ppc64_caches.l1i.line_size;
743 743 vdso_data->dcache_block_size = ppc64_caches.l1d.block_size;
744 /* XXXOJN: Blocks should be added to ppc64_caches and used instead */ 744 vdso_data->icache_block_size = ppc64_caches.l1i.block_size;
745 vdso_data->dcache_block_size = ppc64_caches.dline_size; 745 vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
746 vdso_data->icache_block_size = ppc64_caches.iline_size; 746 vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size;
747 vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size;
748 vdso_data->icache_log_block_size = ppc64_caches.log_iline_size;
749 747
750 /* 748 /*
751 * Calculate the size of the 64 bits vDSO 749 * Calculate the size of the 64 bits vDSO
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 7dd89b79d038..b87ccde2137a 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -70,7 +70,8 @@ endif
70kvm-hv-y += \ 70kvm-hv-y += \
71 book3s_hv.o \ 71 book3s_hv.o \
72 book3s_hv_interrupts.o \ 72 book3s_hv_interrupts.o \
73 book3s_64_mmu_hv.o 73 book3s_64_mmu_hv.o \
74 book3s_64_mmu_radix.o
74 75
75kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 76kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
76 book3s_hv_rm_xics.o 77 book3s_hv_rm_xics.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 019f008775b9..b6b5c185bd92 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -239,6 +239,7 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
239 kvmppc_set_dsisr(vcpu, flags); 239 kvmppc_set_dsisr(vcpu, flags);
240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); 240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
241} 241}
242EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); /* used by kvm_hv */
242 243
243void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) 244void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
244{ 245{
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b795dd1ac2ef..9df3d940acec 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -119,6 +119,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
119 long err = -EBUSY; 119 long err = -EBUSY;
120 long order; 120 long order;
121 121
122 if (kvm_is_radix(kvm))
123 return -EINVAL;
124
122 mutex_lock(&kvm->lock); 125 mutex_lock(&kvm->lock);
123 if (kvm->arch.hpte_setup_done) { 126 if (kvm->arch.hpte_setup_done) {
124 kvm->arch.hpte_setup_done = 0; 127 kvm->arch.hpte_setup_done = 0;
@@ -152,12 +155,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
152 155
153void kvmppc_free_hpt(struct kvm *kvm) 156void kvmppc_free_hpt(struct kvm *kvm)
154{ 157{
155 kvmppc_free_lpid(kvm->arch.lpid);
156 vfree(kvm->arch.revmap); 158 vfree(kvm->arch.revmap);
157 if (kvm->arch.hpt_cma_alloc) 159 if (kvm->arch.hpt_cma_alloc)
158 kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), 160 kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
159 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); 161 1 << (kvm->arch.hpt_order - PAGE_SHIFT));
160 else 162 else if (kvm->arch.hpt_virt)
161 free_pages(kvm->arch.hpt_virt, 163 free_pages(kvm->arch.hpt_virt,
162 kvm->arch.hpt_order - PAGE_SHIFT); 164 kvm->arch.hpt_order - PAGE_SHIFT);
163} 165}
@@ -392,8 +394,8 @@ static int instruction_is_store(unsigned int instr)
392 return (instr & mask) != 0; 394 return (instr & mask) != 0;
393} 395}
394 396
395static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, 397int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
396 unsigned long gpa, gva_t ea, int is_store) 398 unsigned long gpa, gva_t ea, int is_store)
397{ 399{
398 u32 last_inst; 400 u32 last_inst;
399 401
@@ -458,6 +460,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
458 unsigned long rcbits; 460 unsigned long rcbits;
459 long mmio_update; 461 long mmio_update;
460 462
463 if (kvm_is_radix(kvm))
464 return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
465
461 /* 466 /*
462 * Real-mode code has already searched the HPT and found the 467 * Real-mode code has already searched the HPT and found the
463 * entry we're interested in. Lock the entry and check that 468 * entry we're interested in. Lock the entry and check that
@@ -695,12 +700,13 @@ static void kvmppc_rmap_reset(struct kvm *kvm)
695 srcu_read_unlock(&kvm->srcu, srcu_idx); 700 srcu_read_unlock(&kvm->srcu, srcu_idx);
696} 701}
697 702
703typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot,
704 unsigned long gfn);
705
698static int kvm_handle_hva_range(struct kvm *kvm, 706static int kvm_handle_hva_range(struct kvm *kvm,
699 unsigned long start, 707 unsigned long start,
700 unsigned long end, 708 unsigned long end,
701 int (*handler)(struct kvm *kvm, 709 hva_handler_fn handler)
702 unsigned long *rmapp,
703 unsigned long gfn))
704{ 710{
705 int ret; 711 int ret;
706 int retval = 0; 712 int retval = 0;
@@ -725,9 +731,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
725 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 731 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
726 732
727 for (; gfn < gfn_end; ++gfn) { 733 for (; gfn < gfn_end; ++gfn) {
728 gfn_t gfn_offset = gfn - memslot->base_gfn; 734 ret = handler(kvm, memslot, gfn);
729
730 ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
731 retval |= ret; 735 retval |= ret;
732 } 736 }
733 } 737 }
@@ -736,20 +740,21 @@ static int kvm_handle_hva_range(struct kvm *kvm,
736} 740}
737 741
738static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 742static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
739 int (*handler)(struct kvm *kvm, unsigned long *rmapp, 743 hva_handler_fn handler)
740 unsigned long gfn))
741{ 744{
742 return kvm_handle_hva_range(kvm, hva, hva + 1, handler); 745 return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
743} 746}
744 747
745static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 748static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
746 unsigned long gfn) 749 unsigned long gfn)
747{ 750{
748 struct revmap_entry *rev = kvm->arch.revmap; 751 struct revmap_entry *rev = kvm->arch.revmap;
749 unsigned long h, i, j; 752 unsigned long h, i, j;
750 __be64 *hptep; 753 __be64 *hptep;
751 unsigned long ptel, psize, rcbits; 754 unsigned long ptel, psize, rcbits;
755 unsigned long *rmapp;
752 756
757 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
753 for (;;) { 758 for (;;) {
754 lock_rmap(rmapp); 759 lock_rmap(rmapp);
755 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 760 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
@@ -810,26 +815,36 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
810 815
811int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) 816int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
812{ 817{
813 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 818 hva_handler_fn handler;
819
820 handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
821 kvm_handle_hva(kvm, hva, handler);
814 return 0; 822 return 0;
815} 823}
816 824
817int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) 825int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
818{ 826{
819 kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); 827 hva_handler_fn handler;
828
829 handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
830 kvm_handle_hva_range(kvm, start, end, handler);
820 return 0; 831 return 0;
821} 832}
822 833
823void kvmppc_core_flush_memslot_hv(struct kvm *kvm, 834void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
824 struct kvm_memory_slot *memslot) 835 struct kvm_memory_slot *memslot)
825{ 836{
826 unsigned long *rmapp;
827 unsigned long gfn; 837 unsigned long gfn;
828 unsigned long n; 838 unsigned long n;
839 unsigned long *rmapp;
829 840
830 rmapp = memslot->arch.rmap;
831 gfn = memslot->base_gfn; 841 gfn = memslot->base_gfn;
832 for (n = memslot->npages; n; --n) { 842 rmapp = memslot->arch.rmap;
843 for (n = memslot->npages; n; --n, ++gfn) {
844 if (kvm_is_radix(kvm)) {
845 kvm_unmap_radix(kvm, memslot, gfn);
846 continue;
847 }
833 /* 848 /*
834 * Testing the present bit without locking is OK because 849 * Testing the present bit without locking is OK because
835 * the memslot has been marked invalid already, and hence 850 * the memslot has been marked invalid already, and hence
@@ -837,20 +852,21 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
837 * thus the present bit can't go from 0 to 1. 852 * thus the present bit can't go from 0 to 1.
838 */ 853 */
839 if (*rmapp & KVMPPC_RMAP_PRESENT) 854 if (*rmapp & KVMPPC_RMAP_PRESENT)
840 kvm_unmap_rmapp(kvm, rmapp, gfn); 855 kvm_unmap_rmapp(kvm, memslot, gfn);
841 ++rmapp; 856 ++rmapp;
842 ++gfn;
843 } 857 }
844} 858}
845 859
846static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 860static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
847 unsigned long gfn) 861 unsigned long gfn)
848{ 862{
849 struct revmap_entry *rev = kvm->arch.revmap; 863 struct revmap_entry *rev = kvm->arch.revmap;
850 unsigned long head, i, j; 864 unsigned long head, i, j;
851 __be64 *hptep; 865 __be64 *hptep;
852 int ret = 0; 866 int ret = 0;
867 unsigned long *rmapp;
853 868
869 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
854 retry: 870 retry:
855 lock_rmap(rmapp); 871 lock_rmap(rmapp);
856 if (*rmapp & KVMPPC_RMAP_REFERENCED) { 872 if (*rmapp & KVMPPC_RMAP_REFERENCED) {
@@ -898,17 +914,22 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
898 914
899int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) 915int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
900{ 916{
901 return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); 917 hva_handler_fn handler;
918
919 handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp;
920 return kvm_handle_hva_range(kvm, start, end, handler);
902} 921}
903 922
904static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 923static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
905 unsigned long gfn) 924 unsigned long gfn)
906{ 925{
907 struct revmap_entry *rev = kvm->arch.revmap; 926 struct revmap_entry *rev = kvm->arch.revmap;
908 unsigned long head, i, j; 927 unsigned long head, i, j;
909 unsigned long *hp; 928 unsigned long *hp;
910 int ret = 1; 929 int ret = 1;
930 unsigned long *rmapp;
911 931
932 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
912 if (*rmapp & KVMPPC_RMAP_REFERENCED) 933 if (*rmapp & KVMPPC_RMAP_REFERENCED)
913 return 1; 934 return 1;
914 935
@@ -934,12 +955,18 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
934 955
935int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) 956int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
936{ 957{
937 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 958 hva_handler_fn handler;
959
960 handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp;
961 return kvm_handle_hva(kvm, hva, handler);
938} 962}
939 963
940void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) 964void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
941{ 965{
942 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 966 hva_handler_fn handler;
967
968 handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
969 kvm_handle_hva(kvm, hva, handler);
943} 970}
944 971
945static int vcpus_running(struct kvm *kvm) 972static int vcpus_running(struct kvm *kvm)
@@ -1040,7 +1067,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
1040 return npages_dirty; 1067 return npages_dirty;
1041} 1068}
1042 1069
1043static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, 1070void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
1044 struct kvm_memory_slot *memslot, 1071 struct kvm_memory_slot *memslot,
1045 unsigned long *map) 1072 unsigned long *map)
1046{ 1073{
@@ -1058,12 +1085,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
1058 __set_bit_le(gfn - memslot->base_gfn, map); 1085 __set_bit_le(gfn - memslot->base_gfn, map);
1059} 1086}
1060 1087
1061long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, 1088long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
1062 unsigned long *map) 1089 struct kvm_memory_slot *memslot, unsigned long *map)
1063{ 1090{
1064 unsigned long i, j; 1091 unsigned long i, j;
1065 unsigned long *rmapp; 1092 unsigned long *rmapp;
1066 struct kvm_vcpu *vcpu;
1067 1093
1068 preempt_disable(); 1094 preempt_disable();
1069 rmapp = memslot->arch.rmap; 1095 rmapp = memslot->arch.rmap;
@@ -1079,15 +1105,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
1079 __set_bit_le(j, map); 1105 __set_bit_le(j, map);
1080 ++rmapp; 1106 ++rmapp;
1081 } 1107 }
1082
1083 /* Harvest dirty bits from VPA and DTL updates */
1084 /* Note: we never modify the SLB shadow buffer areas */
1085 kvm_for_each_vcpu(i, vcpu, kvm) {
1086 spin_lock(&vcpu->arch.vpa_update_lock);
1087 harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
1088 harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
1089 spin_unlock(&vcpu->arch.vpa_update_lock);
1090 }
1091 preempt_enable(); 1108 preempt_enable();
1092 return 0; 1109 return 0;
1093} 1110}
@@ -1142,10 +1159,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
1142 srcu_idx = srcu_read_lock(&kvm->srcu); 1159 srcu_idx = srcu_read_lock(&kvm->srcu);
1143 memslot = gfn_to_memslot(kvm, gfn); 1160 memslot = gfn_to_memslot(kvm, gfn);
1144 if (memslot) { 1161 if (memslot) {
1145 rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 1162 if (!kvm_is_radix(kvm)) {
1146 lock_rmap(rmap); 1163 rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
1147 *rmap |= KVMPPC_RMAP_CHANGED; 1164 lock_rmap(rmap);
1148 unlock_rmap(rmap); 1165 *rmap |= KVMPPC_RMAP_CHANGED;
1166 unlock_rmap(rmap);
1167 } else if (memslot->dirty_bitmap) {
1168 mark_page_dirty(kvm, gfn);
1169 }
1149 } 1170 }
1150 srcu_read_unlock(&kvm->srcu, srcu_idx); 1171 srcu_read_unlock(&kvm->srcu, srcu_idx);
1151} 1172}
@@ -1675,7 +1696,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
1675 1696
1676 vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ 1697 vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
1677 1698
1678 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1699 if (kvm_is_radix(vcpu->kvm))
1700 mmu->xlate = kvmppc_mmu_radix_xlate;
1701 else
1702 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
1679 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 1703 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
1680 1704
1681 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; 1705 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
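The recurring pattern in this file is a one-time dispatch between hash and radix handlers that now share a (kvm, memslot, gfn) signature, with each hash handler deriving its own rmap slot. Condensed into a sketch using the helpers visible above:

	/* Sketch of the dispatch plus per-handler rmap derivation. */
	typedef int (*hva_handler_fn)(struct kvm *kvm,
				      struct kvm_memory_slot *memslot,
				      unsigned long gfn);

	static int handle_one_hva(struct kvm *kvm, unsigned long hva,
				  hva_handler_fn hash_fn, hva_handler_fn radix_fn)
	{
		hva_handler_fn handler = kvm_is_radix(kvm) ? radix_fn : hash_fn;

		/*
		 * kvm_handle_hva() resolves hva -> (memslot, gfn) and calls
		 * handler(kvm, memslot, gfn); hash handlers then compute
		 * rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn].
		 */
		return kvm_handle_hva(kvm, hva, handler);
	}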
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
new file mode 100644
index 000000000000..4344651f408c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -0,0 +1,716 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
7 */
8
9#include <linux/types.h>
10#include <linux/string.h>
11#include <linux/kvm.h>
12#include <linux/kvm_host.h>
13
14#include <asm/kvm_ppc.h>
15#include <asm/kvm_book3s.h>
16#include <asm/page.h>
17#include <asm/mmu.h>
18#include <asm/pgtable.h>
19#include <asm/pgalloc.h>
20
21/*
22 * Supported radix tree geometry.
23 * Like P9, we support either 5 or 9 bits at the first (lowest) level,
24 * for a page size of 64k or 4k.
25 */
26static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
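A worked decode of that geometry (levels are walked top-down, i.e. index 3 first): starting from a 52-bit effective space, each level consumes its share of bits until a leaf is reached:

	/*
	 * offset = 52
	 * level 3: 52 - 13 = 39
	 * level 2: 39 -  9 = 30
	 * level 1: 30 -  9 = 21   (a leaf here is a 2^21 = 2M page)
	 * level 0: 21 -  5 = 16   (5-bit case: 2^16 = 64K pages)
	 *          21 -  9 = 12   (9-bit case: 2^12 = 4K pages)
	 */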
27
28int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
29 struct kvmppc_pte *gpte, bool data, bool iswrite)
30{
31 struct kvm *kvm = vcpu->kvm;
32 u32 pid;
33 int ret, level, ps;
34 __be64 prte, rpte;
35 unsigned long root, pte, index;
36 unsigned long rts, bits, offset;
37 unsigned long gpa;
38 unsigned long proc_tbl_size;
39
40 /* Work out effective PID */
41 switch (eaddr >> 62) {
42 case 0:
43 pid = vcpu->arch.pid;
44 break;
45 case 3:
46 pid = 0;
47 break;
48 default:
49 return -EINVAL;
50 }
51 proc_tbl_size = 1 << ((kvm->arch.process_table & PRTS_MASK) + 12);
52 if (pid * 16 >= proc_tbl_size)
53 return -EINVAL;
54
55 /* Read partition table to find root of tree for effective PID */
56 ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16,
57 &prte, sizeof(prte));
58 if (ret)
59 return ret;
60
61 root = be64_to_cpu(prte);
62 rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
63 ((root & RTS2_MASK) >> RTS2_SHIFT);
64 bits = root & RPDS_MASK;
65 root = root & RPDB_MASK;
66
67 /* P9 DD1 interprets RTS (radix tree size) differently */
68 offset = rts + 31;
69 if (cpu_has_feature(CPU_FTR_POWER9_DD1))
70 offset -= 3;
71
72 /* current implementations only support 52-bit space */
73 if (offset != 52)
74 return -EINVAL;
75
76 for (level = 3; level >= 0; --level) {
77 if (level && bits != p9_supported_radix_bits[level])
78 return -EINVAL;
79 if (level == 0 && !(bits == 5 || bits == 9))
80 return -EINVAL;
81 offset -= bits;
82 index = (eaddr >> offset) & ((1UL << bits) - 1);
83 /* check that low bits of page table base are zero */
84 if (root & ((1UL << (bits + 3)) - 1))
85 return -EINVAL;
86 ret = kvm_read_guest(kvm, root + index * 8,
87 &rpte, sizeof(rpte));
88 if (ret)
89 return ret;
90 pte = __be64_to_cpu(rpte);
91 if (!(pte & _PAGE_PRESENT))
92 return -ENOENT;
93 if (pte & _PAGE_PTE)
94 break;
95 bits = pte & 0x1f;
96 root = pte & 0x0fffffffffffff00ul;
97 }
98 /* need a leaf at lowest level; 512GB pages not supported */
99 if (level < 0 || level == 3)
100 return -EINVAL;
101
102 /* offset is now log base 2 of the page size */
103 gpa = pte & 0x01fffffffffff000ul;
104 if (gpa & ((1ul << offset) - 1))
105 return -EINVAL;
106 gpa += eaddr & ((1ul << offset) - 1);
107 for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
108 if (offset == mmu_psize_defs[ps].shift)
109 break;
110 gpte->page_size = ps;
111
112 gpte->eaddr = eaddr;
113 gpte->raddr = gpa;
114
115 /* Work out permissions */
116 gpte->may_read = !!(pte & _PAGE_READ);
117 gpte->may_write = !!(pte & _PAGE_WRITE);
118 gpte->may_execute = !!(pte & _PAGE_EXEC);
119 if (kvmppc_get_msr(vcpu) & MSR_PR) {
120 if (pte & _PAGE_PRIVILEGED) {
121 gpte->may_read = 0;
122 gpte->may_write = 0;
123 gpte->may_execute = 0;
124 }
125 } else {
126 if (!(pte & _PAGE_PRIVILEGED)) {
127 /* Check AMR/IAMR to see if strict mode is in force */
128 if (vcpu->arch.amr & (1ul << 62))
129 gpte->may_read = 0;
130 if (vcpu->arch.amr & (1ul << 63))
131 gpte->may_write = 0;
132 if (vcpu->arch.iamr & (1ul << 62))
133 gpte->may_execute = 0;
134 }
135 }
136
137 return 0;
138}
139
140#ifdef CONFIG_PPC_64K_PAGES
141#define MMU_BASE_PSIZE MMU_PAGE_64K
142#else
143#define MMU_BASE_PSIZE MMU_PAGE_4K
144#endif
145
146static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
147 unsigned int pshift)
148{
149 int psize = MMU_BASE_PSIZE;
150
151 if (pshift >= PMD_SHIFT)
152 psize = MMU_PAGE_2M;
153 addr &= ~0xfffUL;
154 addr |= mmu_psize_defs[psize].ap << 5;
155 asm volatile("ptesync": : :"memory");
156 asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
157 : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
158 asm volatile("ptesync": : :"memory");
159}
160
161unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
162 unsigned long clr, unsigned long set,
163 unsigned long addr, unsigned int shift)
164{
165 unsigned long old = 0;
166
167 if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) &&
168 pte_present(*ptep)) {
169 /* have to invalidate it first */
170 old = __radix_pte_update(ptep, _PAGE_PRESENT, 0);
171 kvmppc_radix_tlbie_page(kvm, addr, shift);
172 set |= _PAGE_PRESENT;
173 old &= _PAGE_PRESENT;
174 }
175 return __radix_pte_update(ptep, clr, set) | old;
176}
177
178void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
179 pte_t *ptep, pte_t pte)
180{
181 radix__set_pte_at(kvm->mm, addr, ptep, pte, 0);
182}
183
184static struct kmem_cache *kvm_pte_cache;
185
186static pte_t *kvmppc_pte_alloc(void)
187{
188 return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
189}
190
191static void kvmppc_pte_free(pte_t *ptep)
192{
193 kmem_cache_free(kvm_pte_cache, ptep);
194}
195
196static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
197 unsigned int level, unsigned long mmu_seq)
198{
199 pgd_t *pgd;
200 pud_t *pud, *new_pud = NULL;
201 pmd_t *pmd, *new_pmd = NULL;
202 pte_t *ptep, *new_ptep = NULL;
203 unsigned long old;
204 int ret;
205
206	/* Traverse the guest's 2nd-level tree, allocating new levels as needed */
207 pgd = kvm->arch.pgtable + pgd_index(gpa);
208 pud = NULL;
209 if (pgd_present(*pgd))
210 pud = pud_offset(pgd, gpa);
211 else
212 new_pud = pud_alloc_one(kvm->mm, gpa);
213
214 pmd = NULL;
215 if (pud && pud_present(*pud))
216 pmd = pmd_offset(pud, gpa);
217 else
218 new_pmd = pmd_alloc_one(kvm->mm, gpa);
219
220 if (level == 0 && !(pmd && pmd_present(*pmd)))
221 new_ptep = kvmppc_pte_alloc();
222
223 /* Check if we might have been invalidated; let the guest retry if so */
224 spin_lock(&kvm->mmu_lock);
225 ret = -EAGAIN;
226 if (mmu_notifier_retry(kvm, mmu_seq))
227 goto out_unlock;
228
229 /* Now traverse again under the lock and change the tree */
230 ret = -ENOMEM;
231 if (pgd_none(*pgd)) {
232 if (!new_pud)
233 goto out_unlock;
234 pgd_populate(kvm->mm, pgd, new_pud);
235 new_pud = NULL;
236 }
237 pud = pud_offset(pgd, gpa);
238 if (pud_none(*pud)) {
239 if (!new_pmd)
240 goto out_unlock;
241 pud_populate(kvm->mm, pud, new_pmd);
242 new_pmd = NULL;
243 }
244 pmd = pmd_offset(pud, gpa);
245 if (pmd_large(*pmd)) {
246 /* Someone else has instantiated a large page here; retry */
247 ret = -EAGAIN;
248 goto out_unlock;
249 }
250 if (level == 1 && !pmd_none(*pmd)) {
251 /*
252 * There's a page table page here, but we wanted
253 * to install a large page. Tell the caller and let
254 * it try installing a normal page if it wants.
255 */
256 ret = -EBUSY;
257 goto out_unlock;
258 }
259 if (level == 0) {
260 if (pmd_none(*pmd)) {
261 if (!new_ptep)
262 goto out_unlock;
263 pmd_populate(kvm->mm, pmd, new_ptep);
264 new_ptep = NULL;
265 }
266 ptep = pte_offset_kernel(pmd, gpa);
267 if (pte_present(*ptep)) {
268 /* PTE was previously valid, so invalidate it */
269 old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
270 0, gpa, 0);
271 kvmppc_radix_tlbie_page(kvm, gpa, 0);
272 if (old & _PAGE_DIRTY)
273 mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
274 }
275 kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
276 } else {
277 kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
278 }
279 ret = 0;
280
281 out_unlock:
282 spin_unlock(&kvm->mmu_lock);
283 if (new_pud)
284 pud_free(kvm->mm, new_pud);
285 if (new_pmd)
286 pmd_free(kvm->mm, new_pmd);
287 if (new_ptep)
288 kvmppc_pte_free(new_ptep);
289 return ret;
290}
291
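
kvmppc_create_pte() follows a common MMU construction pattern: allocate any levels that look missing before taking kvm->mmu_lock (the allocations may sleep), then re-walk the tree under the lock, install only what is still missing, and free whatever lost the race. A toy user-space model of that shape, using a pthread mutex in place of the kernel spinlock; all names here are illustrative, not kernel API.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static void *level;			/* the tree slot we may need to populate */

static int install_level(void)
{
	void *new = NULL;
	int ret = 0;

	if (!level)			/* speculative allocation, outside the lock */
		new = malloc(4096);

	pthread_mutex_lock(&mmu_lock);
	if (!level) {
		if (!new) {
			ret = -1;	/* slot emptied after we looked, or malloc failed */
		} else {
			level = new;	/* we won the race: install our allocation */
			new = NULL;
		}
	}
	pthread_mutex_unlock(&mmu_lock);

	free(new);			/* lost the race; drop the spare */
	return ret;
}

int main(void)
{
	return install_level();
}
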
292int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
293 unsigned long ea, unsigned long dsisr)
294{
295 struct kvm *kvm = vcpu->kvm;
296 unsigned long mmu_seq, pte_size;
297 unsigned long gpa, gfn, hva, pfn;
298 struct kvm_memory_slot *memslot;
299 struct page *page = NULL, *pages[1];
300 long ret, npages, ok;
301 unsigned int writing;
302 struct vm_area_struct *vma;
303 unsigned long flags;
304 pte_t pte, *ptep;
305 unsigned long pgflags;
306 unsigned int shift, level;
307
308 /* Check for unusual errors */
309 if (dsisr & DSISR_UNSUPP_MMU) {
310 pr_err("KVM: Got unsupported MMU fault\n");
311 return -EFAULT;
312 }
313 if (dsisr & DSISR_BADACCESS) {
314 /* Reflect to the guest as DSI */
315 pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
316 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
317 return RESUME_GUEST;
318 }
319
320 /* Translate the logical address and get the page */
321 gpa = vcpu->arch.fault_gpa & ~0xfffUL;
322 gpa &= ~0xF000000000000000ul;
323 gfn = gpa >> PAGE_SHIFT;
324 if (!(dsisr & DSISR_PGDIRFAULT))
325 gpa |= ea & 0xfff;
326 memslot = gfn_to_memslot(kvm, gfn);
327
328 /* No memslot means it's an emulated MMIO region */
329 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
330 if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS |
331 DSISR_SET_RC)) {
332 /*
333 * Bad address in guest page table tree, or other
334 * unusual error - reflect it to the guest as DSI.
335 */
336 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
337 return RESUME_GUEST;
338 }
339 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
340 dsisr & DSISR_ISSTORE);
341 }
342
343 /* used to check for invalidations in progress */
344 mmu_seq = kvm->mmu_notifier_seq;
345 smp_rmb();
346
347 writing = (dsisr & DSISR_ISSTORE) != 0;
348 hva = gfn_to_hva_memslot(memslot, gfn);
349 if (dsisr & DSISR_SET_RC) {
350 /*
351 * Need to set an R or C bit in the 2nd-level tables;
352 * if the relevant bits aren't already set in the linux
353 * page tables, fall through to do the gup_fast to
354 * set them in the linux page tables too.
355 */
356 ok = 0;
357 pgflags = _PAGE_ACCESSED;
358 if (writing)
359 pgflags |= _PAGE_DIRTY;
360 local_irq_save(flags);
361 ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva,
362 NULL, NULL);
363 if (ptep) {
364 pte = READ_ONCE(*ptep);
365 if (pte_present(pte) &&
366 (pte_val(pte) & pgflags) == pgflags)
367 ok = 1;
368 }
369 local_irq_restore(flags);
370 if (ok) {
371 spin_lock(&kvm->mmu_lock);
372 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
373 spin_unlock(&kvm->mmu_lock);
374 return RESUME_GUEST;
375 }
376 ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable,
377 gpa, NULL, &shift);
378 if (ptep && pte_present(*ptep)) {
379 kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
380 gpa, shift);
381 spin_unlock(&kvm->mmu_lock);
382 return RESUME_GUEST;
383 }
384 spin_unlock(&kvm->mmu_lock);
385 }
386 }
387
388 ret = -EFAULT;
389 pfn = 0;
390 pte_size = PAGE_SIZE;
391 pgflags = _PAGE_READ | _PAGE_EXEC;
392 level = 0;
393 npages = get_user_pages_fast(hva, 1, writing, pages);
394 if (npages < 1) {
395 /* Check if it's an I/O mapping */
396 down_read(&current->mm->mmap_sem);
397 vma = find_vma(current->mm, hva);
398 if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
399 (vma->vm_flags & VM_PFNMAP)) {
400 pfn = vma->vm_pgoff +
401 ((hva - vma->vm_start) >> PAGE_SHIFT);
402 pgflags = pgprot_val(vma->vm_page_prot);
403 }
404 up_read(&current->mm->mmap_sem);
405 if (!pfn)
406 return -EFAULT;
407 } else {
408 page = pages[0];
409 pfn = page_to_pfn(page);
410 if (PageHuge(page)) {
411 page = compound_head(page);
412 pte_size <<= compound_order(page);
413 /* See if we can insert a 2MB large-page PTE here */
414 if (pte_size >= PMD_SIZE &&
415 (gpa & PMD_MASK & PAGE_MASK) ==
416 (hva & PMD_MASK & PAGE_MASK)) {
417 level = 1;
418 pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
419 }
420 }
421 /* See if we can provide write access */
422 if (writing) {
423 /*
424 * We assume gup_fast has set dirty on the host PTE.
425 */
426 pgflags |= _PAGE_WRITE;
427 } else {
428 local_irq_save(flags);
429 ptep = __find_linux_pte_or_hugepte(current->mm->pgd,
430 hva, NULL, NULL);
431 if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
432 pgflags |= _PAGE_WRITE;
433 local_irq_restore(flags);
434 }
435 }
436
437 /*
438 * Compute the PTE value that we need to insert.
439 */
440 pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
441 if (pgflags & _PAGE_WRITE)
442 pgflags |= _PAGE_DIRTY;
443 pte = pfn_pte(pfn, __pgprot(pgflags));
444
445 /* Allocate space in the tree and write the PTE */
446 ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
447 if (ret == -EBUSY) {
448 /*
449	 * There's already a PMD where we wanted to install a large page;
450 * for now, fall back to installing a small page.
451 */
452 level = 0;
453 pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
454 pte = pfn_pte(pfn, __pgprot(pgflags));
455 ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
456 }
457 if (ret == 0 || ret == -EAGAIN)
458 ret = RESUME_GUEST;
459
460 if (page) {
461 /*
462	 * We drop pages[0] here, not page, because page might
463	 * have been set to the head page of a compound page;
464	 * we have to drop the reference on the correct tail
465	 * page to match the get inside gup()
466 */
467 put_page(pages[0]);
468 }
469 return ret;
470}
471
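
One detail of the fault handler deserves a worked example. In the huge-page path the pfn is rounded down to the 2MB boundary (line 418), so the -EBUSY fallback at lines 452-453 must OR the low bits of the gfn back in to recover the single 4K frame that faulted; the earlier congruence test has already ensured gpa and hva can line up within the 2MB page. With arbitrary illustrative values:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SIZE	(1UL << 21)		/* 2MB */

int main(void)
{
	unsigned long per_pmd = PMD_SIZE >> PAGE_SHIFT;	/* 512 small pages */
	unsigned long gfn = 0x12345;			/* faulting guest frame */
	unsigned long pfn = 0x98765 & ~(per_pmd - 1);	/* head of the 2MB backing */

	pfn |= gfn & (per_pmd - 1);	/* the -EBUSY fallback: back to one 4K frame */
	printf("pfn=%#lx (offset %#lx into the 2MB page)\n",
	       pfn, gfn & (per_pmd - 1));
	return 0;
}
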
472static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
473 unsigned long gfn, unsigned int order)
474{
475 unsigned long i, limit;
476 unsigned long *dp;
477
478 if (!memslot->dirty_bitmap)
479 return;
480 limit = 1ul << order;
481 if (limit < BITS_PER_LONG) {
482 for (i = 0; i < limit; ++i)
483 mark_page_dirty(kvm, gfn + i);
484 return;
485 }
486 dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn);
487 limit /= BITS_PER_LONG;
488 for (i = 0; i < limit; ++i)
489 *dp++ = ~0ul;
490}
491
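
mark_pages_dirty() exploits natural alignment: a huge page's gfn range starts on a multiple of its size, so once 1 << order reaches BITS_PER_LONG the range covers whole words of the bitmap and can be filled with ~0ul stores. A toy version over a local bitmap, with base_gfn assumed to be 0:

#include <stdio.h>

#define BITS_PER_LONG	64

static unsigned long dirty_bitmap[1024 / BITS_PER_LONG];	/* 1024 pages */

static void mark_range_dirty(unsigned long gfn, unsigned int order)
{
	unsigned long i, limit = 1UL << order;
	unsigned long *dp;

	if (limit < BITS_PER_LONG) {		/* small range: bit by bit */
		for (i = 0; i < limit; ++i)
			dirty_bitmap[(gfn + i) / BITS_PER_LONG] |=
				1UL << ((gfn + i) % BITS_PER_LONG);
		return;
	}
	dp = dirty_bitmap + gfn / BITS_PER_LONG;	/* aligned: whole words */
	for (i = 0; i < limit / BITS_PER_LONG; ++i)
		*dp++ = ~0UL;
}

int main(void)
{
	mark_range_dirty(512, 9);	/* a 2MB page at gfn 512 fills words 8..15 */
	printf("word 7=%#lx word 8=%#lx\n", dirty_bitmap[7], dirty_bitmap[8]);
	return 0;
}
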
492/* Called with kvm->lock held */
493int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
494 unsigned long gfn)
495{
496 pte_t *ptep;
497 unsigned long gpa = gfn << PAGE_SHIFT;
498 unsigned int shift;
499 unsigned long old;
500
501 ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
502 NULL, &shift);
503 if (ptep && pte_present(*ptep)) {
504 old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
505 gpa, shift);
506 kvmppc_radix_tlbie_page(kvm, gpa, shift);
507 if (old & _PAGE_DIRTY) {
508 if (!shift)
509 mark_page_dirty(kvm, gfn);
510 else
511 mark_pages_dirty(kvm, memslot,
512 gfn, shift - PAGE_SHIFT);
513 }
514 }
515 return 0;
516}
517
518/* Called with kvm->lock held */
519int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
520 unsigned long gfn)
521{
522 pte_t *ptep;
523 unsigned long gpa = gfn << PAGE_SHIFT;
524 unsigned int shift;
525 int ref = 0;
526
527 ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
528 NULL, &shift);
529 if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
530 kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
531 gpa, shift);
532 /* XXX need to flush tlb here? */
533 ref = 1;
534 }
535 return ref;
536}
537
538/* Called with kvm->lock held */
539int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
540 unsigned long gfn)
541{
542 pte_t *ptep;
543 unsigned long gpa = gfn << PAGE_SHIFT;
544 unsigned int shift;
545 int ref = 0;
546
547 ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
548 NULL, &shift);
549 if (ptep && pte_present(*ptep) && pte_young(*ptep))
550 ref = 1;
551 return ref;
552}
553
554/* Returns the number of PAGE_SIZE pages that are dirty */
555static int kvm_radix_test_clear_dirty(struct kvm *kvm,
556 struct kvm_memory_slot *memslot, int pagenum)
557{
558 unsigned long gfn = memslot->base_gfn + pagenum;
559 unsigned long gpa = gfn << PAGE_SHIFT;
560 pte_t *ptep;
561 unsigned int shift;
562 int ret = 0;
563
564 ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
565 NULL, &shift);
566 if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
567 ret = 1;
568 if (shift)
569 ret = 1 << (shift - PAGE_SHIFT);
570 kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
571 gpa, shift);
572 kvmppc_radix_tlbie_page(kvm, gpa, shift);
573 }
574 return ret;
575}
576
577long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
578 struct kvm_memory_slot *memslot, unsigned long *map)
579{
580 unsigned long i, j;
581 unsigned long n, *p;
582 int npages;
583
584 /*
585 * Radix accumulates dirty bits in the first half of the
586 * memslot's dirty_bitmap area, for when pages are paged
587 * out or modified by the host directly. Pick up these
588 * bits and add them to the map.
589 */
590 n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long);
591 p = memslot->dirty_bitmap;
592 for (i = 0; i < n; ++i)
593 map[i] |= xchg(&p[i], 0);
594
595 for (i = 0; i < memslot->npages; i = j) {
596 npages = kvm_radix_test_clear_dirty(kvm, memslot, i);
597
598 /*
599 * Note that if npages > 0 then i must be a multiple of npages,
600 * since huge pages are only used to back the guest at guest
601 * real addresses that are a multiple of their size.
602 * Since we have at most one PTE covering any given guest
603 * real address, if npages > 1 we can skip to i + npages.
604 */
605 j = i + 1;
606 if (npages)
607 for (j = i; npages; ++j, --npages)
608 __set_bit_le(j, map);
609 }
610 return 0;
611}
612
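
The comment at lines 598-604 justifies the skip-ahead: a dirty huge PTE reports all of its small pages at once, so the scan can jump straight past them. A toy walk showing how much work that saves when one 2MB page backs gfns 512-1023; the test function is a stand-in, not the kernel one:

#include <stdio.h>

/* pretend a dirty 2MB page (512 subpages) backs gfns 512..1023 */
static int test_clear_dirty(unsigned long i)
{
	return i == 512 ? 512 : 0;
}

int main(void)
{
	unsigned long i, j, iters = 0, total = 2048;

	for (i = 0; i < total; i = j) {
		int npages = test_clear_dirty(i);

		++iters;
		j = i + 1;
		if (npages)
			j = i + npages;	/* one PTE covered the whole range */
	}
	printf("%lu iterations for %lu pages\n", iters, total);	/* 1537, not 2048 */
	return 0;
}
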
613static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
614 int psize, int *indexp)
615{
616 if (!mmu_psize_defs[psize].shift)
617 return;
618 info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift |
619 (mmu_psize_defs[psize].ap << 29);
620 ++(*indexp);
621}
622
623int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info)
624{
625 int i;
626
627 if (!radix_enabled())
628 return -EINVAL;
629 memset(info, 0, sizeof(*info));
630
631 /* 4k page size */
632 info->geometries[0].page_shift = 12;
633 info->geometries[0].level_bits[0] = 9;
634 for (i = 1; i < 4; ++i)
635 info->geometries[0].level_bits[i] = p9_supported_radix_bits[i];
636 /* 64k page size */
637 info->geometries[1].page_shift = 16;
638 for (i = 0; i < 4; ++i)
639 info->geometries[1].level_bits[i] = p9_supported_radix_bits[i];
640
641 i = 0;
642 add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i);
643 add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i);
644 add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i);
645 add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i);
646
647 return 0;
648}
649
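
add_rmmu_ap_encoding() packs each supported page size into one 32-bit word for userspace: the page shift in the low bits and the same mmu_psize_defs AP field used at line 154, placed at bit 29. A sketch of the packing; the AP values here (4K=0, 2M=1, 1G=2, 64K=5) are this writer's reading of the ISA 3.0 radix encodings and should be treated as assumptions, not authority:

#include <stdio.h>

struct psize { const char *name; unsigned int shift, ap; };

int main(void)
{
	struct psize sizes[] = {
		{ "4K", 12, 0 }, { "64K", 16, 5 }, { "2M", 21, 1 }, { "1G", 30, 2 },
	};
	unsigned int i;

	for (i = 0; i < 4; ++i)
		printf("%-3s -> ap_encoding %#010x\n", sizes[i].name,
		       sizes[i].shift | (sizes[i].ap << 29));
	return 0;
}
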
650int kvmppc_init_vm_radix(struct kvm *kvm)
651{
652 kvm->arch.pgtable = pgd_alloc(kvm->mm);
653 if (!kvm->arch.pgtable)
654 return -ENOMEM;
655 return 0;
656}
657
658void kvmppc_free_radix(struct kvm *kvm)
659{
660 unsigned long ig, iu, im;
661 pte_t *pte;
662 pmd_t *pmd;
663 pud_t *pud;
664 pgd_t *pgd;
665
666 if (!kvm->arch.pgtable)
667 return;
668 pgd = kvm->arch.pgtable;
669 for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
670 if (!pgd_present(*pgd))
671 continue;
672 pud = pud_offset(pgd, 0);
673 for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
674 if (!pud_present(*pud))
675 continue;
676 pmd = pmd_offset(pud, 0);
677 for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
678 if (pmd_huge(*pmd)) {
679 pmd_clear(pmd);
680 continue;
681 }
682 if (!pmd_present(*pmd))
683 continue;
684 pte = pte_offset_map(pmd, 0);
685 memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
686 kvmppc_pte_free(pte);
687 pmd_clear(pmd);
688 }
689 pmd_free(kvm->mm, pmd_offset(pud, 0));
690 pud_clear(pud);
691 }
692 pud_free(kvm->mm, pud_offset(pgd, 0));
693 pgd_clear(pgd);
694 }
695 pgd_free(kvm->mm, kvm->arch.pgtable);
696}
697
698static void pte_ctor(void *addr)
699{
700 memset(addr, 0, PTE_TABLE_SIZE);
701}
702
703int kvmppc_radix_init(void)
704{
705 unsigned long size = sizeof(void *) << PTE_INDEX_SIZE;
706
707 kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor);
708 if (!kvm_pte_cache)
709 return -ENOMEM;
710 return 0;
711}
712
713void kvmppc_radix_exit(void)
714{
715 kmem_cache_destroy(kvm_pte_cache);
716}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ec34e39471a7..e4a79679342e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1135,7 +1135,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
1135 /* 1135 /*
1136 * Userspace can only modify DPFD (default prefetch depth), 1136 * Userspace can only modify DPFD (default prefetch depth),
1137 * ILE (interrupt little-endian) and TC (translation control). 1137 * ILE (interrupt little-endian) and TC (translation control).
1138 * On POWER8 userspace can also modify AIL (alt. interrupt loc.) 1138 * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
1139 */ 1139 */
1140 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; 1140 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
1141 if (cpu_has_feature(CPU_FTR_ARCH_207S)) 1141 if (cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -1821,6 +1821,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
1821 vcpu->arch.vcore = vcore; 1821 vcpu->arch.vcore = vcore;
1822 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; 1822 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
1823 vcpu->arch.thread_cpu = -1; 1823 vcpu->arch.thread_cpu = -1;
1824 vcpu->arch.prev_cpu = -1;
1824 1825
1825 vcpu->arch.cpu_type = KVM_CPU_3S_64; 1826 vcpu->arch.cpu_type = KVM_CPU_3S_64;
1826 kvmppc_sanity_check(vcpu); 1827 kvmppc_sanity_check(vcpu);
@@ -1950,11 +1951,33 @@ static void kvmppc_release_hwthread(int cpu)
1950 tpaca->kvm_hstate.kvm_split_mode = NULL; 1951 tpaca->kvm_hstate.kvm_split_mode = NULL;
1951} 1952}
1952 1953
1954static void do_nothing(void *x)
1955{
1956}
1957
1958static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
1959{
1960 int i;
1961
1962 cpu = cpu_first_thread_sibling(cpu);
1963 cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
1964 /*
1965 * Make sure setting of bit in need_tlb_flush precedes
1966 * testing of cpu_in_guest bits. The matching barrier on
1967 * the other side is the first smp_mb() in kvmppc_run_core().
1968 */
1969 smp_mb();
1970 for (i = 0; i < threads_per_core; ++i)
1971 if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest))
1972 smp_call_function_single(cpu + i, do_nothing, NULL, 1);
1973}
1974
1953static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) 1975static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
1954{ 1976{
1955 int cpu; 1977 int cpu;
1956 struct paca_struct *tpaca; 1978 struct paca_struct *tpaca;
1957 struct kvmppc_vcore *mvc = vc->master_vcore; 1979 struct kvmppc_vcore *mvc = vc->master_vcore;
1980 struct kvm *kvm = vc->kvm;
1958 1981
1959 cpu = vc->pcpu; 1982 cpu = vc->pcpu;
1960 if (vcpu) { 1983 if (vcpu) {
@@ -1965,6 +1988,27 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
1965 cpu += vcpu->arch.ptid; 1988 cpu += vcpu->arch.ptid;
1966 vcpu->cpu = mvc->pcpu; 1989 vcpu->cpu = mvc->pcpu;
1967 vcpu->arch.thread_cpu = cpu; 1990 vcpu->arch.thread_cpu = cpu;
1991
1992 /*
1993 * With radix, the guest can do TLB invalidations itself,
1994 * and it could choose to use the local form (tlbiel) if
1995 * it is invalidating a translation that has only ever been
1996 * used on one vcpu. However, that doesn't mean it has
1997 * only ever been used on one physical cpu, since vcpus
1998 * can move around between pcpus. To cope with this, when
1999 * a vcpu moves from one pcpu to another, we need to tell
2000 * any vcpus running on the same core as this vcpu previously
2001 * ran to flush the TLB. The TLB is shared between threads,
2002 * so we use a single bit in .need_tlb_flush for all 4 threads.
2003 */
2004 if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
2005 if (vcpu->arch.prev_cpu >= 0 &&
2006 cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
2007 cpu_first_thread_sibling(cpu))
2008 radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
2009 vcpu->arch.prev_cpu = cpu;
2010 }
2011 cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
1968 } 2012 }
1969 tpaca = &paca[cpu]; 2013 tpaca = &paca[cpu];
1970 tpaca->kvm_hstate.kvm_vcpu = vcpu; 2014 tpaca->kvm_hstate.kvm_vcpu = vcpu;
@@ -2552,6 +2596,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
2552 kvmppc_release_hwthread(pcpu + i); 2596 kvmppc_release_hwthread(pcpu + i);
2553 if (sip && sip->napped[i]) 2597 if (sip && sip->napped[i])
2554 kvmppc_ipi_thread(pcpu + i); 2598 kvmppc_ipi_thread(pcpu + i);
2599 cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
2555 } 2600 }
2556 2601
2557 kvmppc_set_host_core(pcpu); 2602 kvmppc_set_host_core(pcpu);
@@ -2877,7 +2922,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
2877 smp_mb(); 2922 smp_mb();
2878 2923
2879 /* On the first time here, set up HTAB and VRMA */ 2924 /* On the first time here, set up HTAB and VRMA */
2880 if (!vcpu->kvm->arch.hpte_setup_done) { 2925 if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) {
2881 r = kvmppc_hv_setup_htab_rma(vcpu); 2926 r = kvmppc_hv_setup_htab_rma(vcpu);
2882 if (r) 2927 if (r)
2883 goto out; 2928 goto out;
@@ -2939,6 +2984,13 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
2939{ 2984{
2940 struct kvm_ppc_one_seg_page_size *sps; 2985 struct kvm_ppc_one_seg_page_size *sps;
2941 2986
2987 /*
2988 * Since we don't yet support HPT guests on a radix host,
2989 * return an error if the host uses radix.
2990 */
2991 if (radix_enabled())
2992 return -EINVAL;
2993
2942 info->flags = KVM_PPC_PAGE_SIZES_REAL; 2994 info->flags = KVM_PPC_PAGE_SIZES_REAL;
2943 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 2995 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
2944 info->flags |= KVM_PPC_1T_SEGMENTS; 2996 info->flags |= KVM_PPC_1T_SEGMENTS;
@@ -2961,8 +3013,10 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
2961{ 3013{
2962 struct kvm_memslots *slots; 3014 struct kvm_memslots *slots;
2963 struct kvm_memory_slot *memslot; 3015 struct kvm_memory_slot *memslot;
2964 int r; 3016 int i, r;
2965 unsigned long n; 3017 unsigned long n;
3018 unsigned long *buf;
3019 struct kvm_vcpu *vcpu;
2966 3020
2967 mutex_lock(&kvm->slots_lock); 3021 mutex_lock(&kvm->slots_lock);
2968 3022
@@ -2976,15 +3030,32 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
2976 if (!memslot->dirty_bitmap) 3030 if (!memslot->dirty_bitmap)
2977 goto out; 3031 goto out;
2978 3032
3033 /*
3034 * Use second half of bitmap area because radix accumulates
3035 * bits in the first half.
3036 */
2979 n = kvm_dirty_bitmap_bytes(memslot); 3037 n = kvm_dirty_bitmap_bytes(memslot);
2980 memset(memslot->dirty_bitmap, 0, n); 3038 buf = memslot->dirty_bitmap + n / sizeof(long);
3039 memset(buf, 0, n);
2981 3040
2982 r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap); 3041 if (kvm_is_radix(kvm))
3042 r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
3043 else
3044 r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
2983 if (r) 3045 if (r)
2984 goto out; 3046 goto out;
2985 3047
3048 /* Harvest dirty bits from VPA and DTL updates */
3049 /* Note: we never modify the SLB shadow buffer areas */
3050 kvm_for_each_vcpu(i, vcpu, kvm) {
3051 spin_lock(&vcpu->arch.vpa_update_lock);
3052 kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
3053 kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
3054 spin_unlock(&vcpu->arch.vpa_update_lock);
3055 }
3056
2986 r = -EFAULT; 3057 r = -EFAULT;
2987 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 3058 if (copy_to_user(log->dirty_bitmap, buf, n))
2988 goto out; 3059 goto out;
2989 3060
2990 r = 0; 3061 r = 0;
@@ -3005,6 +3076,15 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
3005static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, 3076static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
3006 unsigned long npages) 3077 unsigned long npages)
3007{ 3078{
3079 /*
3080 * For now, if radix_enabled() then we only support radix guests,
3081 * and in that case we don't need the rmap array.
3082 */
3083 if (radix_enabled()) {
3084 slot->arch.rmap = NULL;
3085 return 0;
3086 }
3087
3008 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 3088 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
3009 if (!slot->arch.rmap) 3089 if (!slot->arch.rmap)
3010 return -ENOMEM; 3090 return -ENOMEM;
@@ -3037,7 +3117,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
3037 if (npages) 3117 if (npages)
3038 atomic64_inc(&kvm->arch.mmio_update); 3118 atomic64_inc(&kvm->arch.mmio_update);
3039 3119
3040 if (npages && old->npages) { 3120 if (npages && old->npages && !kvm_is_radix(kvm)) {
3041 /* 3121 /*
3042 * If modifying a memslot, reset all the rmap dirty bits. 3122 * If modifying a memslot, reset all the rmap dirty bits.
3043 * If this is a new memslot, we don't need to do anything 3123 * If this is a new memslot, we don't need to do anything
@@ -3046,7 +3126,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
3046 */ 3126 */
3047 slots = kvm_memslots(kvm); 3127 slots = kvm_memslots(kvm);
3048 memslot = id_to_memslot(slots, mem->slot); 3128 memslot = id_to_memslot(slots, mem->slot);
3049 kvmppc_hv_get_dirty_log(kvm, memslot, NULL); 3129 kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL);
3050 } 3130 }
3051} 3131}
3052 3132
@@ -3085,14 +3165,20 @@ static void kvmppc_setup_partition_table(struct kvm *kvm)
3085{ 3165{
3086 unsigned long dw0, dw1; 3166 unsigned long dw0, dw1;
3087 3167
3088 /* PS field - page size for VRMA */ 3168 if (!kvm_is_radix(kvm)) {
3089 dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | 3169 /* PS field - page size for VRMA */
3090 ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); 3170 dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
3091 /* HTABSIZE and HTABORG fields */ 3171 ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
3092 dw0 |= kvm->arch.sdr1; 3172 /* HTABSIZE and HTABORG fields */
3173 dw0 |= kvm->arch.sdr1;
3093 3174
3094 /* Second dword has GR=0; other fields are unused since UPRT=0 */ 3175 /* Second dword as set by userspace */
3095 dw1 = 0; 3176 dw1 = kvm->arch.process_table;
3177 } else {
3178 dw0 = PATB_HR | radix__get_tree_size() |
3179 __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
3180 dw1 = PATB_GR | kvm->arch.process_table;
3181 }
3096 3182
3097 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); 3183 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
3098} 3184}
@@ -3262,6 +3348,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3262{ 3348{
3263 unsigned long lpcr, lpid; 3349 unsigned long lpcr, lpid;
3264 char buf[32]; 3350 char buf[32];
3351 int ret;
3265 3352
3266 /* Allocate the guest's logical partition ID */ 3353 /* Allocate the guest's logical partition ID */
3267 3354
@@ -3309,13 +3396,30 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3309 lpcr |= LPCR_HVICE; 3396 lpcr |= LPCR_HVICE;
3310 } 3397 }
3311 3398
3399 /*
3400 * For now, if the host uses radix, the guest must be radix.
3401 */
3402 if (radix_enabled()) {
3403 kvm->arch.radix = 1;
3404 lpcr &= ~LPCR_VPM1;
3405 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
3406 ret = kvmppc_init_vm_radix(kvm);
3407 if (ret) {
3408 kvmppc_free_lpid(kvm->arch.lpid);
3409 return ret;
3410 }
3411 kvmppc_setup_partition_table(kvm);
3412 }
3413
3312 kvm->arch.lpcr = lpcr; 3414 kvm->arch.lpcr = lpcr;
3313 3415
3314 /* 3416 /*
3315 * Work out how many sets the TLB has, for the use of 3417 * Work out how many sets the TLB has, for the use of
3316 * the TLB invalidation loop in book3s_hv_rmhandlers.S. 3418 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
3317 */ 3419 */
3318 if (cpu_has_feature(CPU_FTR_ARCH_300)) 3420 if (kvm_is_radix(kvm))
3421 kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */
3422 else if (cpu_has_feature(CPU_FTR_ARCH_300))
3319 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ 3423 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
3320 else if (cpu_has_feature(CPU_FTR_ARCH_207S)) 3424 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
3321 kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ 3425 kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */
@@ -3325,8 +3429,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3325 /* 3429 /*
3326 * Track that we now have a HV mode VM active. This blocks secondary 3430 * Track that we now have a HV mode VM active. This blocks secondary
3327 * CPU threads from coming online. 3431 * CPU threads from coming online.
3432 * On POWER9, we only need to do this for HPT guests on a radix
3433 * host, which is not yet supported.
3328 */ 3434 */
3329 kvm_hv_vm_activated(); 3435 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3436 kvm_hv_vm_activated();
3330 3437
3331 /* 3438 /*
3332 * Create a debugfs directory for the VM 3439 * Create a debugfs directory for the VM
@@ -3352,11 +3459,17 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
3352{ 3459{
3353 debugfs_remove_recursive(kvm->arch.debugfs_dir); 3460 debugfs_remove_recursive(kvm->arch.debugfs_dir);
3354 3461
3355 kvm_hv_vm_deactivated(); 3462 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3463 kvm_hv_vm_deactivated();
3356 3464
3357 kvmppc_free_vcores(kvm); 3465 kvmppc_free_vcores(kvm);
3358 3466
3359 kvmppc_free_hpt(kvm); 3467 kvmppc_free_lpid(kvm->arch.lpid);
3468
3469 if (kvm_is_radix(kvm))
3470 kvmppc_free_radix(kvm);
3471 else
3472 kvmppc_free_hpt(kvm);
3360 3473
3361 kvmppc_free_pimap(kvm); 3474 kvmppc_free_pimap(kvm);
3362} 3475}
@@ -3385,11 +3498,6 @@ static int kvmppc_core_check_processor_compat_hv(void)
3385 if (!cpu_has_feature(CPU_FTR_HVMODE) || 3498 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
3386 !cpu_has_feature(CPU_FTR_ARCH_206)) 3499 !cpu_has_feature(CPU_FTR_ARCH_206))
3387 return -EIO; 3500 return -EIO;
3388 /*
3389 * Disable KVM for Power9 in radix mode.
3390 */
3391 if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
3392 return -EIO;
3393 3501
3394 return 0; 3502 return 0;
3395} 3503}
@@ -3657,6 +3765,41 @@ static void init_default_hcalls(void)
3657 } 3765 }
3658} 3766}
3659 3767
3768static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
3769{
3770 unsigned long lpcr;
3771 int radix;
3772
3773 /* If not on a POWER9, reject it */
3774 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3775 return -ENODEV;
3776
3777 /* If any unknown flags set, reject it */
3778 if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
3779 return -EINVAL;
3780
3781 /* We can't change a guest to/from radix yet */
3782 radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
3783 if (radix != kvm_is_radix(kvm))
3784 return -EINVAL;
3785
3786 /* GR (guest radix) bit in process_table field must match */
3787 if (!!(cfg->process_table & PATB_GR) != radix)
3788 return -EINVAL;
3789
3790 /* Process table size field must be reasonable, i.e. <= 24 */
3791 if ((cfg->process_table & PRTS_MASK) > 24)
3792 return -EINVAL;
3793
3794 kvm->arch.process_table = cfg->process_table;
3795 kvmppc_setup_partition_table(kvm);
3796
3797 lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
3798 kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
3799
3800 return 0;
3801}
3802
3660static struct kvmppc_ops kvm_ops_hv = { 3803static struct kvmppc_ops kvm_ops_hv = {
3661 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, 3804 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
3662 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, 3805 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -3694,6 +3837,8 @@ static struct kvmppc_ops kvm_ops_hv = {
3694 .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, 3837 .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
3695 .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, 3838 .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
3696#endif 3839#endif
3840 .configure_mmu = kvmhv_configure_mmu,
3841 .get_rmmu_info = kvmhv_get_rmmu_info,
3697}; 3842};
3698 3843
3699static int kvm_init_subcore_bitmap(void) 3844static int kvm_init_subcore_bitmap(void)
@@ -3728,6 +3873,11 @@ static int kvm_init_subcore_bitmap(void)
3728 return 0; 3873 return 0;
3729} 3874}
3730 3875
3876static int kvmppc_radix_possible(void)
3877{
3878 return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
3879}
3880
3731static int kvmppc_book3s_init_hv(void) 3881static int kvmppc_book3s_init_hv(void)
3732{ 3882{
3733 int r; 3883 int r;
@@ -3767,12 +3917,19 @@ static int kvmppc_book3s_init_hv(void)
3767 init_vcore_lists(); 3917 init_vcore_lists();
3768 3918
3769 r = kvmppc_mmu_hv_init(); 3919 r = kvmppc_mmu_hv_init();
3920 if (r)
3921 return r;
3922
3923 if (kvmppc_radix_possible())
3924 r = kvmppc_radix_init();
3770 return r; 3925 return r;
3771} 3926}
3772 3927
3773static void kvmppc_book3s_exit_hv(void) 3928static void kvmppc_book3s_exit_hv(void)
3774{ 3929{
3775 kvmppc_free_host_rm_ops(); 3930 kvmppc_free_host_rm_ops();
3931 if (kvmppc_radix_possible())
3932 kvmppc_radix_exit();
3776 kvmppc_hv_ops = NULL; 3933 kvmppc_hv_ops = NULL;
3777} 3934}
3778 3935
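
A thread-to-core detail recurs through these hunks: on POWER9 the four SMT threads of a core share one TLB, so a single need_tlb_flush bit — the first thread's — stands for the whole core. radix_flush_cpu() above, global_invalidates() below, and the clrrdi in the tlbiel loop in book3s_hv_rmhandlers.S all rely on the same rounding. A sketch of the arithmetic, assuming SMT4 and a power-of-two thread count:

#include <stdio.h>

#define THREADS_PER_CORE 4	/* POWER9 SMT4; an assumption for this sketch */

static int first_thread_sibling(int cpu)
{
	return cpu & ~(THREADS_PER_CORE - 1);
}

int main(void)
{
	int cpu;

	/* all four threads of one core map to the same need_tlb_flush bit */
	for (cpu = 40; cpu < 44; ++cpu)
		printf("cpu %d -> bit %d\n", cpu, first_thread_sibling(cpu));
	return 0;
}
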
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 5bb24be0b346..2f69fbc19bb0 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -200,7 +200,6 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
200 200
201/* 201/*
202 * Send an interrupt or message to another CPU. 202 * Send an interrupt or message to another CPU.
203 * This can only be called in real mode.
204 * The caller needs to include any barrier needed to order writes 203 * The caller needs to include any barrier needed to order writes
205 * to memory vs. the IPI/message. 204 * to memory vs. the IPI/message.
206 */ 205 */
@@ -229,8 +228,7 @@ void kvmhv_rm_send_ipi(int cpu)
229 if (xics_phys) 228 if (xics_phys)
230 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); 229 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
231 else 230 else
232 opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), 231 opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
233 IPI_PRIORITY);
234} 232}
235 233
236/* 234/*
@@ -412,14 +410,13 @@ static long kvmppc_read_one_intr(bool *again)
412 410
413 /* Now read the interrupt from the ICP */ 411 /* Now read the interrupt from the ICP */
414 xics_phys = local_paca->kvm_hstate.xics_phys; 412 xics_phys = local_paca->kvm_hstate.xics_phys;
415 if (!xics_phys) { 413 rc = 0;
416 /* Use OPAL to read the XIRR */ 414 if (!xics_phys)
417 rc = opal_rm_int_get_xirr(&xirr, false); 415 rc = opal_int_get_xirr(&xirr, false);
418 if (rc < 0) 416 else
419 return 1;
420 } else {
421 xirr = _lwzcix(xics_phys + XICS_XIRR); 417 xirr = _lwzcix(xics_phys + XICS_XIRR);
422 } 418 if (rc < 0)
419 return 1;
423 420
424 /* 421 /*
425 * Save XIRR for later. Since we get control in reverse endian 422 * Save XIRR for later. Since we get control in reverse endian
@@ -445,15 +442,16 @@ static long kvmppc_read_one_intr(bool *again)
445 * If it is an IPI, clear the MFRR and EOI it. 442 * If it is an IPI, clear the MFRR and EOI it.
446 */ 443 */
447 if (xisr == XICS_IPI) { 444 if (xisr == XICS_IPI) {
445 rc = 0;
448 if (xics_phys) { 446 if (xics_phys) {
449 _stbcix(xics_phys + XICS_MFRR, 0xff); 447 _stbcix(xics_phys + XICS_MFRR, 0xff);
450 _stwcix(xics_phys + XICS_XIRR, xirr); 448 _stwcix(xics_phys + XICS_XIRR, xirr);
451 } else { 449 } else {
452 opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); 450 opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
453 rc = opal_rm_int_eoi(h_xirr); 451 rc = opal_int_eoi(h_xirr);
454 /* If rc > 0, there is another interrupt pending */
455 *again = rc > 0;
456 } 452 }
453 /* If rc > 0, there is another interrupt pending */
454 *again = rc > 0;
457 455
458 /* 456 /*
459 * Need to ensure side effects of above stores 457 * Need to ensure side effects of above stores
@@ -474,8 +472,8 @@ static long kvmppc_read_one_intr(bool *again)
474 if (xics_phys) 472 if (xics_phys)
475 _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); 473 _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
476 else 474 else
477 opal_rm_int_set_mfrr(hard_smp_processor_id(), 475 opal_int_set_mfrr(hard_smp_processor_id(),
478 IPI_PRIORITY); 476 IPI_PRIORITY);
479 /* Let side effects complete */ 477 /* Let side effects complete */
480 smp_mb(); 478 smp_mb();
481 return 1; 479 return 1;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 9ef3c4be952f..b095afcd4309 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -43,6 +43,7 @@ static void *real_vmalloc_addr(void *x)
43static int global_invalidates(struct kvm *kvm, unsigned long flags) 43static int global_invalidates(struct kvm *kvm, unsigned long flags)
44{ 44{
45 int global; 45 int global;
46 int cpu;
46 47
47 /* 48 /*
48 * If there is only one vcore, and it's currently running, 49 * If there is only one vcore, and it's currently running,
@@ -60,8 +61,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
60 /* any other core might now have stale TLB entries... */ 61 /* any other core might now have stale TLB entries... */
61 smp_wmb(); 62 smp_wmb();
62 cpumask_setall(&kvm->arch.need_tlb_flush); 63 cpumask_setall(&kvm->arch.need_tlb_flush);
63 cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu, 64 cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
64 &kvm->arch.need_tlb_flush); 65 /*
66 * On POWER9, threads are independent but the TLB is shared,
67 * so use the bit for the first thread to represent the core.
68 */
69 if (cpu_has_feature(CPU_FTR_ARCH_300))
70 cpu = cpu_first_thread_sibling(cpu);
71 cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
65 } 72 }
66 73
67 return global; 74 return global;
@@ -182,6 +189,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
182 unsigned long mmu_seq; 189 unsigned long mmu_seq;
183 unsigned long rcbits, irq_flags = 0; 190 unsigned long rcbits, irq_flags = 0;
184 191
192 if (kvm_is_radix(kvm))
193 return H_FUNCTION;
185 psize = hpte_page_size(pteh, ptel); 194 psize = hpte_page_size(pteh, ptel);
186 if (!psize) 195 if (!psize)
187 return H_PARAMETER; 196 return H_PARAMETER;
@@ -458,6 +467,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
458 struct revmap_entry *rev; 467 struct revmap_entry *rev;
459 u64 pte, orig_pte, pte_r; 468 u64 pte, orig_pte, pte_r;
460 469
470 if (kvm_is_radix(kvm))
471 return H_FUNCTION;
461 if (pte_index >= kvm->arch.hpt_npte) 472 if (pte_index >= kvm->arch.hpt_npte)
462 return H_PARAMETER; 473 return H_PARAMETER;
463 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); 474 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
@@ -529,6 +540,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
529 struct revmap_entry *rev, *revs[4]; 540 struct revmap_entry *rev, *revs[4];
530 u64 hp0, hp1; 541 u64 hp0, hp1;
531 542
543 if (kvm_is_radix(kvm))
544 return H_FUNCTION;
532 global = global_invalidates(kvm, 0); 545 global = global_invalidates(kvm, 0);
533 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 546 for (i = 0; i < 4 && ret == H_SUCCESS; ) {
534 n = 0; 547 n = 0;
@@ -642,6 +655,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
642 unsigned long v, r, rb, mask, bits; 655 unsigned long v, r, rb, mask, bits;
643 u64 pte_v, pte_r; 656 u64 pte_v, pte_r;
644 657
658 if (kvm_is_radix(kvm))
659 return H_FUNCTION;
645 if (pte_index >= kvm->arch.hpt_npte) 660 if (pte_index >= kvm->arch.hpt_npte)
646 return H_PARAMETER; 661 return H_PARAMETER;
647 662
@@ -711,6 +726,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
711 int i, n = 1; 726 int i, n = 1;
712 struct revmap_entry *rev = NULL; 727 struct revmap_entry *rev = NULL;
713 728
729 if (kvm_is_radix(kvm))
730 return H_FUNCTION;
714 if (pte_index >= kvm->arch.hpt_npte) 731 if (pte_index >= kvm->arch.hpt_npte)
715 return H_PARAMETER; 732 return H_PARAMETER;
716 if (flags & H_READ_4) { 733 if (flags & H_READ_4) {
@@ -750,6 +767,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
750 unsigned long *rmap; 767 unsigned long *rmap;
751 long ret = H_NOT_FOUND; 768 long ret = H_NOT_FOUND;
752 769
770 if (kvm_is_radix(kvm))
771 return H_FUNCTION;
753 if (pte_index >= kvm->arch.hpt_npte) 772 if (pte_index >= kvm->arch.hpt_npte)
754 return H_PARAMETER; 773 return H_PARAMETER;
755 774
@@ -796,6 +815,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
796 unsigned long *rmap; 815 unsigned long *rmap;
797 long ret = H_NOT_FOUND; 816 long ret = H_NOT_FOUND;
798 817
818 if (kvm_is_radix(kvm))
819 return H_FUNCTION;
799 if (pte_index >= kvm->arch.hpt_npte) 820 if (pte_index >= kvm->arch.hpt_npte)
800 return H_PARAMETER; 821 return H_PARAMETER;
801 822
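
Every HPT hypercall in the hunk above gains the same two-line guard: a radix guest has no hashed page table, so the hcall fails as an unsupported function (H_FUNCTION is PAPR's "function not supported" return, -2). A compilable toy of the pattern, with the kvm struct and return values simplified:

#include <stdio.h>

#define H_SUCCESS	0
#define H_FUNCTION	(-2L)	/* PAPR: function not supported */

struct kvm { int radix; };

static int kvm_is_radix(struct kvm *kvm)
{
	return kvm->radix;
}

static long h_enter(struct kvm *kvm)
{
	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	return H_SUCCESS;	/* real HPT processing elided */
}

int main(void)
{
	struct kvm hpt = { 0 }, radix = { 1 };

	printf("hpt: %ld, radix: %ld\n", h_enter(&hpt), h_enter(&radix));
	return 0;
}
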
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 06edc4366639..29f43ed6d5eb 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(kvm_irq_bypass);
36 36
37static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 37static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
38 u32 new_irq); 38 u32 new_irq);
39static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu); 39static int xics_opal_set_server(unsigned int hw_irq, int server_cpu);
40 40
41/* -- ICS routines -- */ 41/* -- ICS routines -- */
42static void ics_rm_check_resend(struct kvmppc_xics *xics, 42static void ics_rm_check_resend(struct kvmppc_xics *xics,
@@ -70,11 +70,9 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
70 hcpu = hcore << threads_shift; 70 hcpu = hcore << threads_shift;
71 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; 71 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
72 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); 72 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
73 if (paca[hcpu].kvm_hstate.xics_phys) 73 kvmppc_set_host_ipi(hcpu, 1);
74 icp_native_cause_ipi_rm(hcpu); 74 smp_mb();
75 else 75 kvmhv_rm_send_ipi(hcpu);
76 opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu),
77 IPI_PRIORITY);
78} 76}
79#else 77#else
80static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } 78static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
@@ -730,7 +728,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
730 ++vcpu->stat.pthru_host; 728 ++vcpu->stat.pthru_host;
731 if (state->intr_cpu != pcpu) { 729 if (state->intr_cpu != pcpu) {
732 ++vcpu->stat.pthru_bad_aff; 730 ++vcpu->stat.pthru_bad_aff;
733 xics_opal_rm_set_server(state->host_irq, pcpu); 731 xics_opal_set_server(state->host_irq, pcpu);
734 } 732 }
735 state->intr_cpu = -1; 733 state->intr_cpu = -1;
736 } 734 }
@@ -758,16 +756,16 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
758 if (xics_phys) { 756 if (xics_phys) {
759 _stwcix(xics_phys + XICS_XIRR, xirr); 757 _stwcix(xics_phys + XICS_XIRR, xirr);
760 } else { 758 } else {
761 rc = opal_rm_int_eoi(be32_to_cpu(xirr)); 759 rc = opal_int_eoi(be32_to_cpu(xirr));
762 *again = rc > 0; 760 *again = rc > 0;
763 } 761 }
764} 762}
765 763
766static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) 764static int xics_opal_set_server(unsigned int hw_irq, int server_cpu)
767{ 765{
768 unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2; 766 unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
769 767
770 return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); 768 return opal_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
771} 769}
772 770
773/* 771/*
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9338a818e05c..47414a6fe2dd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -148,6 +148,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
148 addi r1, r1, 112 148 addi r1, r1, 112
149 ld r7, HSTATE_HOST_MSR(r13) 149 ld r7, HSTATE_HOST_MSR(r13)
150 150
151 /*
152 * If we came back from the guest via a relocation-on interrupt,
153 * we will be in virtual mode at this point, which makes it a
154 * little easier to get back to the caller.
155 */
156 mfmsr r0
157 andi. r0, r0, MSR_IR /* in real mode? */
158 bne .Lvirt_return
159
151 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK 160 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
152 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 161 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
153 beq 11f 162 beq 11f
@@ -181,6 +190,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
181 mtspr SPRN_HSRR1, r7 190 mtspr SPRN_HSRR1, r7
182 ba 0xe80 191 ba 0xe80
183 192
193 /* Virtual-mode return - can't get here for HMI or machine check */
194.Lvirt_return:
195 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
196 beq 16f
197 cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
198 beq 17f
199 andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
200 beq 18f
201 mtmsrd r7, 1 /* if so then re-enable them */
20218: mtlr r8
203 blr
204
20516: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
206 mtspr SPRN_HSRR1, r7
207 b exc_virt_0x4500_hardware_interrupt
208
20917: mtspr SPRN_HSRR0, r8
210 mtspr SPRN_HSRR1, r7
211 b exc_virt_0x4e80_h_doorbell
212
184kvmppc_primary_no_guest: 213kvmppc_primary_no_guest:
185 /* We handle this much like a ceded vcpu */ 214 /* We handle this much like a ceded vcpu */
186 /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ 215 /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -518,6 +547,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
518/* Stack frame offsets */ 547/* Stack frame offsets */
519#define STACK_SLOT_TID (112-16) 548#define STACK_SLOT_TID (112-16)
520#define STACK_SLOT_PSSCR (112-24) 549#define STACK_SLOT_PSSCR (112-24)
550#define STACK_SLOT_PID (112-32)
521 551
522.global kvmppc_hv_entry 552.global kvmppc_hv_entry
523kvmppc_hv_entry: 553kvmppc_hv_entry:
@@ -530,6 +560,7 @@ kvmppc_hv_entry:
530 * R1 = host R1 560 * R1 = host R1
531 * R2 = TOC 561 * R2 = TOC
532 * all other volatile GPRS = free 562 * all other volatile GPRS = free
563 * Does not preserve non-volatile GPRs or CR fields
533 */ 564 */
534 mflr r0 565 mflr r0
535 std r0, PPC_LR_STKOFF(r1) 566 std r0, PPC_LR_STKOFF(r1)
@@ -549,32 +580,38 @@ kvmppc_hv_entry:
549 bl kvmhv_start_timing 580 bl kvmhv_start_timing
5501: 5811:
551#endif 582#endif
552 /* Clear out SLB */ 583
584 /* Use cr7 as an indication of radix mode */
585 ld r5, HSTATE_KVM_VCORE(r13)
586 ld r9, VCORE_KVM(r5) /* pointer to struct kvm */
587 lbz r0, KVM_RADIX(r9)
588 cmpwi cr7, r0, 0
589
590 /* Clear out SLB if hash */
591 bne cr7, 2f
553 li r6,0 592 li r6,0
554 slbmte r6,r6 593 slbmte r6,r6
555 slbia 594 slbia
556 ptesync 595 ptesync
557 5962:
558 /* 597 /*
559 * POWER7/POWER8 host -> guest partition switch code. 598 * POWER7/POWER8 host -> guest partition switch code.
560 * We don't have to lock against concurrent tlbies, 599 * We don't have to lock against concurrent tlbies,
561 * but we do have to coordinate across hardware threads. 600 * but we do have to coordinate across hardware threads.
562 */ 601 */
563 /* Set bit in entry map iff exit map is zero. */ 602 /* Set bit in entry map iff exit map is zero. */
564 ld r5, HSTATE_KVM_VCORE(r13)
565 li r7, 1 603 li r7, 1
566 lbz r6, HSTATE_PTID(r13) 604 lbz r6, HSTATE_PTID(r13)
567 sld r7, r7, r6 605 sld r7, r7, r6
568 addi r9, r5, VCORE_ENTRY_EXIT 606 addi r8, r5, VCORE_ENTRY_EXIT
56921: lwarx r3, 0, r9 60721: lwarx r3, 0, r8
570 cmpwi r3, 0x100 /* any threads starting to exit? */ 608 cmpwi r3, 0x100 /* any threads starting to exit? */
571 bge secondary_too_late /* if so we're too late to the party */ 609 bge secondary_too_late /* if so we're too late to the party */
572 or r3, r3, r7 610 or r3, r3, r7
573 stwcx. r3, 0, r9 611 stwcx. r3, 0, r8
574 bne 21b 612 bne 21b
575 613
576 /* Primary thread switches to guest partition. */ 614 /* Primary thread switches to guest partition. */
577 ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
578 cmpwi r6,0 615 cmpwi r6,0
579 bne 10f 616 bne 10f
580 lwz r7,KVM_LPID(r9) 617 lwz r7,KVM_LPID(r9)
@@ -590,30 +627,44 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
590 627
591 /* See if we need to flush the TLB */ 628 /* See if we need to flush the TLB */
592 lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ 629 lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
630BEGIN_FTR_SECTION
631 /*
632 * On POWER9, individual threads can come in here, but the
633 * TLB is shared between the 4 threads in a core, hence
634 * invalidating on one thread invalidates for all.
635 * Thus we make all 4 threads use the same bit here.
636 */
637 clrrdi r6,r6,2
638END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
593 clrldi r7,r6,64-6 /* extract bit number (6 bits) */ 639 clrldi r7,r6,64-6 /* extract bit number (6 bits) */
594 srdi r6,r6,6 /* doubleword number */ 640 srdi r6,r6,6 /* doubleword number */
595 sldi r6,r6,3 /* address offset */ 641 sldi r6,r6,3 /* address offset */
596 add r6,r6,r9 642 add r6,r6,r9
597 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ 643 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
598 li r0,1 644 li r8,1
599 sld r0,r0,r7 645 sld r8,r8,r7
600 ld r7,0(r6) 646 ld r7,0(r6)
601 and. r7,r7,r0 647 and. r7,r7,r8
602 beq 22f 648 beq 22f
60323: ldarx r7,0,r6 /* if set, clear the bit */
604 andc r7,r7,r0
605 stdcx. r7,0,r6
606 bne 23b
607 /* Flush the TLB of any entries for this LPID */ 649 /* Flush the TLB of any entries for this LPID */
608 lwz r6,KVM_TLB_SETS(r9) 650 lwz r0,KVM_TLB_SETS(r9)
609 li r0,0 /* RS for P9 version of tlbiel */ 651 mtctr r0
610 mtctr r6
611 li r7,0x800 /* IS field = 0b10 */ 652 li r7,0x800 /* IS field = 0b10 */
612 ptesync 653 ptesync
61328: tlbiel r7 654 li r0,0 /* RS for P9 version of tlbiel */
655 bne cr7, 29f
65628: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */
614 addi r7,r7,0x1000 657 addi r7,r7,0x1000
615 bdnz 28b 658 bdnz 28b
616 ptesync 659 b 30f
66029: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */
661 addi r7,r7,0x1000
662 bdnz 29b
66330: ptesync
66423: ldarx r7,0,r6 /* clear the bit after TLB flushed */
665 andc r7,r7,r8
666 stdcx. r7,0,r6
667 bne 23b
617 668
618 /* Add timebase offset onto timebase */ 669 /* Add timebase offset onto timebase */
61922: ld r8,VCORE_TB_OFFSET(r5) 67022: ld r8,VCORE_TB_OFFSET(r5)
@@ -658,7 +709,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
658 beq kvmppc_primary_no_guest 709 beq kvmppc_primary_no_guest
659kvmppc_got_guest: 710kvmppc_got_guest:
660 711
661 /* Load up guest SLB entries */ 712 /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
662 lwz r5,VCPU_SLB_MAX(r4) 713 lwz r5,VCPU_SLB_MAX(r4)
663 cmpwi r5,0 714 cmpwi r5,0
664 beq 9f 715 beq 9f
@@ -696,8 +747,10 @@ kvmppc_got_guest:
696BEGIN_FTR_SECTION 747BEGIN_FTR_SECTION
697 mfspr r5, SPRN_TIDR 748 mfspr r5, SPRN_TIDR
698 mfspr r6, SPRN_PSSCR 749 mfspr r6, SPRN_PSSCR
750 mfspr r7, SPRN_PID
699 std r5, STACK_SLOT_TID(r1) 751 std r5, STACK_SLOT_TID(r1)
700 std r6, STACK_SLOT_PSSCR(r1) 752 std r6, STACK_SLOT_PSSCR(r1)
753 std r7, STACK_SLOT_PID(r1)
701END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 754END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
702 755
703BEGIN_FTR_SECTION 756BEGIN_FTR_SECTION
@@ -824,6 +877,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
824 mtspr SPRN_PID, r7 877 mtspr SPRN_PID, r7
825 mtspr SPRN_WORT, r8 878 mtspr SPRN_WORT, r8
826BEGIN_FTR_SECTION 879BEGIN_FTR_SECTION
880 PPC_INVALIDATE_ERAT
881END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
882BEGIN_FTR_SECTION
827 /* POWER8-only registers */ 883 /* POWER8-only registers */
828 ld r5, VCPU_TCSCR(r4) 884 ld r5, VCPU_TCSCR(r4)
829 ld r6, VCPU_ACOP(r4) 885 ld r6, VCPU_ACOP(r4)
@@ -1057,13 +1113,13 @@ hdec_soon:
1057kvmppc_interrupt_hv: 1113kvmppc_interrupt_hv:
1058 /* 1114 /*
1059 * Register contents: 1115 * Register contents:
1060 * R12 = interrupt vector 1116 * R12 = (guest CR << 32) | interrupt vector
1061 * R13 = PACA 1117 * R13 = PACA
1062 * guest CR, R12 saved in shadow VCPU SCRATCH1/0 1118 * guest R12 saved in shadow VCPU SCRATCH0
1119 * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE
1063 * guest R13 saved in SPRN_SCRATCH0 1120 * guest R13 saved in SPRN_SCRATCH0
1064 */ 1121 */
1065 std r9, HSTATE_SCRATCH2(r13) 1122 std r9, HSTATE_SCRATCH2(r13)
1066
1067 lbz r9, HSTATE_IN_GUEST(r13) 1123 lbz r9, HSTATE_IN_GUEST(r13)
1068 cmpwi r9, KVM_GUEST_MODE_HOST_HV 1124 cmpwi r9, KVM_GUEST_MODE_HOST_HV
1069 beq kvmppc_bad_host_intr 1125 beq kvmppc_bad_host_intr
@@ -1094,8 +1150,9 @@ kvmppc_interrupt_hv:
1094 std r10, VCPU_GPR(R10)(r9) 1150 std r10, VCPU_GPR(R10)(r9)
1095 std r11, VCPU_GPR(R11)(r9) 1151 std r11, VCPU_GPR(R11)(r9)
1096 ld r3, HSTATE_SCRATCH0(r13) 1152 ld r3, HSTATE_SCRATCH0(r13)
1097 lwz r4, HSTATE_SCRATCH1(r13)
1098 std r3, VCPU_GPR(R12)(r9) 1153 std r3, VCPU_GPR(R12)(r9)
1154 /* CR is in the high half of r12 */
1155 srdi r4, r12, 32
1099 stw r4, VCPU_CR(r9) 1156 stw r4, VCPU_CR(r9)
1100BEGIN_FTR_SECTION 1157BEGIN_FTR_SECTION
1101 ld r3, HSTATE_CFAR(r13) 1158 ld r3, HSTATE_CFAR(r13)
@@ -1114,6 +1171,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1114 mfspr r11, SPRN_SRR1 1171 mfspr r11, SPRN_SRR1
1115 std r10, VCPU_SRR0(r9) 1172 std r10, VCPU_SRR0(r9)
1116 std r11, VCPU_SRR1(r9) 1173 std r11, VCPU_SRR1(r9)
1174 /* trap is in the low half of r12, clear CR from the high half */
1175 clrldi r12, r12, 32
1117 andi. r0, r12, 2 /* need to read HSRR0/1? */ 1176 andi. r0, r12, 2 /* need to read HSRR0/1? */
1118 beq 1f 1177 beq 1f
1119 mfspr r10, SPRN_HSRR0 1178 mfspr r10, SPRN_HSRR0
@@ -1149,7 +1208,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
114911: stw r3,VCPU_HEIR(r9) 120811: stw r3,VCPU_HEIR(r9)
1150 1209
1151 /* these are volatile across C function calls */ 1210 /* these are volatile across C function calls */
1211#ifdef CONFIG_RELOCATABLE
1212 ld r3, HSTATE_SCRATCH1(r13)
1213 mtctr r3
1214#else
1152 mfctr r3 1215 mfctr r3
1216#endif
1153 mfxer r4 1217 mfxer r4
1154 std r3, VCPU_CTR(r9) 1218 std r3, VCPU_CTR(r9)
1155 std r4, VCPU_XER(r9) 1219 std r4, VCPU_XER(r9)
@@ -1285,11 +1349,15 @@ mc_cont:
1285 mtspr SPRN_CTRLT,r6 1349 mtspr SPRN_CTRLT,r6
12864: 13504:
1287 /* Read the guest SLB and save it away */ 1351 /* Read the guest SLB and save it away */
1352 ld r5, VCPU_KVM(r9)
1353 lbz r0, KVM_RADIX(r5)
1354 cmpwi r0, 0
1355 li r5, 0
1356 bne 3f /* for radix, save 0 entries */
1288 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ 1357 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
1289 mtctr r0 1358 mtctr r0
1290 li r6,0 1359 li r6,0
1291 addi r7,r9,VCPU_SLB 1360 addi r7,r9,VCPU_SLB
1292 li r5,0
12931: slbmfee r8,r6 13611: slbmfee r8,r6
1294 andis. r0,r8,SLB_ESID_V@h 1362 andis. r0,r8,SLB_ESID_V@h
1295 beq 2f 1363 beq 2f
@@ -1301,7 +1369,7 @@ mc_cont:
1301 addi r5,r5,1 1369 addi r5,r5,1
13022: addi r6,r6,1 13702: addi r6,r6,1
1303 bdnz 1b 1371 bdnz 1b
1304 stw r5,VCPU_SLB_MAX(r9) 13723: stw r5,VCPU_SLB_MAX(r9)
1305 1373
1306 /* 1374 /*
1307 * Save the guest PURR/SPURR 1375 * Save the guest PURR/SPURR
@@ -1550,9 +1618,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1550BEGIN_FTR_SECTION 1618BEGIN_FTR_SECTION
1551 ld r5, STACK_SLOT_TID(r1) 1619 ld r5, STACK_SLOT_TID(r1)
1552 ld r6, STACK_SLOT_PSSCR(r1) 1620 ld r6, STACK_SLOT_PSSCR(r1)
1621 ld r7, STACK_SLOT_PID(r1)
1553 mtspr SPRN_TIDR, r5 1622 mtspr SPRN_TIDR, r5
1554 mtspr SPRN_PSSCR, r6 1623 mtspr SPRN_PSSCR, r6
1624 mtspr SPRN_PID, r7
1555END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1625END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1626BEGIN_FTR_SECTION
1627 PPC_INVALIDATE_ERAT
1628END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
1556 1629
1557 /* 1630 /*
1558 * POWER7/POWER8 guest -> host partition switch code. 1631 * POWER7/POWER8 guest -> host partition switch code.
@@ -1663,6 +1736,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1663 isync 1736 isync
1664 1737
1665 /* load host SLB entries */ 1738 /* load host SLB entries */
1739BEGIN_MMU_FTR_SECTION
1740 b 0f
1741END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
1666 ld r8,PACA_SLBSHADOWPTR(r13) 1742 ld r8,PACA_SLBSHADOWPTR(r13)
1667 1743
1668 .rept SLB_NUM_BOLTED 1744 .rept SLB_NUM_BOLTED
@@ -1675,7 +1751,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1675 slbmte r6,r5 1751 slbmte r6,r5
16761: addi r8,r8,16 17521: addi r8,r8,16
1677 .endr 1753 .endr
1678 17540:
1679#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1755#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1680 /* Finish timing, if we have a vcpu */ 1756 /* Finish timing, if we have a vcpu */
1681 ld r4, HSTATE_KVM_VCPU(r13) 1757 ld r4, HSTATE_KVM_VCPU(r13)
@@ -1702,11 +1778,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1702 * reflect the HDSI to the guest as a DSI. 1778 * reflect the HDSI to the guest as a DSI.
1703 */ 1779 */
1704kvmppc_hdsi: 1780kvmppc_hdsi:
1781 ld r3, VCPU_KVM(r9)
1782 lbz r0, KVM_RADIX(r3)
1783 cmpwi r0, 0
1705 mfspr r4, SPRN_HDAR 1784 mfspr r4, SPRN_HDAR
1706 mfspr r6, SPRN_HDSISR 1785 mfspr r6, SPRN_HDSISR
1786 bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
1707 /* HPTE not found fault or protection fault? */ 1787 /* HPTE not found fault or protection fault? */
1708 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h 1788 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
1709 beq 1f /* if not, send it to the guest */ 1789 beq 1f /* if not, send it to the guest */
1790BEGIN_FTR_SECTION
1791 mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
1792 b 4f
1793END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1710 andi. r0, r11, MSR_DR /* data relocation enabled? */ 1794 andi. r0, r11, MSR_DR /* data relocation enabled? */
1711 beq 3f 1795 beq 3f
1712 clrrdi r0, r4, 28 1796 clrrdi r0, r4, 28
@@ -1776,13 +1860,29 @@ fast_interrupt_c_return:
1776 stb r0, HSTATE_IN_GUEST(r13) 1860 stb r0, HSTATE_IN_GUEST(r13)
1777 b guest_exit_cont 1861 b guest_exit_cont
1778 1862
1863.Lradix_hdsi:
1864 std r4, VCPU_FAULT_DAR(r9)
1865 stw r6, VCPU_FAULT_DSISR(r9)
1866.Lradix_hisi:
1867 mfspr r5, SPRN_ASDR
1868 std r5, VCPU_FAULT_GPA(r9)
1869 b guest_exit_cont
1870
1779/* 1871/*
1780 * Similarly for an HISI, reflect it to the guest as an ISI unless 1872 * Similarly for an HISI, reflect it to the guest as an ISI unless
1781 * it is an HPTE not found fault for a page that we have paged out. 1873 * it is an HPTE not found fault for a page that we have paged out.
1782 */ 1874 */
1783kvmppc_hisi: 1875kvmppc_hisi:
1876 ld r3, VCPU_KVM(r9)
1877 lbz r0, KVM_RADIX(r3)
1878 cmpwi r0, 0
1879 bne .Lradix_hisi /* for radix, just save ASDR */
1784 andis. r0, r11, SRR1_ISI_NOPT@h 1880 andis. r0, r11, SRR1_ISI_NOPT@h
1785 beq 1f 1881 beq 1f
1882BEGIN_FTR_SECTION
1883 mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
1884 b 4f
1885END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1786 andi. r0, r11, MSR_IR /* instruction relocation enabled? */ 1886 andi. r0, r11, MSR_IR /* instruction relocation enabled? */
1787 beq 3f 1887 beq 3f
1788 clrrdi r0, r10, 28 1888 clrrdi r0, r10, 28
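
The register-convention change in this file is worth restating in C: interrupt entry now delivers r12 as (guest CR << 32) | trap, which frees HSTATE_SCRATCH1 to carry the guest CTR on RELOCATABLE kernels. The srdi/clrldi pair that unpacks it amounts to:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t guest_cr = 0x28004482;	/* arbitrary CR image */
	uint32_t vector = 0x500;	/* external interrupt */

	uint64_t r12 = ((uint64_t)guest_cr << 32) | vector;

	printf("cr   = %#x\n", (uint32_t)(r12 >> 32));		/* srdi r4,r12,32 */
	printf("trap = %#x\n", (uint32_t)(r12 & 0xffffffff));	/* clrldi r12,r12,32 */
	return 0;
}
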
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index ca8f174289bb..2a2b96d53999 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -167,20 +167,38 @@ kvmppc_handler_trampoline_enter_end:
167 * * 167 * *
168 *****************************************************************************/ 168 *****************************************************************************/
169 169
170.global kvmppc_handler_trampoline_exit
171kvmppc_handler_trampoline_exit:
172
173.global kvmppc_interrupt_pr 170.global kvmppc_interrupt_pr
174kvmppc_interrupt_pr: 171kvmppc_interrupt_pr:
172 /* 64-bit entry. Register usage at this point:
173 *
174 * SPRG_SCRATCH0 = guest R13
175 * R12 = (guest CR << 32) | exit handler id
176 * R13 = PACA
177 * HSTATE.SCRATCH0 = guest R12
178 * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE
179 */
180#ifdef CONFIG_PPC64
181 /* Match 32-bit entry */
182#ifdef CONFIG_RELOCATABLE
183 std r9, HSTATE_SCRATCH2(r13)
184 ld r9, HSTATE_SCRATCH1(r13)
185 mtctr r9
186 ld r9, HSTATE_SCRATCH2(r13)
187#endif
188 rotldi r12, r12, 32 /* Flip R12 halves for stw */
189 stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
190 srdi r12, r12, 32 /* shift trap into low half */
191#endif
175 192
193.global kvmppc_handler_trampoline_exit
194kvmppc_handler_trampoline_exit:
176 /* Register usage at this point: 195 /* Register usage at this point:
177 * 196 *
178 * SPRG_SCRATCH0 = guest R13 197 * SPRG_SCRATCH0 = guest R13
179 * R12 = exit handler id 198 * R12 = exit handler id
180 * R13 = shadow vcpu (32-bit) or PACA (64-bit) 199 * R13 = shadow vcpu (32-bit) or PACA (64-bit)
181 * HSTATE.SCRATCH0 = guest R12 200 * HSTATE.SCRATCH0 = guest R12
182 * HSTATE.SCRATCH1 = guest CR 201 * HSTATE.SCRATCH1 = guest CR
183 *
184 */ 202 */
185 203
186 /* Save registers */ 204 /* Save registers */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index cd892dec7cb6..40a5b2d75ed1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -565,6 +565,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_PPC_HWRNG:
 		r = kvmppc_hwrng_present();
 		break;
+	case KVM_CAP_PPC_MMU_RADIX:
+		r = !!(hv_enabled && radix_enabled());
+		break;
+	case KVM_CAP_PPC_MMU_HASH_V3:
+		r = !!(hv_enabled && !radix_enabled() &&
+		       cpu_has_feature(CPU_FTR_ARCH_300));
+		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -1468,6 +1475,31 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
 		break;
 	}
+	case KVM_PPC_CONFIGURE_V3_MMU: {
+		struct kvm *kvm = filp->private_data;
+		struct kvm_ppc_mmuv3_cfg cfg;
+
+		r = -EINVAL;
+		if (!kvm->arch.kvm_ops->configure_mmu)
+			goto out;
+		r = -EFAULT;
+		if (copy_from_user(&cfg, argp, sizeof(cfg)))
+			goto out;
+		r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg);
+		break;
+	}
+	case KVM_PPC_GET_RMMU_INFO: {
+		struct kvm *kvm = filp->private_data;
+		struct kvm_ppc_rmmu_info info;
+
+		r = -EINVAL;
+		if (!kvm->arch.kvm_ops->get_rmmu_info)
+			goto out;
+		r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info);
+		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+			r = -EFAULT;
+		break;
+	}
 	default: {
 		struct kvm *kvm = filp->private_data;
 		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
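The two new capabilities above are visible to userspace through the standard KVM_CHECK_EXTENSION ioctl on the /dev/kvm fd. A minimal userspace sketch, not part of this patch, assuming kernel headers that already define the new KVM_CAP_PPC_MMU_* constants:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		if (kvm < 0)
			return 1;
		/* 1 if HV KVM can run radix guests on this host, else 0 */
		int radix = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_MMU_RADIX);
		/* 1 if POWER9 hash guests are supported, else 0 */
		int hash3 = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_MMU_HASH_V3);
		printf("radix=%d hash-v3=%d\n", radix, hash3);
		return 0;
	}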
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e86523..0e649d72fe8d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -21,9 +21,7 @@ obj64-y	+= copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 
-ifeq ($(CONFIG_GENERIC_CSUM),)
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index d0d311e108ff..d7f1a966136e 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
 	 * work to calculate the correct checksum, we ignore that case
 	 * and take the potential slowdown of unaligned loads.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcsum_aligned
 
 	li	r7,4
@@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
 	beq	.Lcsum_finish
 
 	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 
 .Lcsum_finish:
 	addze	r0,r0			/* add in final carry */
@@ -224,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic)
 	 * If the source and destination are relatively unaligned we only
 	 * align the source. This keeps things simple.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcopy_aligned
 
 	li	r9,4
@@ -386,8 +390,12 @@ dstnr;	sth	r6,0(r4)
 	beq	.Lcopy_finish
 
srcnr;	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
dstnr;	stb	r6,0(r4)
 
 .Lcopy_finish:
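The #ifdef added above exists because the 16-bit ones'-complement sum is endian-sensitive: a trailing odd byte is the high half of the final 16-bit word on big-endian, while on little-endian the raw byte value is already in the correct position of the native-order word. A standalone C sketch of the same trailing-byte handling, illustrative only (__BIG_ENDIAN__ is the compiler-defined macro the patch tests):

	#include <stdint.h>

	uint32_t add_trailing_byte(uint32_t sum, uint8_t byte)
	{
	#ifdef __BIG_ENDIAN__
		sum += (uint32_t)byte << 8;	/* byte is the high half of the last word */
	#else
		sum += byte;			/* on LE the raw byte is already correct */
	#endif
		return sum;
	}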
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c1746df0f88e..0899315e1434 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -32,6 +32,49 @@ int patch_branch(unsigned int *addr, unsigned long target, int flags)
 	return patch_instruction(addr, create_branch(addr, target, flags));
 }
 
+bool is_offset_in_branch_range(long offset)
+{
+	/*
+	 * The powerpc branch instruction is:
+	 *
+	 *   0         6                 30   31
+	 *   +---------+----------------+---+---+
+	 *   | opcode  |     LI         |AA |LK |
+	 *   +---------+----------------+---+---+
+	 *   Where AA = 0 and LK = 0
+	 *
+	 * LI is a signed 24-bit integer. The real branch offset is computed
+	 * by: imm32 = SignExtend(LI:'0b00', 32);
+	 *
+	 * So the maximum forward branch should be:
+	 *   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
+	 * The maximum backward branch should be:
+	 *   (0xff800000 << 2) = 0xfe000000 = -0x2000000
+	 */
+	return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3));
+}
+
+/*
+ * Helper to check if a given instruction is a conditional branch
+ * Derived from the conditional checks in analyse_instr()
+ */
+bool __kprobes is_conditional_branch(unsigned int instr)
+{
+	unsigned int opcode = instr >> 26;
+
+	if (opcode == 16)	/* bc, bca, bcl, bcla */
+		return true;
+	if (opcode == 19) {
+		switch ((instr >> 1) & 0x3ff) {
+		case 16:	/* bclr, bclrl */
+		case 528:	/* bcctr, bcctrl */
+		case 560:	/* bctar, bctarl */
+			return true;
+		}
+	}
+	return false;
+}
+
 unsigned int create_branch(const unsigned int *addr,
 			   unsigned long target, int flags)
 {
@@ -43,7 +86,7 @@ unsigned int create_branch(const unsigned int *addr,
 		offset = offset - (unsigned long)addr;
 
 	/* Check we can represent the target in the instruction format */
-	if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3)
+	if (!is_offset_in_branch_range(offset))
 		return 0;
 
 	/* Mask out the flags and target, so they don't step on each other. */
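For reference, the check factored out above: an unconditional PowerPC relative branch encodes a signed 24-bit LI field shifted left by two, giving a word-aligned range of roughly ±32 MB. A standalone sketch of how a caller would use it (function names here are illustrative, not the kernel's exports):

	#include <stdbool.h>

	static bool offset_in_branch_range(long offset)
	{
		/* word-aligned and within [-0x2000000, 0x1fffffc] */
		return offset >= -0x2000000 && offset <= 0x1fffffc &&
		       !(offset & 0x3);
	}

	static bool can_branch(unsigned long from, unsigned long to)
	{
		return offset_in_branch_range((long)(to - from));
	}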
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 21367b3a8146..4bcc9e76fb55 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -26,8 +26,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
-	lwz	r12,DCACHEL1LINESIZE(r10)	/* get cache line size */
+	lwz	r11,DCACHEL1LOGBLOCKSIZE(r10)	/* log2 of cache block size */
+	lwz	r12,DCACHEL1BLOCKSIZE(r10)	/* get cache block size */
 	li	r9,0
 	srd	r8,r5,r11
 
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 06c7e9b88408..846dba2c6360 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1803,9 +1803,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			return 0;
 		if (op.ea & (size - 1))
 			break;		/* can't handle misaligned */
-		err = -EFAULT;
 		if (!address_ok(regs, op.ea, size))
-			goto ldst_done;
+			return 0;
 		err = 0;
 		switch (size) {
 		case 4:
@@ -1828,9 +1827,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			return 0;
 		if (op.ea & (size - 1))
 			break;		/* can't handle misaligned */
-		err = -EFAULT;
 		if (!address_ok(regs, op.ea, size))
-			goto ldst_done;
+			return 0;
 		err = 0;
 		switch (size) {
 		case 4:
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index c100f4d5d5d0..d5b4d9498c54 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -152,9 +152,9 @@ err2;	std	r0,0(r3)
 	addi	r3,r3,8
 	addi	r4,r4,-8
 
-	/* Destination is 16 byte aligned, need to get it cacheline aligned */
-11:	lwz	r7,DCACHEL1LOGLINESIZE(r5)
-	lwz	r9,DCACHEL1LINESIZE(r5)
+	/* Destination is 16 byte aligned, need to get it cache block aligned */
+11:	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)
+	lwz	r9,DCACHEL1BLOCKSIZE(r5)
 
 	/*
 	 * With worst case alignment the long clear loop takes a minimum
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index aaa7ec6788b9..697b70ad1195 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -67,11 +67,13 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto out_unlock;
 		/*
-		 * protfault should only happen due to us
-		 * mapping a region readonly temporarily. PROT_NONE
-		 * is also covered by the VMA check above.
+		 * PROT_NONE is covered by the VMA check above,
+		 * and hash should get a NOHPTE fault instead of
+		 * a PROTFAULT in case fixup is needed for things
+		 * like autonuma.
 		 */
-		WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
+		if (!radix_enabled())
+			WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
 	}
 
 	ret = 0;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 62a50d6d1053..8dc758658972 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -407,15 +407,6 @@ good_area:
 		    (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
 		     !(vma->vm_flags & (VM_READ | VM_WRITE))))
 			goto bad_area;
-
-#ifdef CONFIG_PPC_STD_MMU
-		/*
-		 * protfault should only happen due to us
-		 * mapping a region readonly temporarily. PROT_NONE
-		 * is also covered by the VMA check above.
-		 */
-		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
-#endif /* CONFIG_PPC_STD_MMU */
 	/* a write */
 	} else if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
@@ -425,8 +416,40 @@ good_area:
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
-		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
 	}
+#ifdef CONFIG_PPC_STD_MMU
+	/*
+	 * For hash translation mode, we should never get a
+	 * PROTFAULT. Any update to pte to reduce access will result in us
+	 * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
+	 * fault instead of DSISR_PROTFAULT.
+	 *
+	 * A pte update to relax the access will not result in a hash page table
+	 * entry invalidate and hence can result in DSISR_PROTFAULT.
+	 * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+	 * the special !is_write in the below conditional.
+	 *
+	 * For platforms that don't support a coherent icache but do support
+	 * a per-page noexec bit, we set things up so that the D/I cache
+	 * sync happens via a fault. But that is handled by the low level
+	 * hash fault code (hash_page_do_lazy_icache()) and we should not
+	 * reach here in that case.
+	 *
+	 * For wrong accesses that can result in PROTFAULT, the above
+	 * vma->vm_flags check should handle those and hence we should fall
+	 * to the bad_area handling correctly.
+	 *
+	 * For embedded with per page exec support that doesn't support coherent
+	 * icache we do get PROTFAULT and we handle that D/I cache sync in
+	 * set_pte_at while taking the noexec/prot fault. Hence this WARN_ON
+	 * is conditional for server MMU.
+	 *
+	 * For radix, we can get prot fault for autonuma case, because radix
+	 * page table will have them marked noaccess for user.
+	 */
+	if (!radix_enabled() && !is_write)
+		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+#endif /* CONFIG_PPC_STD_MMU */
 
 	/*
 	 * If for any reason at all we couldn't handle the fault,
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 67e19a0821be..12d679df50bd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -35,7 +35,9 @@
 #include <linux/memblock.h>
 #include <linux/context_tracking.h>
 #include <linux/libfdt.h>
+#include <linux/debugfs.h>
 
+#include <asm/debug.h>
 #include <asm/processor.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
@@ -747,6 +749,35 @@ static unsigned long __init htab_get_table_size(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+void resize_hpt_for_hotplug(unsigned long new_mem_size)
+{
+	unsigned target_hpt_shift;
+
+	if (!mmu_hash_ops.resize_hpt)
+		return;
+
+	target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
+
+	/*
+	 * To avoid lots of HPT resizes if memory size is fluctuating
+	 * across a boundary, we deliberately have some hysteresis
+	 * here: we immediately increase the HPT size if the target
+	 * shift exceeds the current shift, but we won't attempt to
+	 * reduce unless the target shift is at least 2 below the
+	 * current shift
+	 */
+	if ((target_hpt_shift > ppc64_pft_size)
+	    || (target_hpt_shift < (ppc64_pft_size - 1))) {
+		int rc;
+
+		rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
+		if (rc)
+			printk(KERN_WARNING
+			       "Unable to resize hash page table to target order %d: %d\n",
+			       target_hpt_shift, rc);
+	}
+}
+
 int hash__create_section_mapping(unsigned long start, unsigned long end)
 {
 	int rc = htab_bolt_mapping(start, end, __pa(start),
@@ -1795,3 +1826,34 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	/* Finally limit subsequent allocations */
 	memblock_set_current_limit(ppc64_rma_size);
 }
+
+#ifdef CONFIG_DEBUG_FS
+
+static int hpt_order_get(void *data, u64 *val)
+{
+	*val = ppc64_pft_size;
+	return 0;
+}
+
+static int hpt_order_set(void *data, u64 val)
+{
+	if (!mmu_hash_ops.resize_hpt)
+		return -ENODEV;
+
+	return mmu_hash_ops.resize_hpt(val);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+
+static int __init hash64_debugfs(void)
+{
+	if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
+				 NULL, &fops_hpt_order)) {
+		pr_err("lpar: unable to create hpt_order debugfs file\n");
+	}
+
+	return 0;
+}
+machine_device_initcall(pseries, hash64_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
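The hysteresis in resize_hpt_for_hotplug() above reduces to a few lines: grow eagerly, shrink only once the target order has fallen at least two below the current one, and do nothing inside the band. A standalone sketch (hypothetical helper, simplified types):

	static int pick_new_hpt_shift(unsigned int cur_shift,
				      unsigned int target_shift)
	{
		if (target_shift > cur_shift)
			return target_shift;	/* grow immediately */
		if (target_shift < cur_shift - 1)
			return target_shift;	/* shrink only past the band */
		return cur_shift;		/* inside the band: no resize */
	}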
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 37b5f91e381b..a84bb44497f9 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -116,24 +116,3 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
-
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM)
-/*
- * This enables us to catch the wrong page directory format
- * Moved here so that we can use WARN() in the call.
- */
-int hugepd_ok(hugepd_t hpd)
-{
-	bool is_hugepd;
-	unsigned long hpdval;
-
-	hpdval = hpd_val(hpd);
-
-	/*
-	 * We should not find this format in page directory, warn otherwise.
-	 */
-	is_hugepd = (((hpdval & 0x3) == 0x0) && ((hpdval & HUGEPD_SHIFT_MASK) != 0));
-	WARN(is_hugepd, "Found wrong page directory format\n");
-	return 0;
-}
-#endif
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index f2108c40e697..eb8c6c8c4851 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -41,6 +41,7 @@ static void pmd_ctor(void *addr)
 }
 
 struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
+EXPORT_SYMBOL_GPL(pgtable_cache);	/* used by kvm_hv module */
 
 /*
  * Create a kmem_cache() for pagetables.  This is not used for PTE
@@ -86,7 +87,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
 
 	pr_debug("Allocated pgtable cache for order %d\n", shift);
 }
-
+EXPORT_SYMBOL_GPL(pgtable_cache_add);	/* used by kvm_hv module */
 
 void pgtable_cache_init(void)
 {
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 8e1588021d1c..6aa3b76aa0d6 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -42,6 +42,8 @@
 #include <linux/memblock.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
@@ -344,6 +346,30 @@ static int __init parse_disable_radix(char *p)
 }
 early_param("disable_radix", parse_disable_radix);
 
+/*
+ * If we're running under a hypervisor, we need to check the contents of
+ * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
+ * radix. If not, we clear the radix feature bit so we fall back to hash.
+ */
+static void early_check_vec5(void)
+{
+	unsigned long root, chosen;
+	int size;
+	const u8 *vec5;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return;
+	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+	if (!vec5)
+		return;
+	if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
+	    !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
+		/* Hypervisor doesn't support radix */
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+}
+
 void __init mmu_early_init_devtree(void)
 {
 	/* Disable radix mode based on kernel command line. */
@@ -351,6 +377,15 @@ void __init mmu_early_init_devtree(void)
 	if (disable_radix || !(mfmsr() & MSR_HV))
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
 
+	/*
+	 * Check /chosen/ibm,architecture-vec-5 if running as a guest.
+	 * When running bare-metal, we can use radix if we like
+	 * even though the ibm,architecture-vec-5 property created by
+	 * skiboot doesn't have the necessary bits set.
+	 */
+	if (early_radix_enabled() && !(mfmsr() & MSR_HV))
+		early_check_vec5();
+
 	if (early_radix_enabled())
 		radix__early_init_devtree();
 	else
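early_check_vec5() above boils down to a bounds check plus a bit test on a byte array. A simplified sketch with stand-in macros (the kernel's own OV5_INDX()/OV5_FEAT() definitions live elsewhere; the index/mask arithmetic here is illustrative):

	#include <stdbool.h>

	#define MY_OV5_INDX(x)	((x) >> 3)		/* byte index, illustrative */
	#define MY_OV5_FEAT(x)	(0x80 >> ((x) & 7))	/* bit within the byte */

	static bool vec5_has_feature(const unsigned char *vec5, int size, int feat)
	{
		if (size <= MY_OV5_INDX(feat))
			return false;		/* property too short */
		return vec5[MY_OV5_INDX(feat)] & MY_OV5_FEAT(feat);
	}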
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5f844337de21..9ee536ec0739 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -134,6 +134,8 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int rc;
 
+	resize_hpt_for_hotplug(memblock_phys_mem_size());
+
 	pgdata = NODE_DATA(nid);
 
 	start = (unsigned long)__va(start);
@@ -174,6 +176,8 @@ int arch_remove_memory(u64 start, u64 size)
 	 */
 	vm_unmap_aliases();
 
+	resize_hpt_for_hotplug(memblock_phys_mem_size());
+
 	return ret;
 }
 #endif
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 104bad029ce9..7de7124ac91b 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -184,7 +184,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		 * of the CMA zone if possible. NOTE: faulting in + migration
 		 * can be expensive. Batching can be considered later
 		 */
-		if (get_pageblock_migratetype(page) == MIGRATE_CMA) {
+		if (is_migrate_cma_page(page)) {
 			if (mm_iommu_move_page_from_cma(page))
 				goto populate;
 			if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b1099cb2f393..9befaee237d6 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -290,7 +290,7 @@ int of_node_to_nid(struct device_node *device)
 
 	return nid;
 }
-EXPORT_SYMBOL_GPL(of_node_to_nid);
+EXPORT_SYMBOL(of_node_to_nid);
 
 static int __init find_min_common_depth(void)
 {
@@ -786,14 +786,9 @@ new_range:
 		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
 		node_set_online(nid);
 
-		if (!(size = numa_enforce_memory_limit(start, size))) {
-			if (--ranges)
-				goto new_range;
-			else
-				continue;
-		}
-
-		memblock_set_node(start, size, &memblock.memory, nid);
+		size = numa_enforce_memory_limit(start, size);
+		if (size)
+			memblock_set_node(start, size, &memblock.memory, nid);
 
 		if (--ranges)
 			goto new_range;
@@ -1098,7 +1093,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 		nid = hot_add_node_scn_to_nid(scn_addr);
 	}
 
-	if (nid < 0 || !node_online(nid))
+	if (nid < 0 || !node_possible(nid))
 		nid = first_online_node;
 
 	return nid;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 653ff6c74ebe..b798ff674fab 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -131,7 +131,7 @@ void mmu_cleanup_all(void)
 int create_section_mapping(unsigned long start, unsigned long end)
 {
 	if (radix_enabled())
-		return -ENODEV;
+		return radix__create_section_mapping(start, end);
 
 	return hash__create_section_mapping(start, end);
 }
@@ -139,7 +139,7 @@ int create_section_mapping(unsigned long start, unsigned long end)
 int remove_section_mapping(unsigned long start, unsigned long end)
 {
 	if (radix_enabled())
-		return -ENODEV;
+		return radix__remove_section_mapping(start, end);
 
 	return hash__remove_section_mapping(start, end);
 }
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 34f1a0dbc898..feeda90cd06d 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -18,6 +18,7 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 #include <asm/firmware.h>
+#include <asm/powernv.h>
 
 #include <trace/events/thp.h>
 
@@ -107,54 +108,66 @@ set_the_pte:
 	return 0;
 }
 
+static inline void __meminit print_mapping(unsigned long start,
+					   unsigned long end,
+					   unsigned long size)
+{
+	if (end <= start)
+		return;
+
+	pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size);
+}
+
+static int __meminit create_physical_mapping(unsigned long start,
+					     unsigned long end)
+{
+	unsigned long addr, mapping_size = 0;
+
+	start = _ALIGN_UP(start, PAGE_SIZE);
+	for (addr = start; addr < end; addr += mapping_size) {
+		unsigned long gap, previous_size;
+		int rc;
+
+		gap = end - addr;
+		previous_size = mapping_size;
+
+		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
+		    mmu_psize_defs[MMU_PAGE_1G].shift)
+			mapping_size = PUD_SIZE;
+		else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
+			 mmu_psize_defs[MMU_PAGE_2M].shift)
+			mapping_size = PMD_SIZE;
+		else
+			mapping_size = PAGE_SIZE;
+
+		if (mapping_size != previous_size) {
+			print_mapping(start, addr, previous_size);
+			start = addr;
+		}
+
+		rc = radix__map_kernel_page((unsigned long)__va(addr), addr,
+					    PAGE_KERNEL_X, mapping_size);
+		if (rc)
+			return rc;
+	}
+
+	print_mapping(start, addr, mapping_size);
+	return 0;
+}
+
 static void __init radix_init_pgtable(void)
 {
-	int loop_count;
-	u64 base, end, start_addr;
 	unsigned long rts_field;
 	struct memblock_region *reg;
-	unsigned long linear_page_size;
 
 	/* We don't support slb for radix */
 	mmu_slb_size = 0;
 	/*
 	 * Create the linear mapping, using standard page size for now
 	 */
-	loop_count = 0;
-	for_each_memblock(memory, reg) {
-
-		start_addr = reg->base;
-
-redo:
-		if (loop_count < 1 && mmu_psize_defs[MMU_PAGE_1G].shift)
-			linear_page_size = PUD_SIZE;
-		else if (loop_count < 2 && mmu_psize_defs[MMU_PAGE_2M].shift)
-			linear_page_size = PMD_SIZE;
-		else
-			linear_page_size = PAGE_SIZE;
-
-		base = _ALIGN_UP(start_addr, linear_page_size);
-		end = _ALIGN_DOWN(reg->base + reg->size, linear_page_size);
-
-		pr_info("Mapping range 0x%lx - 0x%lx with 0x%lx\n",
-			(unsigned long)base, (unsigned long)end,
-			linear_page_size);
-
-		while (base < end) {
-			radix__map_kernel_page((unsigned long)__va(base),
-					       base, PAGE_KERNEL_X,
-					       linear_page_size);
-			base += linear_page_size;
-		}
-		/*
-		 * map the rest using lower page size
-		 */
-		if (end < reg->base + reg->size) {
-			start_addr = end;
-			loop_count++;
-			goto redo;
-		}
-	}
+	for_each_memblock(memory, reg)
+		WARN_ON(create_physical_mapping(reg->base,
+						reg->base + reg->size));
 	/*
 	 * Allocate Partition table and process table for the
 	 * host.
@@ -401,6 +414,8 @@ void __init radix__early_init_mmu(void)
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
 		radix_init_partition_table();
 		radix_init_amor();
+	} else {
+		radix_init_pseries();
 	}
 
 	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
@@ -438,6 +453,7 @@ void radix__mmu_cleanup_all(void)
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
 		mtspr(SPRN_PTCR, 0);
+		powernv_set_nmmu_ptcr(0);
 		radix__flush_tlb_all();
 	}
 }
@@ -467,6 +483,173 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	memblock_set_current_limit(first_memblock_base + first_memblock_size);
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (!pte_none(*pte))
+			return;
+	}
+
+	pte_free_kernel(&init_mm, pte_start);
+	pmd_clear(pmd);
+}
+
+static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (!pmd_none(*pmd))
+			return;
+	}
+
+	pmd_free(&init_mm, pmd_start);
+	pud_clear(pud);
+}
+
+static void remove_pte_table(pte_t *pte_start, unsigned long addr,
+			     unsigned long end)
+{
+	unsigned long next;
+	pte_t *pte;
+
+	pte = pte_start + pte_index(addr);
+	for (; addr < end; addr = next, pte++) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
+			/*
+			 * The vmemmap_free() and remove_section_mapping()
+			 * codepaths call us with aligned addresses.
+			 */
+			WARN_ONCE(1, "%s: unaligned range\n", __func__);
+			continue;
+		}
+
+		pte_clear(&init_mm, addr, pte);
+	}
+}
+
+static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+			     unsigned long end)
+{
+	unsigned long next;
+	pte_t *pte_base;
+	pmd_t *pmd;
+
+	pmd = pmd_start + pmd_index(addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (pmd_huge(*pmd)) {
+			if (!IS_ALIGNED(addr, PMD_SIZE) ||
+			    !IS_ALIGNED(next, PMD_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+
+			pte_clear(&init_mm, addr, (pte_t *)pmd);
+			continue;
+		}
+
+		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+		remove_pte_table(pte_base, addr, next);
+		free_pte_table(pte_base, pmd);
+	}
+}
+
+static void remove_pud_table(pud_t *pud_start, unsigned long addr,
+			     unsigned long end)
+{
+	unsigned long next;
+	pmd_t *pmd_base;
+	pud_t *pud;
+
+	pud = pud_start + pud_index(addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (pud_huge(*pud)) {
+			if (!IS_ALIGNED(addr, PUD_SIZE) ||
+			    !IS_ALIGNED(next, PUD_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+
+			pte_clear(&init_mm, addr, (pte_t *)pud);
+			continue;
+		}
+
+		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+		remove_pmd_table(pmd_base, addr, next);
+		free_pmd_table(pmd_base, pud);
+	}
+}
+
+static void remove_pagetable(unsigned long start, unsigned long end)
+{
+	unsigned long addr, next;
+	pud_t *pud_base;
+	pgd_t *pgd;
+
+	spin_lock(&init_mm.page_table_lock);
+
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+
+		pgd = pgd_offset_k(addr);
+		if (!pgd_present(*pgd))
+			continue;
+
+		if (pgd_huge(*pgd)) {
+			if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
+			    !IS_ALIGNED(next, PGDIR_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+
+			pte_clear(&init_mm, addr, (pte_t *)pgd);
+			continue;
+		}
+
+		pud_base = (pud_t *)pgd_page_vaddr(*pgd);
+		remove_pud_table(pud_base, addr, next);
+	}
+
+	spin_unlock(&init_mm.page_table_lock);
+	radix__flush_tlb_kernel_range(start, end);
+}
+
+int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+{
+	return create_physical_mapping(start, end);
+}
+
+int radix__remove_section_mapping(unsigned long start, unsigned long end)
+{
+	remove_pagetable(start, end);
+	return 0;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 int __meminit radix__vmemmap_create_mapping(unsigned long start,
 					    unsigned long page_size,
@@ -482,7 +665,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
 #ifdef CONFIG_MEMORY_HOTPLUG
 void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
 {
-	/* FIXME!! intel does more. We should free page tables mapping vmemmap ? */
+	remove_pagetable(start, start + page_size);
 }
 #endif
 #endif
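The size-selection step in create_physical_mapping() above follows one rule: take the largest page size whose alignment and remaining gap both permit it. A standalone sketch of just that decision (constants and availability flags are illustrative; the real code consults mmu_psize_defs):

	#define SZ_1G	0x40000000UL
	#define SZ_2M	0x00200000UL
	#define SZ_4K	0x00001000UL

	static unsigned long pick_mapping_size(unsigned long addr,
					       unsigned long gap,
					       int have_1g, int have_2m)
	{
		if (!(addr & (SZ_1G - 1)) && gap >= SZ_1G && have_1g)
			return SZ_1G;	/* 1G-aligned and enough room */
		if (!(addr & (SZ_2M - 1)) && gap >= SZ_2M && have_2m)
			return SZ_2M;	/* fall back to 2M */
		return SZ_4K;		/* base page size always works */
	}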
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 8bca7f58afc4..db93cf747a03 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -52,6 +52,7 @@
 #include <asm/sections.h>
 #include <asm/firmware.h>
 #include <asm/dma.h>
+#include <asm/powernv.h>
 
 #include "mmu_decl.h"
 
@@ -436,6 +437,7 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 void __init mmu_partition_table_init(void)
 {
 	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+	unsigned long ptcr;
 
 	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
 	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
@@ -448,19 +450,31 @@ void __init mmu_partition_table_init(void)
 	 * update partition table control register,
 	 * 64 K size.
 	 */
-	mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
+	mtspr(SPRN_PTCR, ptcr);
+	powernv_set_nmmu_ptcr(ptcr);
 }
 
 void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
 				   unsigned long dw1)
 {
+	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+
 	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
 	partition_tb[lpid].patb1 = cpu_to_be64(dw1);
 
-	/* Global flush of TLBs and partition table caches for this lpid */
+	/*
+	 * Global flush of TLBs and partition table caches for this lpid.
+	 * The type of flush (hash or radix) depends on what the previous
+	 * use of this partition ID was, not the new use.
+	 */
 	asm volatile("ptesync" : : : "memory");
-	asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
-		     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+	if (old & PATB_HR)
+		asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
+			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+	else
+		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 }
 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 5c096c01e8bd..94210940112f 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -248,9 +248,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
 		nw = (next - addr) >> PAGE_SHIFT;
 
 		up_write(&mm->mmap_sem);
-		err = -EFAULT;
 		if (__copy_from_user(spp, map, nw * sizeof(u32)))
-			goto out2;
+			return -EFAULT;
 		map += nw;
 		down_write(&mm->mmap_sem);
 
@@ -262,6 +261,5 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
 	err = 0;
  out:
 	up_write(&mm->mmap_sem);
- out2:
 	return err;
 }
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 89f70073dec8..30cf03f53428 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -157,8 +157,7 @@
 #define PPC_SRAD(d, a, s)	EMIT(PPC_INST_SRAD | ___PPC_RA(d) |	      \
 				     ___PPC_RS(a) | ___PPC_RB(s))
 #define PPC_SRADI(d, a, i)	EMIT(PPC_INST_SRADI | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | __PPC_SH(i) |	      \
-				     (((i) & 0x20) >> 4))
+				     ___PPC_RS(a) | __PPC_SH64(i))
 #define PPC_RLWINM(d, a, i, mb, me)	EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
 					___PPC_RS(a) | __PPC_SH(i) |	      \
 					__PPC_MB(mb) | __PPC_ME(me))
@@ -166,11 +165,11 @@
 					___PPC_RS(a) | __PPC_SH(i) |	      \
 					__PPC_MB(mb) | __PPC_ME(me))
 #define PPC_RLDICL(d, a, i, mb)		EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \
-					___PPC_RS(a) | __PPC_SH(i) |	      \
-					__PPC_MB64(mb) | (((i) & 0x20) >> 4))
+					___PPC_RS(a) | __PPC_SH64(i) |	      \
+					__PPC_MB64(mb))
 #define PPC_RLDICR(d, a, i, me)		EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \
-					___PPC_RS(a) | __PPC_SH(i) |	      \
-					__PPC_ME64(me) | (((i) & 0x20) >> 4))
+					___PPC_RS(a) | __PPC_SH64(i) |	      \
+					__PPC_ME64(me))
 
 /* slwi = rlwinm Rx, Ry, n, 0, 31-n */
 #define PPC_SLWI(d, a, i)	PPC_RLWINM(d, a, i, 0, 31-(i))
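The __PPC_SH64() helper adopted above packs the 6-bit shift amount of the 64-bit rotate/shift forms, whose top bit (sh[5]) sits apart from the classic 5-bit SH field in the instruction word. A sketch of that encoding with stand-in macro names (the kernel's own versions may differ in detail):

	#define MY_PPC_SH(i)	(((i) & 0x1f) << 11)	/* low five shift bits */
	#define MY_PPC_SH64(i)	(MY_PPC_SH(i) | (((i) & 0x20) >> 4)) /* plus sh[5] at bit 1 */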
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 7e706f36e364..f9941b3b5770 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -662,16 +662,17 @@ void bpf_jit_compile(struct bpf_prog *fp)
 	 */
 	bpf_jit_dump(flen, proglen, pass, code_base);
 
-	if (image) {
-		bpf_flush_icache(code_base, code_base + (proglen/4));
+	bpf_flush_icache(code_base, code_base + (proglen/4));
+
 #ifdef CONFIG_PPC64
 	/* Function descriptor nastiness: Address + TOC */
 	((u64 *)image)[0] = (u64)code_base;
 	((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
-		fp->bpf_func = (void *)image;
-		fp->jited = 1;
-	}
+
+	fp->bpf_func = (void *)image;
+	fp->jited = 1;
+
 out:
 	kfree(addrs);
 	return;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index c34166ef76fc..aee2bb817ac6 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -1044,16 +1044,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 */
 	bpf_jit_dump(flen, proglen, pass, code_base);
 
-	if (image) {
-		bpf_flush_icache(bpf_hdr, image + alloclen);
 #ifdef PPC64_ELF_ABI_v1
 	/* Function descriptor nastiness: Address + TOC */
 	((u64 *)image)[0] = (u64)code_base;
 	((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
-		fp->bpf_func = (void *)image;
-		fp->jited = 1;
-	}
+
+	fp->bpf_func = (void *)image;
+	fp->jited = 1;
+
+	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
 
 out:
 	kfree(addrs);
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index a0589aac4163..69794d9389c2 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -24,6 +24,7 @@
 #include <asm/machdep.h>
 #include <asm/iommu.h>
 #include <asm/ppc-pci.h>
+#include <asm/isa-bridge.h>
 
 #include "maple.h"
 
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 479c25601612..4ee837e6391a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -237,15 +237,21 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
 			show_fastsleep_workaround_applyonce,
 			store_fastsleep_workaround_applyonce);
 
+/*
+ * The default stop state that will be used by ppc_md.power_save
+ * function on platforms that support stop instruction.
+ */
+u64 pnv_default_stop_val;
+u64 pnv_default_stop_mask;
 
 /*
  * Used for ppc_md.power_save which needs a function with no parameters
  */
 static void power9_idle(void)
 {
-	/* Requesting stop state 0 */
-	power9_idle_stop(0);
+	power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask);
 }
+
 /*
  * First deep stop state. Used to figure out when to save/restore
  * hypervisor context.
@@ -253,9 +259,11 @@ static void power9_idle(void)
 u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
 
 /*
- * Deepest stop idle state. Used when a cpu is offlined
+ * psscr value and mask of the deepest stop idle state.
+ * Used when a cpu is offlined.
 */
-u64 pnv_deepest_stop_state;
+u64 pnv_deepest_stop_psscr_val;
+u64 pnv_deepest_stop_psscr_mask;
 
 /*
  * Power ISA 3.0 idle initialization.
@@ -292,53 +300,157 @@ u64 pnv_deepest_stop_state;
  * Bits 60:63 - Requested Level
  * Used to specify which power-saving level must be entered on executing
  * stop instruction
+ */
+
+int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
+{
+	int err = 0;
+
+	/*
+	 * psscr_mask == 0xf indicates an older firmware.
+	 * Set remaining fields of psscr to the default values.
+	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
+	 */
+	if (*psscr_mask == 0xf) {
+		*psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
+		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
+		return err;
+	}
+
+	/*
+	 * New firmware is expected to set the psscr_val bits correctly.
+	 * Validate that the following invariants are correctly maintained by
+	 * the new firmware.
+	 * - ESL bit value matches the EC bit value.
+	 * - ESL bit is set for all the deep stop states.
+	 */
+	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
+		err = ERR_EC_ESL_MISMATCH;
+	} else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
+		GET_PSSCR_ESL(*psscr_val) == 0) {
+		err = ERR_DEEP_STATE_ESL_MISMATCH;
+	}
+
+	return err;
+}
+
+/*
+ * pnv_arch300_idle_init: Initializes the default idle state, first
+ *                        deep idle state and deepest idle state on
+ *                        ISA 3.0 CPUs.
  *
  * @np: /ibm,opal/power-mgt device node
 * @flags: cpu-idle-state-flags array
 * @dt_idle_states: Number of idle state entries
 * Returns 0 on success
 */
-static int __init pnv_arch300_idle_init(struct device_node *np, u32 *flags,
+static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
 					int dt_idle_states)
 {
 	u64 *psscr_val = NULL;
+	u64 *psscr_mask = NULL;
+	u32 *residency_ns = NULL;
+	u64 max_residency_ns = 0;
 	int rc = 0, i;
+	bool default_stop_found = false, deepest_stop_found = false;
 
-	psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val),
-			    GFP_KERNEL);
-	if (!psscr_val) {
+	psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
+	psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
+	residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
+			       GFP_KERNEL);
+
+	if (!psscr_val || !psscr_mask || !residency_ns) {
 		rc = -1;
 		goto out;
 	}
+
 	if (of_property_read_u64_array(np,
 			"ibm,cpu-idle-state-psscr",
 			psscr_val, dt_idle_states)) {
-		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n");
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
+		rc = -1;
+		goto out;
+	}
+
+	if (of_property_read_u64_array(np,
+				       "ibm,cpu-idle-state-psscr-mask",
+				       psscr_mask, dt_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
+		rc = -1;
+		goto out;
+	}
+
+	if (of_property_read_u32_array(np,
+				       "ibm,cpu-idle-state-residency-ns",
+				       residency_ns, dt_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
 		rc = -1;
 		goto out;
 	}
 
 	/*
-	 * Set pnv_first_deep_stop_state and pnv_deepest_stop_state.
+	 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
+	 * and the pnv_default_stop_{val,mask}.
+	 *
 	 * pnv_first_deep_stop_state should be set to the first stop
 	 * level to cause hypervisor state loss.
-	 * pnv_deepest_stop_state should be set to the deepest stop
-	 * stop state.
+	 *
+	 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
+	 * the deepest stop state.
+	 *
+	 * pnv_default_stop_{val,mask} should be set to values corresponding to
+	 * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
 	 */
 	pnv_first_deep_stop_state = MAX_STOP_STATE;
 	for (i = 0; i < dt_idle_states; i++) {
+		int err;
 		u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
 
 		if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
 		     (pnv_first_deep_stop_state > psscr_rl))
 			pnv_first_deep_stop_state = psscr_rl;
 
-		if (pnv_deepest_stop_state < psscr_rl)
-			pnv_deepest_stop_state = psscr_rl;
+		err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
+					      flags[i]);
+		if (err) {
+			report_invalid_psscr_val(psscr_val[i], err);
+			continue;
+		}
+
+		if (max_residency_ns < residency_ns[i]) {
+			max_residency_ns = residency_ns[i];
+			pnv_deepest_stop_psscr_val = psscr_val[i];
+			pnv_deepest_stop_psscr_mask = psscr_mask[i];
+			deepest_stop_found = true;
+		}
+
+		if (!default_stop_found &&
+		    (flags[i] & OPAL_PM_STOP_INST_FAST)) {
+			pnv_default_stop_val = psscr_val[i];
+			pnv_default_stop_mask = psscr_mask[i];
+			default_stop_found = true;
+		}
+	}
+
+	if (!default_stop_found) {
+		pnv_default_stop_val = PSSCR_HV_DEFAULT_VAL;
+		pnv_default_stop_mask = PSSCR_HV_DEFAULT_MASK;
+		pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n",
+			pnv_default_stop_val, pnv_default_stop_mask);
+	}
+
+	if (!deepest_stop_found) {
+		pnv_deepest_stop_psscr_val = PSSCR_HV_DEFAULT_VAL;
+		pnv_deepest_stop_psscr_mask = PSSCR_HV_DEFAULT_MASK;
+		pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n",
+			pnv_deepest_stop_psscr_val,
+			pnv_deepest_stop_psscr_mask);
 	}
 
 out:
 	kfree(psscr_val);
+	kfree(psscr_mask);
+	kfree(residency_ns);
 	return rc;
 }
 
@@ -373,7 +485,7 @@ static void __init pnv_probe_idle_states(void)
 	}
 
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-		if (pnv_arch300_idle_init(np, flags, dt_idle_states))
+		if (pnv_power9_idle_init(np, flags, dt_idle_states))
 			goto out;
 	}
 
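The selection loop in pnv_power9_idle_init() above reduces to two rules: the deepest state is the one with the largest advertised residency, and the default is the first state flagged as a fast stop. A standalone sketch with simplified stand-ins (the flag value and struct here are illustrative, not the kernel's definitions):

	#include <stdint.h>
	#include <stddef.h>

	#define MY_PM_STOP_INST_FAST	0x00000010	/* illustrative value */

	struct stop_pick {
		uint64_t deepest_val, deepest_mask;
		uint64_t dflt_val, dflt_mask;
	};

	static void pick_stop_states(struct stop_pick *p, size_t n,
				     const uint64_t *val, const uint64_t *mask,
				     const uint32_t *residency,
				     const uint32_t *flags)
	{
		uint32_t max_res = 0;
		int have_default = 0;

		for (size_t i = 0; i < n; i++) {
			if (residency[i] > max_res) {	/* deepest by residency */
				max_res = residency[i];
				p->deepest_val = val[i];
				p->deepest_mask = mask[i];
			}
			if (!have_default && (flags[i] & MY_PM_STOP_INST_FAST)) {
				p->dflt_val = val[i];	/* first fast state wins */
				p->dflt_mask = mask[i];
				have_default = 1;
			}
		}
	}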
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index c0a8201cb4d9..88f3c61eec95 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -180,7 +180,8 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
 		"An XSCOM operation completed",
 		"SCOM has set a reserved FIR bit to cause recovery",
 		"Debug trigger has set a reserved FIR bit to cause recovery",
-		"A hypervisor resource error occurred"
+		"A hypervisor resource error occurred",
+		"CAPP recovery process is in progress",
 	};
 
 	/* Print things out */
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 998316bf2dad..ecdcba9d1220 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -183,8 +183,9 @@ void opal_event_shutdown(void)
 int __init opal_event_init(void)
 {
 	struct device_node *dn, *opal_node;
-	const __be32 *irqs;
-	int i, irqlen, rc = 0;
+	const char **names;
+	u32 *irqs;
+	int i, rc;
 
 	opal_node = of_find_node_by_path("/ibm,opal");
 	if (!opal_node) {
@@ -209,31 +210,56 @@ int __init opal_event_init(void)
 		goto out;
 	}
 
-	/* Get interrupt property */
-	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
-	opal_irq_count = irqs ? (irqlen / 4) : 0;
+	/* Get opal-interrupts property and names if present */
+	rc = of_property_count_u32_elems(opal_node, "opal-interrupts");
+	if (rc < 0)
+		goto out;
+
+	opal_irq_count = rc;
 	pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
 
-	/* Install interrupt handlers */
+	irqs = kcalloc(opal_irq_count, sizeof(*irqs), GFP_KERNEL);
+	names = kcalloc(opal_irq_count, sizeof(*names), GFP_KERNEL);
 	opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL);
-	for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
-		unsigned int irq, virq;
+
+	if (WARN_ON(!irqs || !names || !opal_irqs))
+		goto out_free;
+
+	rc = of_property_read_u32_array(opal_node, "opal-interrupts",
+					irqs, opal_irq_count);
+	if (rc < 0) {
+		pr_err("Error %d reading opal-interrupts array\n", rc);
+		goto out_free;
+	}
+
+	/* It's not an error for the names to be missing */
+	of_property_read_string_array(opal_node, "opal-interrupts-names",
+				      names, opal_irq_count);
+
+	/* Install interrupt handlers */
+	for (i = 0; i < opal_irq_count; i++) {
+		unsigned int virq;
+		char *name;
 
 		/* Get hardware and virtual IRQ */
-		irq = be32_to_cpup(irqs);
-		virq = irq_create_mapping(NULL, irq);
+		virq = irq_create_mapping(NULL, irqs[i]);
 		if (!virq) {
-			pr_warn("Failed to map irq 0x%x\n", irq);
+			pr_warn("Failed to map irq 0x%x\n", irqs[i]);
 			continue;
 		}
 
+		if (names[i] && strlen(names[i]))
+			name = kasprintf(GFP_KERNEL, "opal-%s", names[i]);
+		else
+			name = kasprintf(GFP_KERNEL, "opal");
+
 		/* Install interrupt handler */
 		rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
-				 "opal", NULL);
+				 name, NULL);
 		if (rc) {
 			irq_dispose_mapping(virq);
 			pr_warn("Error %d requesting irq %d (0x%x)\n",
-				 rc, virq, irq);
+				 rc, virq, irqs[i]);
 			continue;
 		}
 
@@ -241,6 +267,9 @@ int __init opal_event_init(void)
 		opal_irqs[i] = virq;
 	}
 
+out_free:
+	kfree(irqs);
+	kfree(names);
 out:
 	of_node_put(opal_node);
 	return rc;
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index 4886eb8b6381..a91d7876fae2 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -18,11 +18,11 @@
18 18
19#include <asm/machdep.h> 19#include <asm/machdep.h>
20#include <asm/firmware.h> 20#include <asm/firmware.h>
21#include <asm/xics.h>
22#include <asm/opal.h> 21#include <asm/opal.h>
23#include <asm/prom.h> 22#include <asm/prom.h>
24#include <linux/uaccess.h> 23#include <linux/uaccess.h>
25#include <asm/debug.h> 24#include <asm/debug.h>
25#include <asm/isa-bridge.h>
26 26
27static int opal_lpc_chip_id = -1; 27static int opal_lpc_chip_id = -1;
28 28
@@ -386,7 +386,7 @@ static int opal_lpc_init_debugfs(void)
386machine_device_initcall(powernv, opal_lpc_init_debugfs); 386machine_device_initcall(powernv, opal_lpc_init_debugfs);
387#endif /* CONFIG_DEBUG_FS */ 387#endif /* CONFIG_DEBUG_FS */
388 388
389void opal_lpc_init(void) 389void __init opal_lpc_init(void)
390{ 390{
391 struct device_node *np; 391 struct device_node *np;
392 392
@@ -406,9 +406,17 @@ void opal_lpc_init(void)
406 if (opal_lpc_chip_id < 0) 406 if (opal_lpc_chip_id < 0)
407 return; 407 return;
408 408
409 /* Setup special IO ops */ 409 /* Does it support direct mapping? */
410 ppc_pci_io = opal_lpc_io; 410 if (of_get_property(np, "ranges", NULL)) {
411 isa_io_special = true; 411 pr_info("OPAL: Found memory mapped LPC bus on chip %d\n",
412 412 opal_lpc_chip_id);
413 pr_info("OPAL: Power8 LPC bus found, chip ID %d\n", opal_lpc_chip_id); 413 isa_bridge_init_non_pci(np);
414 } else {
415 pr_info("OPAL: Found non-mapped LPC bus on chip %d\n",
416 opal_lpc_chip_id);
417
418 /* Setup special IO ops */
419 ppc_pci_io = opal_lpc_io;
420 isa_io_special = true;
421 }
414} 422}
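
[Editor's note: the decision above hinges entirely on the device tree. A "ranges" property means the LPC window is translated into the parent address space and can be driven with ordinary MMIO via isa_bridge_init_non_pci(); otherwise every access goes through an OPAL call. A hedged one-liner capturing that test; the helper name is made up.]

	static bool lpc_is_direct_mapped(const struct device_node *np)
	{
		/* "ranges" present => the LPC bus is memory mapped (POWER9) */
		return of_get_property(np, "ranges", NULL) != NULL;
	}
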
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index 39d6ff9e5630..7a9cde0cfbd1 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -123,6 +123,10 @@ void __init opal_msglog_init(void)
123 return; 123 return;
124 } 124 }
125 125
126 /* Report maximum size */
127 opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) +
128 be32_to_cpu(mc->obuf_size);
129
126 opal_memcons = mc; 130 opal_memcons = mc;
127} 131}
128 132
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3aa40f1b20f5..6693f75e93d1 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -58,14 +58,16 @@ END_FTR_SECTION(0, 1); \
58 58
59#define OPAL_CALL(name, token) \ 59#define OPAL_CALL(name, token) \
60 _GLOBAL_TOC(name); \ 60 _GLOBAL_TOC(name); \
61 mfmsr r12; \
61 mflr r0; \ 62 mflr r0; \
63 andi. r11,r12,MSR_IR|MSR_DR; \
62 std r0,PPC_LR_STKOFF(r1); \ 64 std r0,PPC_LR_STKOFF(r1); \
63 li r0,token; \ 65 li r0,token; \
66 beq opal_real_call; \
64 OPAL_BRANCH(opal_tracepoint_entry) \ 67 OPAL_BRANCH(opal_tracepoint_entry) \
65 mfcr r12; \ 68 mfcr r11; \
66 stw r12,8(r1); \ 69 stw r11,8(r1); \
67 li r11,0; \ 70 li r11,0; \
68 mfmsr r12; \
69 ori r11,r11,MSR_EE; \ 71 ori r11,r11,MSR_EE; \
70 std r12,PACASAVEDMSR(r13); \ 72 std r12,PACASAVEDMSR(r13); \
71 andc r12,r12,r11; \ 73 andc r12,r12,r11; \
@@ -98,6 +100,30 @@ opal_return:
98 mtcr r4; 100 mtcr r4;
99 rfid 101 rfid
100 102
103opal_real_call:
104 mfcr r11
105 stw r11,8(r1)
106 /* Set opal return address */
107 LOAD_REG_ADDR(r11, opal_return_realmode)
108 mtlr r11
109 li r11,MSR_LE
110 andc r12,r12,r11
111 mtspr SPRN_HSRR1,r12
112 LOAD_REG_ADDR(r11,opal)
113 ld r12,8(r11)
114 ld r2,0(r11)
115 mtspr SPRN_HSRR0,r12
116 hrfid
117
118opal_return_realmode:
119 FIXUP_ENDIAN
120 ld r2,PACATOC(r13);
121 lwz r11,8(r1);
122 ld r12,PPC_LR_STKOFF(r1)
123 mtcr r11;
124 mtlr r12
125 blr
126
101#ifdef CONFIG_TRACEPOINTS 127#ifdef CONFIG_TRACEPOINTS
102opal_tracepoint_entry: 128opal_tracepoint_entry:
103 stdu r1,-STACKFRAMESIZE(r1) 129 stdu r1,-STACKFRAMESIZE(r1)
@@ -146,7 +172,7 @@ opal_tracepoint_entry:
146opal_tracepoint_return: 172opal_tracepoint_return:
147 std r3,STK_REG(R31)(r1) 173 std r3,STK_REG(R31)(r1)
148 mr r4,r3 174 mr r4,r3
149 ld r0,STK_REG(R23)(r1) 175 ld r3,STK_REG(R23)(r1)
150 bl __trace_opal_exit 176 bl __trace_opal_exit
151 ld r3,STK_REG(R31)(r1) 177 ld r3,STK_REG(R31)(r1)
152 addi r1,r1,STACKFRAMESIZE 178 addi r1,r1,STACKFRAMESIZE
@@ -155,36 +181,6 @@ opal_tracepoint_return:
155 blr 181 blr
156#endif 182#endif
157 183
158#define OPAL_CALL_REAL(name, token) \
159 _GLOBAL_TOC(name); \
160 mflr r0; \
161 std r0,PPC_LR_STKOFF(r1); \
162 li r0,token; \
163 mfcr r12; \
164 stw r12,8(r1); \
165 \
166 /* Set opal return address */ \
167 LOAD_REG_ADDR(r11, opal_return_realmode); \
168 mtlr r11; \
169 mfmsr r12; \
170 li r11,MSR_LE; \
171 andc r12,r12,r11; \
172 mtspr SPRN_HSRR1,r12; \
173 LOAD_REG_ADDR(r11,opal); \
174 ld r12,8(r11); \
175 ld r2,0(r11); \
176 mtspr SPRN_HSRR0,r12; \
177 hrfid
178
179opal_return_realmode:
180 FIXUP_ENDIAN
181 ld r2,PACATOC(r13);
182 lwz r11,8(r1);
183 ld r12,PPC_LR_STKOFF(r1)
184 mtcr r11;
185 mtlr r12
186 blr
187
188 184
189OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); 185OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
190OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); 186OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
@@ -208,7 +204,6 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
208OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); 204OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
209OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); 205OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
210OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); 206OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
211OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE);
212OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); 207OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
213OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); 208OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
214OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); 209OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
@@ -264,7 +259,6 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
264OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); 259OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
265OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); 260OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
266OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); 261OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
267OPAL_CALL_REAL(opal_rm_resync_timebase, OPAL_RESYNC_TIMEBASE);
268OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); 262OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
269OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); 263OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
270OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); 264OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
@@ -280,9 +274,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
280OPAL_CALL(opal_get_param, OPAL_GET_PARAM); 274OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
281OPAL_CALL(opal_set_param, OPAL_SET_PARAM); 275OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
282OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); 276OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
283OPAL_CALL_REAL(opal_rm_handle_hmi, OPAL_HANDLE_HMI);
284OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); 277OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
285OPAL_CALL_REAL(opal_rm_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
286OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); 278OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
287OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); 279OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
288OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); 280OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
@@ -304,11 +296,8 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
304OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); 296OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
305OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); 297OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
306OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); 298OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
307OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR);
308OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); 299OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
309OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); 300OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
310OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI);
311OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); 301OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
312OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR);
313OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); 302OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
314OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); 303OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
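
[Editor's note: with OPAL_CALL_REAL gone, the single OPAL_CALL stub decides at run time whether it was entered in real mode, testing MSR[IR] and MSR[DR] with mfmsr/andi. and branching to opal_real_call when both are clear. A C rendering of that test, for illustration only.]

	#include <asm/reg.h>

	static inline bool entered_in_real_mode(void)
	{
		/* Relocation off for both instructions and data => real mode */
		return (mfmsr() & (MSR_IR | MSR_DR)) == 0;
	}
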
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 282293572dc8..86d9fde93c17 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -875,6 +875,17 @@ int opal_error_code(int rc)
875 } 875 }
876} 876}
877 877
878void powernv_set_nmmu_ptcr(unsigned long ptcr)
879{
880 int rc;
881
882 if (firmware_has_feature(FW_FEATURE_OPAL)) {
883 rc = opal_nmmu_set_ptcr(-1UL, ptcr);
884 if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
885 pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
886 }
887}
888
878EXPORT_SYMBOL_GPL(opal_poll_events); 889EXPORT_SYMBOL_GPL(opal_poll_events);
879EXPORT_SYMBOL_GPL(opal_rtc_read); 890EXPORT_SYMBOL_GPL(opal_rtc_read);
880EXPORT_SYMBOL_GPL(opal_rtc_write); 891EXPORT_SYMBOL_GPL(opal_rtc_write);
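
[Editor's note: powernv_set_nmmu_ptcr() lets MMU setup code point the nest MMU at the same partition table as the core MMU; OPAL_UNSUPPORTED is deliberately tolerated on firmware without the call. A hypothetical caller; the PTCR encoding shown is an assumption, not taken from this hunk.]

	/* Physical address of the partition table, ORed with its
	 * encoded size, as the PTCR register expects. */
	powernv_set_nmmu_ptcr(__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
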
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b07680cd2518..8278f43ad4b8 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1326,7 +1326,9 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
1326 else 1326 else
1327 m64_bars = 1; 1327 m64_bars = 1;
1328 1328
1329 pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); 1329 pdn->m64_map = kmalloc_array(m64_bars,
1330 sizeof(*pdn->m64_map),
1331 GFP_KERNEL);
1330 if (!pdn->m64_map) 1332 if (!pdn->m64_map)
1331 return -ENOMEM; 1333 return -ENOMEM;
1332 /* Initialize the m64_map to IODA_INVALID_M64 */ 1334 /* Initialize the m64_map to IODA_INVALID_M64 */
@@ -1593,8 +1595,9 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
1593 1595
1594 /* Allocating pe_num_map */ 1596 /* Allocating pe_num_map */
1595 if (pdn->m64_single_mode) 1597 if (pdn->m64_single_mode)
1596 pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, 1598 pdn->pe_num_map = kmalloc_array(num_vfs,
1597 GFP_KERNEL); 1599 sizeof(*pdn->pe_num_map),
1600 GFP_KERNEL);
1598 else 1601 else
1599 pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); 1602 pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
1600 1603
@@ -1950,7 +1953,12 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
1950 struct pnv_phb *phb = pe->phb; 1953 struct pnv_phb *phb = pe->phb;
1951 unsigned int shift = tbl->it_page_shift; 1954 unsigned int shift = tbl->it_page_shift;
1952 1955
1953 if (phb->type == PNV_PHB_NPU) { 1956 /*
1957 * NVLink1 can use the TCE kill register directly as
1958 * it's the same as PHB3. NVLink2 is different and
1959 * should go via the OPAL call.
1960 */
1961 if (phb->model == PNV_PHB_MODEL_NPU) {
1954 /* 1962 /*
1955 * The NVLink hardware does not support TCE kill 1963 * The NVLink hardware does not support TCE kill
1956 * per TCE entry so we have to invalidate 1964 * per TCE entry so we have to invalidate
@@ -1962,11 +1970,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
1962 if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) 1970 if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
1963 pnv_pci_phb3_tce_invalidate(pe, rm, shift, 1971 pnv_pci_phb3_tce_invalidate(pe, rm, shift,
1964 index, npages); 1972 index, npages);
1965 else if (rm)
1966 opal_rm_pci_tce_kill(phb->opal_id,
1967 OPAL_PCI_TCE_KILL_PAGES,
1968 pe->pe_number, 1u << shift,
1969 index << shift, npages);
1970 else 1973 else
1971 opal_pci_tce_kill(phb->opal_id, 1974 opal_pci_tce_kill(phb->opal_id,
1972 OPAL_PCI_TCE_KILL_PAGES, 1975 OPAL_PCI_TCE_KILL_PAGES,
@@ -3671,6 +3674,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
3671 phb->model = PNV_PHB_MODEL_PHB3; 3674 phb->model = PNV_PHB_MODEL_PHB3;
3672 else if (of_device_is_compatible(np, "ibm,power8-npu-pciex")) 3675 else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
3673 phb->model = PNV_PHB_MODEL_NPU; 3676 phb->model = PNV_PHB_MODEL_NPU;
3677 else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
3678 phb->model = PNV_PHB_MODEL_NPU2;
3674 else 3679 else
3675 phb->model = PNV_PHB_MODEL_UNKNOWN; 3680 phb->model = PNV_PHB_MODEL_UNKNOWN;
3676 3681
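
[Editor's note: the kmalloc() to kmalloc_array() conversions above are not cosmetic. A count multiplied into the size argument can wrap; kmalloc_array() refuses instead of allocating a too-small buffer. Roughly, simplified from include/linux/slab.h:]

	static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
	{
		if (size != 0 && n > SIZE_MAX / size)
			return NULL;	/* multiplication would overflow */
		return kmalloc(n * size, flags);
	}
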
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index c6d554fe585c..eb835e977e33 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -940,6 +940,13 @@ void __init pnv_pci_init(void)
940 for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") 940 for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
941 pnv_pci_init_npu_phb(np); 941 pnv_pci_init_npu_phb(np);
942 942
943 /*
944 * Look for NPU2 PHBs which we treat mostly as NPU PHBs with
945 * the exception of TCE kill which requires an OPAL call.
946 */
947 for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
948 pnv_pci_init_npu_phb(np);
949
943 /* Configure IOMMU DMA hooks */ 950 /* Configure IOMMU DMA hooks */
944 set_pci_dma_ops(&dma_iommu_ops); 951 set_pci_dma_ops(&dma_iommu_ops);
945} 952}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index e64df7894d6e..e1d3e5526b54 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -19,6 +19,7 @@ enum pnv_phb_model {
19 PNV_PHB_MODEL_P7IOC, 19 PNV_PHB_MODEL_P7IOC,
20 PNV_PHB_MODEL_PHB3, 20 PNV_PHB_MODEL_PHB3,
21 PNV_PHB_MODEL_NPU, 21 PNV_PHB_MODEL_NPU,
22 PNV_PHB_MODEL_NPU2,
22}; 23};
23 24
24#define PNV_PCI_DIAG_BUF_SIZE 8192 25#define PNV_PCI_DIAG_BUF_SIZE 8192
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index da7c843ac7f1..613052232475 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -18,7 +18,8 @@ static inline void pnv_pci_shutdown(void) { }
18#endif 18#endif
19 19
20extern u32 pnv_get_supported_cpuidle_states(void); 20extern u32 pnv_get_supported_cpuidle_states(void);
21extern u64 pnv_deepest_stop_state; 21extern u64 pnv_deepest_stop_psscr_val;
22extern u64 pnv_deepest_stop_psscr_mask;
22 23
23extern void pnv_lpc_init(void); 24extern void pnv_lpc_init(void);
24 25
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index eec0e8d0454d..e39e6c428af1 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -184,15 +184,17 @@ static void pnv_smp_cpu_kill_self(void)
184 184
185 ppc64_runlatch_off(); 185 ppc64_runlatch_off();
186 186
187 if (cpu_has_feature(CPU_FTR_ARCH_300)) 187 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
188 srr1 = power9_idle_stop(pnv_deepest_stop_state); 188 srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
189 else if (idle_states & OPAL_PM_WINKLE_ENABLED) 189 pnv_deepest_stop_psscr_mask);
190 } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
190 srr1 = power7_winkle(); 191 srr1 = power7_winkle();
191 else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || 192 } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
192 (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 193 (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
193 srr1 = power7_sleep(); 194 srr1 = power7_sleep();
194 else 195 } else {
195 srr1 = power7_nap(1); 196 srr1 = power7_nap(1);
197 }
196 198
197 ppc64_runlatch_on(); 199 ppc64_runlatch_on();
198 200
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index e1c280a95d58..30ec04f1c67c 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -17,7 +17,6 @@ config PPC_PSERIES
17 select PPC_UDBG_16550 17 select PPC_UDBG_16550
18 select PPC_NATIVE 18 select PPC_NATIVE
19 select PPC_DOORBELL 19 select PPC_DOORBELL
20 select HAVE_CONTEXT_TRACKING
21 select HOTPLUG_CPU if SMP 20 select HOTPLUG_CPU if SMP
22 select ARCH_RANDOM 21 select ARCH_RANDOM
23 select PPC_DOORBELL 22 select PPC_DOORBELL
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 4839db385bb0..4ac419c7eb4c 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -76,7 +76,7 @@ module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
76MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " 76MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
77 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); 77 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
78module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR); 78module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
79MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove " 79MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
80 "before loaning resumes. " 80 "before loaning resumes. "
81 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); 81 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
82module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); 82module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 5cb2e4beffc5..d3a81e746fc4 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -551,7 +551,13 @@ dlpar_store_out:
551 return rc ? rc : count; 551 return rc ? rc : count;
552} 552}
553 553
554static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store); 554static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
555 char *buf)
556{
557 return sprintf(buf, "%s\n", "memory,cpu");
558}
559
560static CLASS_ATTR(dlpar, S_IWUSR | S_IRUSR, dlpar_show, dlpar_store);
555 561
556static int __init pseries_dlpar_init(void) 562static int __init pseries_dlpar_init(void)
557{ 563{
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index ea7f09bd73b1..63cc82ad58ac 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -64,6 +64,7 @@ hypertas_fw_features_table[] = {
64 {FW_FEATURE_VPHN, "hcall-vphn"}, 64 {FW_FEATURE_VPHN, "hcall-vphn"},
65 {FW_FEATURE_SET_MODE, "hcall-set-mode"}, 65 {FW_FEATURE_SET_MODE, "hcall-set-mode"},
66 {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"}, 66 {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"},
67 {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"},
67}; 68};
68 69
69/* Build up the firmware features bitmask using the contents of 70/* Build up the firmware features bitmask using the contents of
@@ -126,7 +127,7 @@ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
126 index = OV5_INDX(vec5_fw_features_table[i].feature); 127 index = OV5_INDX(vec5_fw_features_table[i].feature);
127 feat = OV5_FEAT(vec5_fw_features_table[i].feature); 128 feat = OV5_FEAT(vec5_fw_features_table[i].feature);
128 129
129 if (vec5[index] & feat) 130 if (index < len && (vec5[index] & feat))
130 powerpc_firmware_features |= 131 powerpc_firmware_features |=
131 vec5_fw_features_table[i].val; 132 vec5_fw_features_table[i].val;
132 } 133 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 2617f9f356bd..3381c20edbc0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -446,9 +446,7 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
446 /* Update memory regions for memory remove */ 446 /* Update memory regions for memory remove */
447 memblock_remove(lmb->base_addr, block_sz); 447 memblock_remove(lmb->base_addr, block_sz);
448 448
449 dlpar_release_drc(lmb->drc_index);
450 dlpar_remove_device_tree_lmb(lmb); 449 dlpar_remove_device_tree_lmb(lmb);
451
452 return 0; 450 return 0;
453} 451}
454 452
@@ -516,6 +514,7 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove,
516 if (!lmbs[i].reserved) 514 if (!lmbs[i].reserved)
517 continue; 515 continue;
518 516
517 dlpar_release_drc(lmbs[i].drc_index);
519 pr_info("Memory at %llx was hot-removed\n", 518 pr_info("Memory at %llx was hot-removed\n",
520 lmbs[i].base_addr); 519 lmbs[i].base_addr);
521 520
@@ -545,6 +544,9 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
545 if (lmbs[i].drc_index == drc_index) { 544 if (lmbs[i].drc_index == drc_index) {
546 lmb_found = 1; 545 lmb_found = 1;
547 rc = dlpar_remove_lmb(&lmbs[i]); 546 rc = dlpar_remove_lmb(&lmbs[i]);
547 if (!rc)
548 dlpar_release_drc(lmbs[i].drc_index);
549
548 break; 550 break;
549 } 551 }
550 } 552 }
@@ -561,6 +563,44 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop)
561 return rc; 563 return rc;
562} 564}
563 565
566static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
567{
568 struct of_drconf_cell *lmbs;
569 u32 num_lmbs, *p;
570 int lmb_found;
571 int i, rc;
572
573 pr_info("Attempting to update LMB, drc index %x\n", drc_index);
574
575 p = prop->value;
576 num_lmbs = *p++;
577 lmbs = (struct of_drconf_cell *)p;
578
579 lmb_found = 0;
580 for (i = 0; i < num_lmbs; i++) {
581 if (lmbs[i].drc_index == drc_index) {
582 lmb_found = 1;
583 rc = dlpar_remove_lmb(&lmbs[i]);
584 if (!rc) {
585 rc = dlpar_add_lmb(&lmbs[i]);
586 if (rc)
587 dlpar_release_drc(lmbs[i].drc_index);
588 }
589 break;
590 }
591 }
592
593 if (!lmb_found)
594 rc = -EINVAL;
595
596 if (rc)
597 pr_info("Failed to update memory at %llx\n",
598 lmbs[i].base_addr);
599 else
600 pr_info("Memory at %llx was updated\n", lmbs[i].base_addr);
601
602 return rc;
603}
564#else 604#else
565static inline int pseries_remove_memblock(unsigned long base, 605static inline int pseries_remove_memblock(unsigned long base,
566 unsigned int memblock_size) 606 unsigned int memblock_size)
@@ -599,10 +639,6 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
599 if (lmb->flags & DRCONF_MEM_ASSIGNED) 639 if (lmb->flags & DRCONF_MEM_ASSIGNED)
600 return -EINVAL; 640 return -EINVAL;
601 641
602 rc = dlpar_acquire_drc(lmb->drc_index);
603 if (rc)
604 return rc;
605
606 rc = dlpar_add_device_tree_lmb(lmb); 642 rc = dlpar_add_device_tree_lmb(lmb);
607 if (rc) { 643 if (rc) {
608 pr_err("Couldn't update device tree for drc index %x\n", 644 pr_err("Couldn't update device tree for drc index %x\n",
@@ -618,12 +654,10 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb)
618 654
619 /* Add the memory */ 655 /* Add the memory */
620 rc = add_memory(nid, lmb->base_addr, block_sz); 656 rc = add_memory(nid, lmb->base_addr, block_sz);
621 if (rc) { 657 if (rc)
622 dlpar_remove_device_tree_lmb(lmb); 658 dlpar_remove_device_tree_lmb(lmb);
623 dlpar_release_drc(lmb->drc_index); 659 else
624 } else {
625 lmb->flags |= DRCONF_MEM_ASSIGNED; 660 lmb->flags |= DRCONF_MEM_ASSIGNED;
626 }
627 661
628 return rc; 662 return rc;
629} 663}
@@ -655,10 +689,16 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
655 return -EINVAL; 689 return -EINVAL;
656 690
657 for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) { 691 for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
658 rc = dlpar_add_lmb(&lmbs[i]); 692 rc = dlpar_acquire_drc(lmbs[i].drc_index);
659 if (rc) 693 if (rc)
660 continue; 694 continue;
661 695
696 rc = dlpar_add_lmb(&lmbs[i]);
697 if (rc) {
698 dlpar_release_drc(lmbs[i].drc_index);
699 continue;
700 }
701
662 lmbs_added++; 702 lmbs_added++;
663 703
664 /* Mark this lmb so we can remove it later if all of the 704 /* Mark this lmb so we can remove it later if all of the
@@ -678,6 +718,8 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
678 if (rc) 718 if (rc)
679 pr_err("Failed to remove LMB, drc index %x\n", 719 pr_err("Failed to remove LMB, drc index %x\n",
680 be32_to_cpu(lmbs[i].drc_index)); 720 be32_to_cpu(lmbs[i].drc_index));
721 else
722 dlpar_release_drc(lmbs[i].drc_index);
681 } 723 }
682 rc = -EINVAL; 724 rc = -EINVAL;
683 } else { 725 } else {
@@ -711,7 +753,13 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop)
711 for (i = 0; i < num_lmbs; i++) { 753 for (i = 0; i < num_lmbs; i++) {
712 if (lmbs[i].drc_index == drc_index) { 754 if (lmbs[i].drc_index == drc_index) {
713 lmb_found = 1; 755 lmb_found = 1;
714 rc = dlpar_add_lmb(&lmbs[i]); 756 rc = dlpar_acquire_drc(lmbs[i].drc_index);
757 if (!rc) {
758 rc = dlpar_add_lmb(&lmbs[i]);
759 if (rc)
760 dlpar_release_drc(lmbs[i].drc_index);
761 }
762
715 break; 763 break;
716 } 764 }
717 } 765 }
@@ -769,6 +817,9 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
769 else 817 else
770 rc = -EINVAL; 818 rc = -EINVAL;
771 break; 819 break;
820 case PSERIES_HP_ELOG_ACTION_READD:
821 rc = dlpar_memory_readd_by_index(drc_index, prop);
822 break;
772 default: 823 default:
773 pr_err("Invalid action (%d) specified\n", hp_elog->action); 824 pr_err("Invalid action (%d) specified\n", hp_elog->action);
774 rc = -EINVAL; 825 rc = -EINVAL;
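
[Editor's note: the recurring shape of these hotplug-memory.c hunks is a single ownership rule. Acquire the DRC before dlpar_add_lmb(), and release it only once a removal, or a failed add, is final, so the new READD path can remove and re-add an LMB without giving the DRC back in between. A condensed sketch of that rule, error paths trimmed:]

	rc = dlpar_acquire_drc(lmb->drc_index);
	if (rc)
		return rc;

	rc = dlpar_add_lmb(lmb);
	if (rc)
		dlpar_release_drc(lmb->drc_index);	/* add failed: give it back */
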
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5dc1c3c6e716..251060cf1713 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -27,6 +27,8 @@
27#include <linux/console.h> 27#include <linux/console.h>
28#include <linux/export.h> 28#include <linux/export.h>
29#include <linux/jump_label.h> 29#include <linux/jump_label.h>
30#include <linux/delay.h>
31#include <linux/stop_machine.h>
30#include <asm/processor.h> 32#include <asm/processor.h>
31#include <asm/mmu.h> 33#include <asm/mmu.h>
32#include <asm/page.h> 34#include <asm/page.h>
@@ -609,6 +611,135 @@ static int __init disable_bulk_remove(char *str)
609 611
610__setup("bulk_remove=", disable_bulk_remove); 612__setup("bulk_remove=", disable_bulk_remove);
611 613
614#define HPT_RESIZE_TIMEOUT 10000 /* ms */
615
616struct hpt_resize_state {
617 unsigned long shift;
618 int commit_rc;
619};
620
621static int pseries_lpar_resize_hpt_commit(void *data)
622{
623 struct hpt_resize_state *state = data;
624
625 state->commit_rc = plpar_resize_hpt_commit(0, state->shift);
626 if (state->commit_rc != H_SUCCESS)
627 return -EIO;
628
629 /* Hypervisor has transitioned the HTAB, update our globals */
630 ppc64_pft_size = state->shift;
631 htab_size_bytes = 1UL << ppc64_pft_size;
632 htab_hash_mask = (htab_size_bytes >> 7) - 1;
633
634 return 0;
635}
636
637/* Must be called in user context */
638static int pseries_lpar_resize_hpt(unsigned long shift)
639{
640 struct hpt_resize_state state = {
641 .shift = shift,
642 .commit_rc = H_FUNCTION,
643 };
644 unsigned int delay, total_delay = 0;
645 int rc;
646 ktime_t t0, t1, t2;
647
648 might_sleep();
649
650 if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
651 return -ENODEV;
652
653 printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
654 shift);
655
656 t0 = ktime_get();
657
658 rc = plpar_resize_hpt_prepare(0, shift);
659 while (H_IS_LONG_BUSY(rc)) {
660 delay = get_longbusy_msecs(rc);
661 total_delay += delay;
662 if (total_delay > HPT_RESIZE_TIMEOUT) {
663 /* prepare with shift==0 cancels an in-progress resize */
664 rc = plpar_resize_hpt_prepare(0, 0);
665 if (rc != H_SUCCESS)
666 printk(KERN_WARNING
667 "lpar: Unexpected error %d cancelling timed out HPT resize\n",
668 rc);
669 return -ETIMEDOUT;
670 }
671 msleep(delay);
672 rc = plpar_resize_hpt_prepare(0, shift);
 673 	}
674
675 switch (rc) {
676 case H_SUCCESS:
677 /* Continue on */
678 break;
679
680 case H_PARAMETER:
681 return -EINVAL;
682 case H_RESOURCE:
683 return -EPERM;
684 default:
685 printk(KERN_WARNING
686 "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
687 rc);
688 return -EIO;
689 }
690
691 t1 = ktime_get();
692
693 rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
694
695 t2 = ktime_get();
696
697 if (rc != 0) {
698 switch (state.commit_rc) {
699 case H_PTEG_FULL:
700 printk(KERN_WARNING
701 "lpar: Hash collision while resizing HPT\n");
702 return -ENOSPC;
703
704 default:
705 printk(KERN_WARNING
706 "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
707 state.commit_rc);
708 return -EIO;
 709 		}
710 }
711
712 printk(KERN_INFO
713 "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
714 shift, (long long) ktime_ms_delta(t1, t0),
715 (long long) ktime_ms_delta(t2, t1));
716
717 return 0;
718}
719
720/* Actually only used for radix, so far */
721static int pseries_lpar_register_process_table(unsigned long base,
722 unsigned long page_size, unsigned long table_size)
723{
724 long rc;
725 unsigned long flags = PROC_TABLE_NEW;
726
727 if (radix_enabled())
728 flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
729 for (;;) {
730 rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
731 page_size, table_size);
732 if (!H_IS_LONG_BUSY(rc))
733 break;
734 mdelay(get_longbusy_msecs(rc));
735 }
736 if (rc != H_SUCCESS) {
737 pr_err("Failed to register process table (rc=%ld)\n", rc);
738 BUG();
739 }
740 return rc;
741}
742
612void __init hpte_init_pseries(void) 743void __init hpte_init_pseries(void)
613{ 744{
614 mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; 745 mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
@@ -620,6 +751,13 @@ void __init hpte_init_pseries(void)
620 mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; 751 mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
621 mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; 752 mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
622 mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; 753 mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
754 mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
755}
756
757void radix_init_pseries(void)
758{
759 pr_info("Using radix MMU under hypervisor\n");
760 register_process_table = pseries_lpar_register_process_table;
623} 761}
624 762
625#ifdef CONFIG_PPC_SMLPAR 763#ifdef CONFIG_PPC_SMLPAR
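
[Editor's note: to see why resizing matters, a rough worked example, assuming the traditional guideline of an HPT sized at 1/64 of guest RAM, which this hunk does not spell out: a guest ballooned down from 4 TiB to 64 GiB could shrink its HPT from 64 GiB (shift 36) to 1 GiB (shift 30), returning 63 GiB to the host. The shift for a given memory size would be computed along these lines; the helper is illustrative only.]

	#include <linux/log2.h>

	static unsigned long hpt_shift_for_mem(unsigned long mem_bytes)
	{
		/* 1/64 of RAM as a power-of-two shift:
		 * 64 GiB >> 6 = 1 GiB  =>  ilog2(1 GiB) = 30 */
		return ilog2(mem_bytes >> 6);
	}
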
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index a560a98bcf3b..5a0c7ba429ce 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -39,6 +39,7 @@ struct update_props_workarea {
39#define ADD_DT_NODE 0x03000000 39#define ADD_DT_NODE 0x03000000
40 40
41#define MIGRATION_SCOPE (1) 41#define MIGRATION_SCOPE (1)
42#define PRRN_SCOPE -2
42 43
43static int mobility_rtas_call(int token, char *buf, s32 scope) 44static int mobility_rtas_call(int token, char *buf, s32 scope)
44{ 45{
@@ -236,6 +237,35 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
236 return rc; 237 return rc;
237} 238}
238 239
240static void prrn_update_node(__be32 phandle)
241{
242 struct pseries_hp_errorlog *hp_elog;
243 struct device_node *dn;
244
245 /*
 246 	 * If a node is found for the given phandle, the phandle does not
 247 	 * represent the drc index of an LMB and we can ignore it.
248 */
249 dn = of_find_node_by_phandle(be32_to_cpu(phandle));
250 if (dn) {
251 of_node_put(dn);
252 return;
253 }
254
255 hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
 256 	if (!hp_elog)
257 return;
258
259 hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
260 hp_elog->action = PSERIES_HP_ELOG_ACTION_READD;
261 hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
262 hp_elog->_drc_u.drc_index = phandle;
263
264 queue_hotplug_event(hp_elog, NULL, NULL);
265
266 kfree(hp_elog);
267}
268
239int pseries_devicetree_update(s32 scope) 269int pseries_devicetree_update(s32 scope)
240{ 270{
241 char *rtas_buf; 271 char *rtas_buf;
@@ -274,6 +304,10 @@ int pseries_devicetree_update(s32 scope)
274 break; 304 break;
275 case UPDATE_DT_NODE: 305 case UPDATE_DT_NODE:
276 update_dt_node(phandle, scope); 306 update_dt_node(phandle, scope);
307
308 if (scope == PRRN_SCOPE)
309 prrn_update_node(phandle);
310
277 break; 311 break;
278 case ADD_DT_NODE: 312 case ADD_DT_NODE:
279 drc_index = *data++; 313 drc_index = *data++;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 7736352f7279..b4d362ed03a1 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -66,6 +66,7 @@
66#include <asm/reg.h> 66#include <asm/reg.h>
67#include <asm/plpar_wrappers.h> 67#include <asm/plpar_wrappers.h>
68#include <asm/kexec.h> 68#include <asm/kexec.h>
69#include <asm/isa-bridge.h>
69 70
70#include "pseries.h" 71#include "pseries.h"
71 72
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 3f864c36d847..1be0499f5397 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1403,7 +1403,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
1403 struct pt_regs regs; 1403 struct pt_regs regs;
1404 1404
1405 while (max_to_print--) { 1405 while (max_to_print--) {
1406 if (sp < PAGE_OFFSET) { 1406 if (!is_kernel_addr(sp)) {
1407 if (sp != 0) 1407 if (sp != 0)
1408 printf("SP (%lx) is in userspace\n", sp); 1408 printf("SP (%lx) is in userspace\n", sp);
1409 break; 1409 break;
@@ -1431,12 +1431,12 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
1431 mread(newsp + LRSAVE_OFFSET, &nextip, 1431 mread(newsp + LRSAVE_OFFSET, &nextip,
1432 sizeof(unsigned long)); 1432 sizeof(unsigned long));
1433 if (lr == ip) { 1433 if (lr == ip) {
1434 if (lr < PAGE_OFFSET 1434 if (!is_kernel_addr(lr)
1435 || (fnstart <= lr && lr < fnend)) 1435 || (fnstart <= lr && lr < fnend))
1436 printip = 0; 1436 printip = 0;
1437 } else if (lr == nextip) { 1437 } else if (lr == nextip) {
1438 printip = 0; 1438 printip = 0;
1439 } else if (lr >= PAGE_OFFSET 1439 } else if (is_kernel_addr(lr)
1440 && !(fnstart <= lr && lr < fnend)) { 1440 && !(fnstart <= lr && lr < fnend)) {
1441 printf("[link register ] "); 1441 printf("[link register ] ");
1442 xmon_print_symbol(lr, " ", "\n"); 1442 xmon_print_symbol(lr, " ", "\n");
@@ -1496,7 +1496,7 @@ static void print_bug_trap(struct pt_regs *regs)
1496 if (regs->msr & MSR_PR) 1496 if (regs->msr & MSR_PR)
1497 return; /* not in kernel */ 1497 return; /* not in kernel */
1498 addr = regs->nip; /* address of trap instruction */ 1498 addr = regs->nip; /* address of trap instruction */
1499 if (addr < PAGE_OFFSET) 1499 if (!is_kernel_addr(addr))
1500 return; 1500 return;
1501 bug = find_bug(regs->nip); 1501 bug = find_bug(regs->nip);
1502 if (bug == NULL) 1502 if (bug == NULL)
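
[Editor's note: the xmon conversion replaces open-coded "addr < PAGE_OFFSET" comparisons with is_kernel_addr(), which centralises the kernel/user split instead of hard-wiring one memory layout. On powerpc it is roughly this, simplified from asm/page.h:]

	#ifdef CONFIG_PPC_BOOK3E_64
	#define is_kernel_addr(x)	((x) >= 0x8000000000000000ul)
	#else
	#define is_kernel_addr(x)	((x) >= PAGE_OFFSET)
	#endif
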
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 0835a37a5f3a..370593006f5f 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -19,7 +19,12 @@
19#include <asm/firmware.h> 19#include <asm/firmware.h>
20#include <asm/opal.h> 20#include <asm/opal.h>
21#include <asm/runlatch.h> 21#include <asm/runlatch.h>
22#include <asm/cpuidle.h>
22 23
24/*
25 * Expose only those Hardware idle states via the cpuidle framework
26 * that have latency value below POWERNV_THRESHOLD_LATENCY_NS.
27 */
23#define POWERNV_THRESHOLD_LATENCY_NS 200000 28#define POWERNV_THRESHOLD_LATENCY_NS 200000
24 29
25static struct cpuidle_driver powernv_idle_driver = { 30static struct cpuidle_driver powernv_idle_driver = {
@@ -30,7 +35,12 @@ static struct cpuidle_driver powernv_idle_driver = {
30static int max_idle_state; 35static int max_idle_state;
31static struct cpuidle_state *cpuidle_state_table; 36static struct cpuidle_state *cpuidle_state_table;
32 37
33static u64 stop_psscr_table[CPUIDLE_STATE_MAX]; 38struct stop_psscr_table {
39 u64 val;
40 u64 mask;
41};
42
43static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX];
34 44
35static u64 snooze_timeout; 45static u64 snooze_timeout;
36static bool snooze_timeout_en; 46static bool snooze_timeout_en;
@@ -102,7 +112,8 @@ static int stop_loop(struct cpuidle_device *dev,
102 int index) 112 int index)
103{ 113{
104 ppc64_runlatch_off(); 114 ppc64_runlatch_off();
105 power9_idle_stop(stop_psscr_table[index]); 115 power9_idle_stop(stop_psscr_table[index].val,
116 stop_psscr_table[index].mask);
106 ppc64_runlatch_on(); 117 ppc64_runlatch_on();
107 return index; 118 return index;
108} 119}
@@ -167,6 +178,25 @@ static int powernv_cpuidle_driver_init(void)
167 return 0; 178 return 0;
168} 179}
169 180
181static inline void add_powernv_state(int index, const char *name,
182 unsigned int flags,
183 int (*idle_fn)(struct cpuidle_device *,
184 struct cpuidle_driver *,
185 int),
186 unsigned int target_residency,
187 unsigned int exit_latency,
188 u64 psscr_val, u64 psscr_mask)
189{
190 strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
191 strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
192 powernv_states[index].flags = flags;
193 powernv_states[index].target_residency = target_residency;
194 powernv_states[index].exit_latency = exit_latency;
195 powernv_states[index].enter = idle_fn;
196 stop_psscr_table[index].val = psscr_val;
197 stop_psscr_table[index].mask = psscr_mask;
198}
199
170static int powernv_add_idle_states(void) 200static int powernv_add_idle_states(void)
171{ 201{
172 struct device_node *power_mgt; 202 struct device_node *power_mgt;
@@ -176,7 +206,9 @@ static int powernv_add_idle_states(void)
176 u32 residency_ns[CPUIDLE_STATE_MAX]; 206 u32 residency_ns[CPUIDLE_STATE_MAX];
177 u32 flags[CPUIDLE_STATE_MAX]; 207 u32 flags[CPUIDLE_STATE_MAX];
178 u64 psscr_val[CPUIDLE_STATE_MAX]; 208 u64 psscr_val[CPUIDLE_STATE_MAX];
209 u64 psscr_mask[CPUIDLE_STATE_MAX];
179 const char *names[CPUIDLE_STATE_MAX]; 210 const char *names[CPUIDLE_STATE_MAX];
211 u32 has_stop_states = 0;
180 int i, rc; 212 int i, rc;
181 213
182 /* Currently we have snooze statically defined */ 214 /* Currently we have snooze statically defined */
@@ -223,19 +255,30 @@ static int powernv_add_idle_states(void)
223 255
224 /* 256 /*
225 * If the idle states use stop instruction, probe for psscr values 257 * If the idle states use stop instruction, probe for psscr values
226 * which are necessary to specify required stop level. 258 * and psscr mask which are necessary to specify required stop level.
227 */ 259 */
228 if (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP)) 260 has_stop_states = (flags[0] &
261 (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
262 if (has_stop_states) {
229 if (of_property_read_u64_array(power_mgt, 263 if (of_property_read_u64_array(power_mgt,
230 "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) { 264 "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
231 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n"); 265 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
266 goto out;
267 }
268
269 if (of_property_read_u64_array(power_mgt,
270 "ibm,cpu-idle-state-psscr-mask",
271 psscr_mask, dt_idle_states)) {
 272 			pr_warn("cpuidle-powernv: Missing ibm,cpu-idle-state-psscr-mask in DT\n");
232 goto out; 273 goto out;
233 } 274 }
275 }
234 276
235 rc = of_property_read_u32_array(power_mgt, 277 rc = of_property_read_u32_array(power_mgt,
236 "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states); 278 "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
237 279
238 for (i = 0; i < dt_idle_states; i++) { 280 for (i = 0; i < dt_idle_states; i++) {
281 unsigned int exit_latency, target_residency;
239 /* 282 /*
240 * If an idle state has exit latency beyond 283 * If an idle state has exit latency beyond
241 * POWERNV_THRESHOLD_LATENCY_NS then don't use it 284 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
@@ -243,28 +286,43 @@ static int powernv_add_idle_states(void)
243 */ 286 */
244 if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS) 287 if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
245 continue; 288 continue;
289 /*
290 * Firmware passes residency and latency values in ns.
291 * cpuidle expects it in us.
292 */
293 exit_latency = latency_ns[i] / 1000;
294 if (!rc)
295 target_residency = residency_ns[i] / 1000;
296 else
297 target_residency = 0;
298
299 if (has_stop_states) {
300 int err = validate_psscr_val_mask(&psscr_val[i],
301 &psscr_mask[i],
302 flags[i]);
303 if (err) {
304 report_invalid_psscr_val(psscr_val[i], err);
305 continue;
306 }
307 }
246 308
247 /* 309 /*
248 * Cpuidle accepts exit_latency and target_residency in us. 310 * For nap and fastsleep, use default target_residency
249 * Use default target_residency values if f/w does not expose it. 311 * values if f/w does not expose it.
250 */ 312 */
251 if (flags[i] & OPAL_PM_NAP_ENABLED) { 313 if (flags[i] & OPAL_PM_NAP_ENABLED) {
314 if (!rc)
315 target_residency = 100;
252 /* Add NAP state */ 316 /* Add NAP state */
253 strcpy(powernv_states[nr_idle_states].name, "Nap"); 317 add_powernv_state(nr_idle_states, "Nap",
254 strcpy(powernv_states[nr_idle_states].desc, "Nap"); 318 CPUIDLE_FLAG_NONE, nap_loop,
255 powernv_states[nr_idle_states].flags = 0; 319 target_residency, exit_latency, 0, 0);
256 powernv_states[nr_idle_states].target_residency = 100;
257 powernv_states[nr_idle_states].enter = nap_loop;
258 } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) && 320 } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
259 !(flags[i] & OPAL_PM_TIMEBASE_STOP)) { 321 !(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
260 strncpy(powernv_states[nr_idle_states].name, 322 add_powernv_state(nr_idle_states, names[i],
261 names[i], CPUIDLE_NAME_LEN); 323 CPUIDLE_FLAG_NONE, stop_loop,
262 strncpy(powernv_states[nr_idle_states].desc, 324 target_residency, exit_latency,
263 names[i], CPUIDLE_NAME_LEN); 325 psscr_val[i], psscr_mask[i]);
264 powernv_states[nr_idle_states].flags = 0;
265
266 powernv_states[nr_idle_states].enter = stop_loop;
267 stop_psscr_table[nr_idle_states] = psscr_val[i];
268 } 326 }
269 327
270 /* 328 /*
@@ -274,32 +332,21 @@ static int powernv_add_idle_states(void)
274#ifdef CONFIG_TICK_ONESHOT 332#ifdef CONFIG_TICK_ONESHOT
275 if (flags[i] & OPAL_PM_SLEEP_ENABLED || 333 if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
276 flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { 334 flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
335 if (!rc)
336 target_residency = 300000;
277 /* Add FASTSLEEP state */ 337 /* Add FASTSLEEP state */
278 strcpy(powernv_states[nr_idle_states].name, "FastSleep"); 338 add_powernv_state(nr_idle_states, "FastSleep",
279 strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); 339 CPUIDLE_FLAG_TIMER_STOP,
280 powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP; 340 fastsleep_loop,
281 powernv_states[nr_idle_states].target_residency = 300000; 341 target_residency, exit_latency, 0, 0);
282 powernv_states[nr_idle_states].enter = fastsleep_loop;
283 } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) && 342 } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
284 (flags[i] & OPAL_PM_TIMEBASE_STOP)) { 343 (flags[i] & OPAL_PM_TIMEBASE_STOP)) {
285 strncpy(powernv_states[nr_idle_states].name, 344 add_powernv_state(nr_idle_states, names[i],
286 names[i], CPUIDLE_NAME_LEN); 345 CPUIDLE_FLAG_TIMER_STOP, stop_loop,
287 strncpy(powernv_states[nr_idle_states].desc, 346 target_residency, exit_latency,
288 names[i], CPUIDLE_NAME_LEN); 347 psscr_val[i], psscr_mask[i]);
289
290 powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
291 powernv_states[nr_idle_states].enter = stop_loop;
292 stop_psscr_table[nr_idle_states] = psscr_val[i];
293 } 348 }
294#endif 349#endif
295 powernv_states[nr_idle_states].exit_latency =
296 ((unsigned int)latency_ns[i]) / 1000;
297
298 if (!rc) {
299 powernv_states[nr_idle_states].target_residency =
300 ((unsigned int)residency_ns[i]) / 1000;
301 }
302
303 nr_idle_states++; 350 nr_idle_states++;
304 } 351 }
305out: 352out:
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 5d80810934df..97a420c11eed 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -30,14 +30,6 @@ config ADB_MACII
30 Quadra 610, Quadra 650, Quadra 700, Quadra 800, Centris 610 and 30 Quadra 610, Quadra 650, Quadra 700, Quadra 800, Centris 610 and
31 Centris 650. 31 Centris 650.
32 32
33config ADB_MACIISI
34 bool "Include Mac IIsi ADB driver"
35 depends on ADB && MAC && BROKEN
36 help
37 Say Y here if want your kernel to support Macintosh systems that use
38 the Mac IIsi style ADB. This includes the IIsi, IIvi, IIvx, Classic
39 II, LC, LC II, LC III, Performa 460, and the Performa 600.
40
41config ADB_IOP 33config ADB_IOP
42 bool "Include IOP (IIfx/Quadra 9x0) ADB driver" 34 bool "Include IOP (IIfx/Quadra 9x0) ADB driver"
43 depends on ADB && MAC 35 depends on ADB && MAC
@@ -60,17 +52,15 @@ config ADB_PMU68K
60 52
61# we want to change this to something like CONFIG_SYSCTRL_CUDA/PMU 53# we want to change this to something like CONFIG_SYSCTRL_CUDA/PMU
62config ADB_CUDA 54config ADB_CUDA
63 bool "Support for CUDA based Macs and PowerMacs" 55 bool "Support for Cuda/Egret based Macs and PowerMacs"
64 depends on (ADB || PPC_PMAC) && !PPC_PMAC64 56 depends on (ADB || PPC_PMAC) && !PPC_PMAC64
65 help 57 help
66 This provides support for CUDA based Macintosh and Power Macintosh 58 This provides support for Cuda/Egret based Macintosh and
67 systems. This includes many m68k based Macs (Color Classic, Mac TV, 59 Power Macintosh systems. This includes most m68k based Macs,
68 Performa 475, Performa 520, Performa 550, Performa 575, 60 most Old World PowerMacs, the first generation iMacs, the
69 Performa 588, Quadra 605, Quadra 630, Quadra/Centris 660AV, and 61 Blue & White G3 and the "Yikes" G4 (PCI Graphics). All later
70 Quadra 840AV), most OldWorld PowerMacs, the first generation iMacs, 62 models should use CONFIG_ADB_PMU instead. It is safe to say Y
71 the Blue&White G3 and the "Yikes" G4 (PCI Graphics). All later 63 here even if your machine doesn't have a Cuda or Egret device.
72 models should use CONFIG_ADB_PMU instead. It is safe to say Y here
73 even if your machine doesn't have a CUDA.
74 64
75 If unsure say Y. 65 If unsure say Y.
76 66
diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile
index 383ba920085b..516eb65bcacc 100644
--- a/drivers/macintosh/Makefile
+++ b/drivers/macintosh/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_PMAC_SMU) += smu.o
20 20
21obj-$(CONFIG_ADB) += adb.o 21obj-$(CONFIG_ADB) += adb.o
22obj-$(CONFIG_ADB_MACII) += via-macii.o 22obj-$(CONFIG_ADB_MACII) += via-macii.o
23obj-$(CONFIG_ADB_MACIISI) += via-maciisi.o
24obj-$(CONFIG_ADB_IOP) += adb-iop.o 23obj-$(CONFIG_ADB_IOP) += adb-iop.o
25obj-$(CONFIG_ADB_PMU68K) += via-pmu68k.o 24obj-$(CONFIG_ADB_PMU68K) += via-pmu68k.o
26obj-$(CONFIG_ADB_MACIO) += macio-adb.o 25obj-$(CONFIG_ADB_MACIO) += macio-adb.o
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 226179b975a0..152414e6378a 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -48,7 +48,6 @@
48EXPORT_SYMBOL(adb_client_list); 48EXPORT_SYMBOL(adb_client_list);
49 49
50extern struct adb_driver via_macii_driver; 50extern struct adb_driver via_macii_driver;
51extern struct adb_driver via_maciisi_driver;
52extern struct adb_driver via_cuda_driver; 51extern struct adb_driver via_cuda_driver;
53extern struct adb_driver adb_iop_driver; 52extern struct adb_driver adb_iop_driver;
54extern struct adb_driver via_pmu_driver; 53extern struct adb_driver via_pmu_driver;
@@ -59,9 +58,6 @@ static struct adb_driver *adb_driver_list[] = {
59#ifdef CONFIG_ADB_MACII 58#ifdef CONFIG_ADB_MACII
60 &via_macii_driver, 59 &via_macii_driver,
61#endif 60#endif
62#ifdef CONFIG_ADB_MACIISI
63 &via_maciisi_driver,
64#endif
65#ifdef CONFIG_ADB_CUDA 61#ifdef CONFIG_ADB_CUDA
66 &via_cuda_driver, 62 &via_cuda_driver,
67#endif 63#endif
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 2088e23a8002..c60415958dfe 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -1,10 +1,10 @@
1/* 1/*
2 * Device driver for the via-cuda on Apple Powermacs. 2 * Device driver for the Cuda and Egret system controllers found on PowerMacs
3 * and 68k Macs.
3 * 4 *
4 * The VIA (versatile interface adapter) interfaces to the CUDA, 5 * The Cuda or Egret is a 6805 microcontroller interfaced to the 6522 VIA.
5 * a 6805 microprocessor core which controls the ADB (Apple Desktop 6 * This MCU controls system power, Parameter RAM, Real Time Clock and the
6 * Bus) which connects to the keyboard and mouse. The CUDA also 7 * Apple Desktop Bus (ADB) that connects to the keyboard and mouse.
7 * controls system power and the RTC (real time clock) chip.
8 * 8 *
9 * Copyright (C) 1996 Paul Mackerras. 9 * Copyright (C) 1996 Paul Mackerras.
10 */ 10 */
@@ -50,10 +50,27 @@ static DEFINE_SPINLOCK(cuda_lock);
50#define IER (14*RS) /* Interrupt enable register */ 50#define IER (14*RS) /* Interrupt enable register */
51#define ANH (15*RS) /* A-side data, no handshake */ 51#define ANH (15*RS) /* A-side data, no handshake */
52 52
53/* Bits in B data register: all active low */ 53/*
54#define TREQ 0x08 /* Transfer request (input) */ 54 * When the Cuda design replaced the Egret, some signal names and
55#define TACK 0x10 /* Transfer acknowledge (output) */ 55 * logic sense changed. They all serve the same purposes, however.
56#define TIP 0x20 /* Transfer in progress (output) */ 56 *
57 * VIA pin | Egret pin
58 * ----------------+------------------------------------------
59 * PB3 (input) | Transceiver session (active low)
60 * PB4 (output) | VIA full (active high)
61 * PB5 (output) | System session (active high)
62 *
63 * VIA pin | Cuda pin
64 * ----------------+------------------------------------------
65 * PB3 (input) | Transfer request (active low)
66 * PB4 (output) | Byte acknowledge (active low)
67 * PB5 (output) | Transfer in progress (active low)
68 */
69
70/* Bits in Port B data register */
71#define TREQ 0x08 /* Transfer request */
72#define TACK 0x10 /* Transfer acknowledge */
73#define TIP 0x20 /* Transfer in progress */
57 74
58/* Bits in ACR */ 75/* Bits in ACR */
59#define SR_CTRL 0x1c /* Shift register control bits */ 76#define SR_CTRL 0x1c /* Shift register control bits */
@@ -65,6 +82,74 @@ static DEFINE_SPINLOCK(cuda_lock);
65#define IER_CLR 0 /* clear bits in IER */ 82#define IER_CLR 0 /* clear bits in IER */
66#define SR_INT 0x04 /* Shift register full/empty */ 83#define SR_INT 0x04 /* Shift register full/empty */
67 84
85/* Duration of byte acknowledgement pulse (us) */
86#define EGRET_TACK_ASSERTED_DELAY 300
87#define EGRET_TACK_NEGATED_DELAY 400
88
89/* Interval from interrupt to start of session (us) */
90#define EGRET_SESSION_DELAY 450
91
92#ifdef CONFIG_PPC
93#define mcu_is_egret false
94#else
95static bool mcu_is_egret;
96#endif
97
98static inline bool TREQ_asserted(u8 portb)
99{
100 return !(portb & TREQ);
101}
102
103static inline void assert_TIP(void)
104{
105 if (mcu_is_egret) {
106 udelay(EGRET_SESSION_DELAY);
107 out_8(&via[B], in_8(&via[B]) | TIP);
108 } else
109 out_8(&via[B], in_8(&via[B]) & ~TIP);
110}
111
112static inline void assert_TIP_and_TACK(void)
113{
114 if (mcu_is_egret) {
115 udelay(EGRET_SESSION_DELAY);
116 out_8(&via[B], in_8(&via[B]) | TIP | TACK);
117 } else
118 out_8(&via[B], in_8(&via[B]) & ~(TIP | TACK));
119}
120
121static inline void assert_TACK(void)
122{
123 if (mcu_is_egret) {
124 udelay(EGRET_TACK_NEGATED_DELAY);
125 out_8(&via[B], in_8(&via[B]) | TACK);
126 } else
127 out_8(&via[B], in_8(&via[B]) & ~TACK);
128}
129
130static inline void toggle_TACK(void)
131{
132 out_8(&via[B], in_8(&via[B]) ^ TACK);
133}
134
135static inline void negate_TACK(void)
136{
137 if (mcu_is_egret) {
138 udelay(EGRET_TACK_ASSERTED_DELAY);
139 out_8(&via[B], in_8(&via[B]) & ~TACK);
140 } else
141 out_8(&via[B], in_8(&via[B]) | TACK);
142}
143
144static inline void negate_TIP_and_TACK(void)
145{
146 if (mcu_is_egret) {
147 udelay(EGRET_TACK_ASSERTED_DELAY);
148 out_8(&via[B], in_8(&via[B]) & ~(TIP | TACK));
149 } else
150 out_8(&via[B], in_8(&via[B]) | TIP | TACK);
151}
152
68static enum cuda_state { 153static enum cuda_state {
69 idle, 154 idle,
70 sent_first_byte, 155 sent_first_byte,
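
[Editor's note: the helpers above hide the polarity differences listed in the signal table. On Cuda the lines are active low and asserted by clearing bits; on Egret most are active high and need settling delays. Their intended use is visible in cuda_start() further down; in miniature:]

	/* Begin an outbound packet (mirrors cuda_start() below) */
	out_8(&via[ACR], in_8(&via[ACR]) | SR_OUT);	/* SR shifts out */
	out_8(&via[SR], req->data[0]);			/* load first byte */
	if (mcu_is_egret)
		assert_TIP_and_TACK();
	else
		assert_TIP();
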
@@ -120,11 +205,13 @@ int __init find_via_cuda(void)
120 struct adb_request req; 205 struct adb_request req;
121 int err; 206 int err;
122 207
123 if (macintosh_config->adb_type != MAC_ADB_CUDA) 208 if (macintosh_config->adb_type != MAC_ADB_CUDA &&
209 macintosh_config->adb_type != MAC_ADB_EGRET)
124 return 0; 210 return 0;
125 211
126 via = via1; 212 via = via1;
127 cuda_state = idle; 213 cuda_state = idle;
214 mcu_is_egret = macintosh_config->adb_type == MAC_ADB_EGRET;
128 215
129 err = cuda_init_via(); 216 err = cuda_init_via();
130 if (err) { 217 if (err) {
@@ -221,7 +308,7 @@ static int __init via_cuda_start(void)
221 return -EAGAIN; 308 return -EAGAIN;
222 } 309 }
223 310
224 printk("Macintosh CUDA driver v0.5 for Unified ADB.\n"); 311 pr_info("Macintosh Cuda and Egret driver.\n");
225 312
226 cuda_fully_inited = 1; 313 cuda_fully_inited = 1;
227 return 0; 314 return 0;
@@ -237,7 +324,8 @@ cuda_probe(void)
237 if (sys_ctrler != SYS_CTRLER_CUDA) 324 if (sys_ctrler != SYS_CTRLER_CUDA)
238 return -ENODEV; 325 return -ENODEV;
239#else 326#else
240 if (macintosh_config->adb_type != MAC_ADB_CUDA) 327 if (macintosh_config->adb_type != MAC_ADB_CUDA &&
328 macintosh_config->adb_type != MAC_ADB_EGRET)
241 return -ENODEV; 329 return -ENODEV;
242#endif 330#endif
243 if (via == NULL) 331 if (via == NULL)
@@ -246,12 +334,39 @@ cuda_probe(void)
246} 334}
247#endif /* CONFIG_ADB */ 335#endif /* CONFIG_ADB */
248 336
337static int __init sync_egret(void)
338{
339 if (TREQ_asserted(in_8(&via[B]))) {
340 /* Complete the inbound transfer */
341 assert_TIP_and_TACK();
342 while (1) {
343 negate_TACK();
344 mdelay(1);
345 (void)in_8(&via[SR]);
346 assert_TACK();
347 if (!TREQ_asserted(in_8(&via[B])))
348 break;
349 }
350 negate_TIP_and_TACK();
351 } else if (in_8(&via[B]) & TIP) {
352 /* Terminate the outbound transfer */
353 negate_TACK();
354 assert_TACK();
355 mdelay(1);
356 negate_TIP_and_TACK();
357 }
358 /* Clear shift register interrupt */
359 if (in_8(&via[IFR]) & SR_INT)
360 (void)in_8(&via[SR]);
361 return 0;
362}
363
249#define WAIT_FOR(cond, what) \ 364#define WAIT_FOR(cond, what) \
250 do { \ 365 do { \
251 int x; \ 366 int x; \
252 for (x = 1000; !(cond); --x) { \ 367 for (x = 1000; !(cond); --x) { \
253 if (x == 0) { \ 368 if (x == 0) { \
254 printk("Timeout waiting for " what "\n"); \ 369 pr_err("Timeout waiting for " what "\n"); \
255 return -ENXIO; \ 370 return -ENXIO; \
256 } \ 371 } \
257 udelay(100); \ 372 udelay(100); \
@@ -261,10 +376,6 @@ cuda_probe(void)
261static int 376static int
262__init cuda_init_via(void) 377__init cuda_init_via(void)
263{ 378{
264 out_8(&via[DIRB], (in_8(&via[DIRB]) | TACK | TIP) & ~TREQ); /* TACK & TIP out */
265 out_8(&via[B], in_8(&via[B]) | TACK | TIP); /* negate them */
266 out_8(&via[ACR] ,(in_8(&via[ACR]) & ~SR_CTRL) | SR_EXT); /* SR data in */
267 (void)in_8(&via[SR]); /* clear any left-over data */
268#ifdef CONFIG_PPC 379#ifdef CONFIG_PPC
269 out_8(&via[IER], 0x7f); /* disable interrupts from VIA */ 380 out_8(&via[IER], 0x7f); /* disable interrupts from VIA */
270 (void)in_8(&via[IER]); 381 (void)in_8(&via[IER]);
@@ -272,16 +383,25 @@ __init cuda_init_via(void)
272 out_8(&via[IER], SR_INT); /* disable SR interrupt from VIA */ 383 out_8(&via[IER], SR_INT); /* disable SR interrupt from VIA */
273#endif 384#endif
274 385
386 out_8(&via[DIRB], (in_8(&via[DIRB]) | TACK | TIP) & ~TREQ); /* TACK & TIP out */
387 out_8(&via[ACR], (in_8(&via[ACR]) & ~SR_CTRL) | SR_EXT); /* SR data in */
388 (void)in_8(&via[SR]); /* clear any left-over data */
389
390 if (mcu_is_egret)
391 return sync_egret();
392
393 negate_TIP_and_TACK();
394
275 /* delay 4ms and then clear any pending interrupt */ 395 /* delay 4ms and then clear any pending interrupt */
276 mdelay(4); 396 mdelay(4);
277 (void)in_8(&via[SR]); 397 (void)in_8(&via[SR]);
278 out_8(&via[IFR], SR_INT); 398 out_8(&via[IFR], SR_INT);
279 399
280 /* sync with the CUDA - assert TACK without TIP */ 400 /* sync with the CUDA - assert TACK without TIP */
281 out_8(&via[B], in_8(&via[B]) & ~TACK); 401 assert_TACK();
282 402
283 /* wait for the CUDA to assert TREQ in response */ 403 /* wait for the CUDA to assert TREQ in response */
284 WAIT_FOR((in_8(&via[B]) & TREQ) == 0, "CUDA response to sync"); 404 WAIT_FOR(TREQ_asserted(in_8(&via[B])), "CUDA response to sync");
285 405
286 /* wait for the interrupt and then clear it */ 406 /* wait for the interrupt and then clear it */
287 WAIT_FOR(in_8(&via[IFR]) & SR_INT, "CUDA response to sync (2)"); 407 WAIT_FOR(in_8(&via[IFR]) & SR_INT, "CUDA response to sync (2)");
@@ -289,14 +409,13 @@ __init cuda_init_via(void)
289 out_8(&via[IFR], SR_INT); 409 out_8(&via[IFR], SR_INT);
290 410
291 /* finish the sync by negating TACK */ 411 /* finish the sync by negating TACK */
292 out_8(&via[B], in_8(&via[B]) | TACK); 412 negate_TACK();
293 413
294 /* wait for the CUDA to negate TREQ and the corresponding interrupt */ 414 /* wait for the CUDA to negate TREQ and the corresponding interrupt */
295 WAIT_FOR(in_8(&via[B]) & TREQ, "CUDA response to sync (3)"); 415 WAIT_FOR(!TREQ_asserted(in_8(&via[B])), "CUDA response to sync (3)");
296 WAIT_FOR(in_8(&via[IFR]) & SR_INT, "CUDA response to sync (4)"); 416 WAIT_FOR(in_8(&via[IFR]) & SR_INT, "CUDA response to sync (4)");
297 (void)in_8(&via[SR]); 417 (void)in_8(&via[SR]);
298 out_8(&via[IFR], SR_INT); 418 out_8(&via[IFR], SR_INT);
299 out_8(&via[B], in_8(&via[B]) | TIP); /* should be unnecessary */
300 419
301 return 0; 420 return 0;
302} 421}
@@ -357,6 +476,7 @@ cuda_reset_adb_bus(void)
357 return 0; 476 return 0;
358} 477}
359#endif /* CONFIG_ADB */ 478#endif /* CONFIG_ADB */
479
360/* Construct and send a cuda request */ 480/* Construct and send a cuda request */
361int 481int
362cuda_request(struct adb_request *req, void (*done)(struct adb_request *), 482cuda_request(struct adb_request *req, void (*done)(struct adb_request *),
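
cuda_request() builds the packet from its variadic arguments and queues it; callers that need the reply synchronously can spin on req.complete with cuda_poll(). A hedged usage sketch (CUDA_PACKET and CUDA_GET_TIME are the usual constants from include/linux/cuda.h; the reply byte offsets are an assumption based on the classic get-time callers):

	/* Illustrative only: issue a GET_TIME request and poll for completion. */
	#include <linux/adb.h>
	#include <linux/cuda.h>

	static u32 cuda_read_time_example(void)
	{
		struct adb_request req;

		if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
			return 0;
		while (!req.complete)
			cuda_poll();
		/* Reply layout assumed: the time starts at req.reply[3]. */
		return (req.reply[3] << 24) | (req.reply[4] << 16) |
		       (req.reply[5] << 8) | req.reply[6];
	}
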
@@ -413,47 +533,43 @@ cuda_write(struct adb_request *req)
413static void 533static void
414cuda_start(void) 534cuda_start(void)
415{ 535{
416 struct adb_request *req;
417
418 /* assert cuda_state == idle */ 536 /* assert cuda_state == idle */
419 /* get the packet to send */ 537 if (current_req == NULL)
420 req = current_req;
421 if (req == 0)
422 return; 538 return;
423 if ((in_8(&via[B]) & TREQ) == 0) 539 data_index = 0;
540 if (TREQ_asserted(in_8(&via[B])))
424 return; /* a byte is coming in from the CUDA */ 541 return; /* a byte is coming in from the CUDA */
425 542
426 /* set the shift register to shift out and send a byte */ 543 /* set the shift register to shift out and send a byte */
427 out_8(&via[ACR], in_8(&via[ACR]) | SR_OUT); 544 out_8(&via[ACR], in_8(&via[ACR]) | SR_OUT);
428 out_8(&via[SR], req->data[0]); 545 out_8(&via[SR], current_req->data[data_index++]);
429 out_8(&via[B], in_8(&via[B]) & ~TIP); 546 if (mcu_is_egret)
547 assert_TIP_and_TACK();
548 else
549 assert_TIP();
430 cuda_state = sent_first_byte; 550 cuda_state = sent_first_byte;
431} 551}
432 552
433void 553void
434cuda_poll(void) 554cuda_poll(void)
435{ 555{
436 /* cuda_interrupt only takes a normal lock, we disable 556 cuda_interrupt(0, NULL);
437 * interrupts here to avoid re-entering and thus deadlocking.
438 */
439 if (cuda_irq)
440 disable_irq(cuda_irq);
441 cuda_interrupt(0, NULL);
442 if (cuda_irq)
443 enable_irq(cuda_irq);
444} 557}
445EXPORT_SYMBOL(cuda_poll); 558EXPORT_SYMBOL(cuda_poll);
446 559
560#define ARRAY_FULL(a, p) ((p) - (a) == ARRAY_SIZE(a))
561
447static irqreturn_t 562static irqreturn_t
448cuda_interrupt(int irq, void *arg) 563cuda_interrupt(int irq, void *arg)
449{ 564{
450 int status; 565 unsigned long flags;
566 u8 status;
451 struct adb_request *req = NULL; 567 struct adb_request *req = NULL;
452 unsigned char ibuf[16]; 568 unsigned char ibuf[16];
453 int ibuf_len = 0; 569 int ibuf_len = 0;
454 int complete = 0; 570 int complete = 0;
455 571
456 spin_lock(&cuda_lock); 572 spin_lock_irqsave(&cuda_lock, flags);
457 573
458 /* On powermacs, this handler is registered for the VIA IRQ. But they use 574 /* On powermacs, this handler is registered for the VIA IRQ. But they use
459 * just the shift register IRQ -- other VIA interrupt sources are disabled. 575 * just the shift register IRQ -- other VIA interrupt sources are disabled.
@@ -466,52 +582,50 @@ cuda_interrupt(int irq, void *arg)
466#endif 582#endif
467 { 583 {
468 if ((in_8(&via[IFR]) & SR_INT) == 0) { 584 if ((in_8(&via[IFR]) & SR_INT) == 0) {
469 spin_unlock(&cuda_lock); 585 spin_unlock_irqrestore(&cuda_lock, flags);
470 return IRQ_NONE; 586 return IRQ_NONE;
471 } else { 587 } else {
472 out_8(&via[IFR], SR_INT); 588 out_8(&via[IFR], SR_INT);
473 } 589 }
474 } 590 }
475 591
476 status = (~in_8(&via[B]) & (TIP|TREQ)) | (in_8(&via[ACR]) & SR_OUT); 592 status = in_8(&via[B]) & (TIP | TACK | TREQ);
477 /* printk("cuda_interrupt: state=%d status=%x\n", cuda_state, status); */ 593
478 switch (cuda_state) { 594 switch (cuda_state) {
479 case idle: 595 case idle:
480 /* CUDA has sent us the first byte of data - unsolicited */ 596 /* System controller has unsolicited data for us */
481 if (status != TREQ)
482 printk("cuda: state=idle, status=%x\n", status);
483 (void)in_8(&via[SR]); 597 (void)in_8(&via[SR]);
484 out_8(&via[B], in_8(&via[B]) & ~TIP); 598idle_state:
599 assert_TIP();
485 cuda_state = reading; 600 cuda_state = reading;
486 reply_ptr = cuda_rbuf; 601 reply_ptr = cuda_rbuf;
487 reading_reply = 0; 602 reading_reply = 0;
488 break; 603 break;
489 604
490 case awaiting_reply: 605 case awaiting_reply:
491 /* CUDA has sent us the first byte of data of a reply */ 606 /* System controller has reply data for us */
492 if (status != TREQ)
493 printk("cuda: state=awaiting_reply, status=%x\n", status);
494 (void)in_8(&via[SR]); 607 (void)in_8(&via[SR]);
495 out_8(&via[B], in_8(&via[B]) & ~TIP); 608 assert_TIP();
496 cuda_state = reading; 609 cuda_state = reading;
497 reply_ptr = current_req->reply; 610 reply_ptr = current_req->reply;
498 reading_reply = 1; 611 reading_reply = 1;
499 break; 612 break;
500 613
501 case sent_first_byte: 614 case sent_first_byte:
502 if (status == TREQ + TIP + SR_OUT) { 615 if (TREQ_asserted(status)) {
503 /* collision */ 616 /* collision */
504 out_8(&via[ACR], in_8(&via[ACR]) & ~SR_OUT); 617 out_8(&via[ACR], in_8(&via[ACR]) & ~SR_OUT);
505 (void)in_8(&via[SR]); 618 (void)in_8(&via[SR]);
506 out_8(&via[B], in_8(&via[B]) | TIP | TACK); 619 negate_TIP_and_TACK();
507 cuda_state = idle; 620 cuda_state = idle;
621 /* Egret does not raise an "aborted" interrupt */
622 if (mcu_is_egret)
623 goto idle_state;
508 } else { 624 } else {
509 /* assert status == TIP + SR_OUT */ 625 out_8(&via[SR], current_req->data[data_index++]);
510 if (status != TIP + SR_OUT) 626 toggle_TACK();
511 printk("cuda: state=sent_first_byte status=%x\n", status); 627 if (mcu_is_egret)
512 out_8(&via[SR], current_req->data[1]); 628 assert_TACK();
513 out_8(&via[B], in_8(&via[B]) ^ TACK);
514 data_index = 2;
515 cuda_state = sending; 629 cuda_state = sending;
516 } 630 }
517 break; 631 break;
@@ -521,7 +635,7 @@ cuda_interrupt(int irq, void *arg)
521 if (data_index >= req->nbytes) { 635 if (data_index >= req->nbytes) {
522 out_8(&via[ACR], in_8(&via[ACR]) & ~SR_OUT); 636 out_8(&via[ACR], in_8(&via[ACR]) & ~SR_OUT);
523 (void)in_8(&via[SR]); 637 (void)in_8(&via[SR]);
524 out_8(&via[B], in_8(&via[B]) | TACK | TIP); 638 negate_TIP_and_TACK();
525 req->sent = 1; 639 req->sent = 1;
526 if (req->reply_expected) { 640 if (req->reply_expected) {
527 cuda_state = awaiting_reply; 641 cuda_state = awaiting_reply;
@@ -534,26 +648,37 @@ cuda_interrupt(int irq, void *arg)
534 } 648 }
535 } else { 649 } else {
536 out_8(&via[SR], req->data[data_index++]); 650 out_8(&via[SR], req->data[data_index++]);
537 out_8(&via[B], in_8(&via[B]) ^ TACK); 651 toggle_TACK();
652 if (mcu_is_egret)
653 assert_TACK();
538 } 654 }
539 break; 655 break;
540 656
541 case reading: 657 case reading:
542 *reply_ptr++ = in_8(&via[SR]); 658 if (reading_reply ? ARRAY_FULL(current_req->reply, reply_ptr)
543 if (status == TIP) { 659 : ARRAY_FULL(cuda_rbuf, reply_ptr))
660 (void)in_8(&via[SR]);
661 else
662 *reply_ptr++ = in_8(&via[SR]);
663 if (!TREQ_asserted(status)) {
664 if (mcu_is_egret)
665 assert_TACK();
544 /* that's all folks */ 666 /* that's all folks */
545 out_8(&via[B], in_8(&via[B]) | TACK | TIP); 667 negate_TIP_and_TACK();
546 cuda_state = read_done; 668 cuda_state = read_done;
669 /* Egret does not raise a "read done" interrupt */
670 if (mcu_is_egret)
671 goto read_done_state;
547 } else { 672 } else {
548 /* assert status == TIP | TREQ */ 673 toggle_TACK();
549 if (status != TIP + TREQ) 674 if (mcu_is_egret)
550 printk("cuda: state=reading status=%x\n", status); 675 negate_TACK();
551 out_8(&via[B], in_8(&via[B]) ^ TACK);
552 } 676 }
553 break; 677 break;
554 678
555 case read_done: 679 case read_done:
556 (void)in_8(&via[SR]); 680 (void)in_8(&via[SR]);
681read_done_state:
557 if (reading_reply) { 682 if (reading_reply) {
558 req = current_req; 683 req = current_req;
559 req->reply_len = reply_ptr - req->reply; 684 req->reply_len = reply_ptr - req->reply;
@@ -570,6 +695,7 @@ cuda_interrupt(int irq, void *arg)
570 } 695 }
571 current_req = req->next; 696 current_req = req->next;
572 complete = 1; 697 complete = 1;
698 reading_reply = 0;
573 } else { 699 } else {
574 /* This is tricky. We must break the spinlock to call 700 /* This is tricky. We must break the spinlock to call
575 * cuda_input. However, doing so means we might get 701 * cuda_input. However, doing so means we might get
@@ -581,21 +707,19 @@ cuda_interrupt(int irq, void *arg)
581 ibuf_len = reply_ptr - cuda_rbuf; 707 ibuf_len = reply_ptr - cuda_rbuf;
582 memcpy(ibuf, cuda_rbuf, ibuf_len); 708 memcpy(ibuf, cuda_rbuf, ibuf_len);
583 } 709 }
584 if (status == TREQ) { 710 reply_ptr = cuda_rbuf;
585 out_8(&via[B], in_8(&via[B]) & ~TIP); 711 cuda_state = idle;
712 cuda_start();
713 if (cuda_state == idle && TREQ_asserted(in_8(&via[B]))) {
714 assert_TIP();
586 cuda_state = reading; 715 cuda_state = reading;
587 reply_ptr = cuda_rbuf;
588 reading_reply = 0;
589 } else {
590 cuda_state = idle;
591 cuda_start();
592 } 716 }
593 break; 717 break;
594 718
595 default: 719 default:
596 printk("cuda_interrupt: unknown cuda_state %d?\n", cuda_state); 720 pr_err("cuda_interrupt: unknown cuda_state %d?\n", cuda_state);
597 } 721 }
598 spin_unlock(&cuda_lock); 722 spin_unlock_irqrestore(&cuda_lock, flags);
599 if (complete && req) { 723 if (complete && req) {
600 void (*done)(struct adb_request *) = req->done; 724 void (*done)(struct adb_request *) = req->done;
601 mb(); 725 mb();
@@ -614,8 +738,6 @@ cuda_interrupt(int irq, void *arg)
614static void 738static void
615cuda_input(unsigned char *buf, int nb) 739cuda_input(unsigned char *buf, int nb)
616{ 740{
617 int i;
618
619 switch (buf[0]) { 741 switch (buf[0]) {
620 case ADB_PACKET: 742 case ADB_PACKET:
621#ifdef CONFIG_XMON 743#ifdef CONFIG_XMON
@@ -632,10 +754,14 @@ cuda_input(unsigned char *buf, int nb)
632#endif /* CONFIG_ADB */ 754#endif /* CONFIG_ADB */
633 break; 755 break;
634 756
757 case TIMER_PACKET:
758 /* Egret sends these periodically. Might be useful as a 'heartbeat'
759 * to trigger a recovery for the VIA shift register errata.
760 */
761 break;
762
635 default: 763 default:
636 printk("data from cuda (%d bytes):", nb); 764 print_hex_dump(KERN_INFO, "cuda_input: ", DUMP_PREFIX_NONE, 32, 1,
637 for (i = 0; i < nb; ++i) 765 buf, nb, false);
638 printk(" %.2x", buf[i]);
639 printk("\n");
640 } 766 }
641} 767}
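
Two patterns in the via-cuda.c hunks are worth calling out. With cuda_interrupt() now taking the lock with spin_lock_irqsave(), cuda_poll() can call it directly instead of fencing the call with disable_irq()/enable_irq(). And the new ARRAY_FULL() test in the reading state keeps an over-long transfer from overrunning the 16-byte reply buffers: once the cursor reaches the end of the array, further bytes are still drained from the shift register but discarded. A standalone illustration of the guard, assuming the same cursor-into-fixed-array usage as the driver:

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
	#define ARRAY_FULL(a, p) ((p) - (a) == ARRAY_SIZE(a))

	int main(void)
	{
		unsigned char rbuf[16];
		unsigned char *p = rbuf;
		int byte;

		for (byte = 0; byte < 32; byte++) {
			if (ARRAY_FULL(rbuf, p))
				continue;	/* drain and discard, as the driver does */
			*p++ = (unsigned char)byte;
		}
		printf("stored %td of 32 bytes\n", p - rbuf);	/* stored 16 of 32 */
		return 0;
	}
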
diff --git a/drivers/macintosh/via-maciisi.c b/drivers/macintosh/via-maciisi.c
deleted file mode 100644
index 34d02a91b29f..000000000000
--- a/drivers/macintosh/via-maciisi.c
+++ /dev/null
@@ -1,677 +0,0 @@
1/*
2 * Device driver for the IIsi-style ADB on some Mac LC and II-class machines
3 *
4 * Based on via-cuda.c and via-macii.c, as well as the original
5 * adb-bus.c, which in turn is somewhat influenced by (but uses no
6 * code from) the NetBSD HWDIRECT ADB code. Original IIsi driver work
7 * was done by Robert Thompson and integrated into the old style
8 * driver by Michael Schmitz.
9 *
10 * Original sources (c) Alan Cox, Paul Mackerras, and others.
11 *
12 * Rewritten for Unified ADB by David Huggins-Daines <dhd@debian.org>
13 *
14 * 7/13/2000- extensive changes by Andrew McPherson <andrew@macduff.dhs.org>
15 * Works about 30% of the time now.
16 */
17
18#include <linux/types.h>
19#include <linux/errno.h>
20#include <linux/kernel.h>
21#include <linux/adb.h>
22#include <linux/cuda.h>
23#include <linux/delay.h>
24#include <linux/interrupt.h>
25#include <asm/macintosh.h>
26#include <asm/macints.h>
27#include <asm/mac_via.h>
28
29static volatile unsigned char *via;
30
31/* VIA registers - spaced 0x200 bytes apart - only the ones we actually use */
32#define RS 0x200 /* skip between registers */
33#define B 0 /* B-side data */
34#define A RS /* A-side data */
35#define DIRB (2*RS) /* B-side direction (1=output) */
36#define DIRA (3*RS) /* A-side direction (1=output) */
37#define SR (10*RS) /* Shift register */
38#define ACR (11*RS) /* Auxiliary control register */
39#define IFR (13*RS) /* Interrupt flag register */
40#define IER (14*RS) /* Interrupt enable register */
41
42/* Bits in B data register: all active low */
43#define TREQ 0x08 /* Transfer request (input) */
44#define TACK 0x10 /* Transfer acknowledge (output) */
45#define TIP 0x20 /* Transfer in progress (output) */
46#define ST_MASK 0x30 /* mask for selecting ADB state bits */
47
48/* Bits in ACR */
49#define SR_CTRL 0x1c /* Shift register control bits */
50#define SR_EXT 0x0c /* Shift on external clock */
51#define SR_OUT 0x10 /* Shift out if 1 */
52
53/* Bits in IFR and IER */
54#define IER_SET 0x80 /* set bits in IER */
55#define IER_CLR 0 /* clear bits in IER */
56#define SR_INT 0x04 /* Shift register full/empty */
57#define SR_DATA 0x08 /* Shift register data */
58#define SR_CLOCK 0x10 /* Shift register clock */
59
60#define ADB_DELAY 150
61
62#undef DEBUG_MACIISI_ADB
63
64static struct adb_request* current_req;
65static struct adb_request* last_req;
66static unsigned char maciisi_rbuf[16];
67static unsigned char *reply_ptr;
68static int data_index;
69static int reading_reply;
70static int reply_len;
71static int tmp;
72static int need_sync;
73
74static enum maciisi_state {
75 idle,
76 sending,
77 reading,
78} maciisi_state;
79
80static int maciisi_probe(void);
81static int maciisi_init(void);
82static int maciisi_send_request(struct adb_request* req, int sync);
83static void maciisi_sync(struct adb_request *req);
84static int maciisi_write(struct adb_request* req);
85static irqreturn_t maciisi_interrupt(int irq, void* arg);
86static void maciisi_input(unsigned char *buf, int nb);
87static int maciisi_init_via(void);
88static void maciisi_poll(void);
89static int maciisi_start(void);
90
91struct adb_driver via_maciisi_driver = {
92 "Mac IIsi",
93 maciisi_probe,
94 maciisi_init,
95 maciisi_send_request,
96 NULL, /* maciisi_adb_autopoll, */
97 maciisi_poll,
98 NULL /* maciisi_reset_adb_bus */
99};
100
101static int
102maciisi_probe(void)
103{
104 if (macintosh_config->adb_type != MAC_ADB_IISI)
105 return -ENODEV;
106
107 via = via1;
108 return 0;
109}
110
111static int
112maciisi_init(void)
113{
114 int err;
115
116 if (via == NULL)
117 return -ENODEV;
118
119 if ((err = maciisi_init_via())) {
120 printk(KERN_ERR "maciisi_init: maciisi_init_via() failed, code %d\n", err);
121 via = NULL;
122 return err;
123 }
124
125 if (request_irq(IRQ_MAC_ADB, maciisi_interrupt, 0, "ADB",
126 maciisi_interrupt)) {
127 printk(KERN_ERR "maciisi_init: can't get irq %d\n", IRQ_MAC_ADB);
128 return -EAGAIN;
129 }
130
131 printk("adb: Mac IIsi driver v0.2 for Unified ADB.\n");
132 return 0;
133}
134
135/* Flush data from the ADB controller */
136static void
137maciisi_stfu(void)
138{
139 int status = via[B] & (TIP|TREQ);
140
141 if (status & TREQ) {
142#ifdef DEBUG_MACIISI_ADB
143 printk (KERN_DEBUG "maciisi_stfu called with TREQ high!\n");
144#endif
145 return;
146 }
147
148 udelay(ADB_DELAY);
149 via[ACR] &= ~SR_OUT;
150 via[IER] = IER_CLR | SR_INT;
151
152 udelay(ADB_DELAY);
153
154 status = via[B] & (TIP|TREQ);
155
156 if (!(status & TREQ))
157 {
158 via[B] |= TIP;
159
160 while(1)
161 {
162 int poll_timeout = ADB_DELAY * 5;
163 /* Poll for SR interrupt */
164 while (!(via[IFR] & SR_INT) && poll_timeout-- > 0)
165 status = via[B] & (TIP|TREQ);
166
167 tmp = via[SR]; /* Clear shift register */
168#ifdef DEBUG_MACIISI_ADB
169 printk(KERN_DEBUG "maciisi_stfu: status %x timeout %d data %x\n",
170 status, poll_timeout, tmp);
171#endif
172 if(via[B] & TREQ)
173 break;
174
175 /* ACK on-off */
176 via[B] |= TACK;
177 udelay(ADB_DELAY);
178 via[B] &= ~TACK;
179 }
180
181 /* end frame */
182 via[B] &= ~TIP;
183 udelay(ADB_DELAY);
184 }
185
186 via[IER] = IER_SET | SR_INT;
187}
188
189/* All specifically VIA-related initialization goes here */
190static int
191maciisi_init_via(void)
192{
193 int i;
194
195 /* Set the lines up. We want TREQ as input TACK|TIP as output */
196 via[DIRB] = (via[DIRB] | TACK | TIP) & ~TREQ;
197 /* Shift register on input */
198 via[ACR] = (via[ACR] & ~SR_CTRL) | SR_EXT;
199#ifdef DEBUG_MACIISI_ADB
200 printk(KERN_DEBUG "maciisi_init_via: initial status %x\n", via[B] & (TIP|TREQ));
201#endif
202 /* Wipe any pending data and int */
203 tmp = via[SR];
204 /* Enable keyboard interrupts */
205 via[IER] = IER_SET | SR_INT;
206 /* Set initial state: idle */
207 via[B] &= ~(TACK|TIP);
208 /* Clear interrupt bit */
209 via[IFR] = SR_INT;
210
211 for(i = 0; i < 60; i++) {
212 udelay(ADB_DELAY);
213 maciisi_stfu();
214 udelay(ADB_DELAY);
215 if(via[B] & TREQ)
216 break;
217 }
218 if (i == 60)
219 printk(KERN_ERR "maciisi_init_via: bus jam?\n");
220
221 maciisi_state = idle;
222 need_sync = 0;
223
224 return 0;
225}
226
227/* Send a request, possibly waiting for a reply */
228static int
229maciisi_send_request(struct adb_request* req, int sync)
230{
231 int i;
232
233#ifdef DEBUG_MACIISI_ADB
234 static int dump_packet = 0;
235#endif
236
237 if (via == NULL) {
238 req->complete = 1;
239 return -ENXIO;
240 }
241
242#ifdef DEBUG_MACIISI_ADB
243 if (dump_packet) {
244 printk(KERN_DEBUG "maciisi_send_request:");
245 for (i = 0; i < req->nbytes; i++) {
246 printk(" %.2x", req->data[i]);
247 }
248 printk(" sync %d\n", sync);
249 }
250#endif
251
252 req->reply_expected = 1;
253
254 i = maciisi_write(req);
255 if (i)
256 {
257 /* Normally, if a packet requires syncing, that happens at the end of
258 * maciisi_send_request. But if the transfer fails, it will be restarted
259 * by maciisi_interrupt(). We use need_sync to tell maciisi_interrupt
260 * when to sync a packet that it sends out.
261 *
262 * Suggestions on a better way to do this are welcome.
263 */
264 if(i == -EBUSY && sync)
265 need_sync = 1;
266 else
267 need_sync = 0;
268 return i;
269 }
270 if(sync)
271 maciisi_sync(req);
272
273 return 0;
274}
275
276/* Poll the ADB chip until the request completes */
277static void maciisi_sync(struct adb_request *req)
278{
279 int count = 0;
280
281#ifdef DEBUG_MACIISI_ADB
282 printk(KERN_DEBUG "maciisi_sync called\n");
283#endif
284
285 /* If for some reason the ADB chip shuts up on us, we want to avoid an endless loop. */
286 while (!req->complete && count++ < 50) {
287 maciisi_poll();
288 }
289 /* This could be BAD... when the ADB controller doesn't respond
290 * for this long, it's probably not coming back :-( */
291 if (count > 50) /* Hopefully shouldn't happen */
292 printk(KERN_ERR "maciisi_send_request: poll timed out!\n");
293}
294
295int
296maciisi_request(struct adb_request *req, void (*done)(struct adb_request *),
297 int nbytes, ...)
298{
299 va_list list;
300 int i;
301
302 req->nbytes = nbytes;
303 req->done = done;
304 req->reply_expected = 0;
305 va_start(list, nbytes);
306 for (i = 0; i < nbytes; i++)
307 req->data[i++] = va_arg(list, int);
308 va_end(list);
309
310 return maciisi_send_request(req, 1);
311}
312
313/* Enqueue a request, and run the queue if possible */
314static int
315maciisi_write(struct adb_request* req)
316{
317 unsigned long flags;
318 int i;
319
320 /* We will accept CUDA packets - the VIA sends them to us, so
321 it figures that we should be able to send them to it */
322 if (req->nbytes < 2 || req->data[0] > CUDA_PACKET) {
323 printk(KERN_ERR "maciisi_write: packet too small or not an ADB or CUDA packet\n");
324 req->complete = 1;
325 return -EINVAL;
326 }
327 req->next = NULL;
328 req->sent = 0;
329 req->complete = 0;
330 req->reply_len = 0;
331
332 local_irq_save(flags);
333
334 if (current_req) {
335 last_req->next = req;
336 last_req = req;
337 } else {
338 current_req = req;
339 last_req = req;
340 }
341 if (maciisi_state == idle)
342 {
343 i = maciisi_start();
344 if(i != 0)
345 {
346 local_irq_restore(flags);
347 return i;
348 }
349 }
350 else
351 {
352#ifdef DEBUG_MACIISI_ADB
353 printk(KERN_DEBUG "maciisi_write: would start, but state is %d\n", maciisi_state);
354#endif
355 local_irq_restore(flags);
356 return -EBUSY;
357 }
358
359 local_irq_restore(flags);
360
361 return 0;
362}
363
364static int
365maciisi_start(void)
366{
367 struct adb_request* req;
368 int status;
369
370#ifdef DEBUG_MACIISI_ADB
371 status = via[B] & (TIP | TREQ);
372
373 printk(KERN_DEBUG "maciisi_start called, state=%d, status=%x, ifr=%x\n", maciisi_state, status, via[IFR]);
374#endif
375
376 if (maciisi_state != idle) {
377 /* shouldn't happen */
378 printk(KERN_ERR "maciisi_start: maciisi_start called when driver busy!\n");
379 return -EBUSY;
380 }
381
382 req = current_req;
383 if (req == NULL)
384 return -EINVAL;
385
386 status = via[B] & (TIP|TREQ);
387 if (!(status & TREQ)) {
388#ifdef DEBUG_MACIISI_ADB
389 printk(KERN_DEBUG "maciisi_start: bus busy - aborting\n");
390#endif
391 return -EBUSY;
392 }
393
394 /* Okay, send */
395#ifdef DEBUG_MACIISI_ADB
396 printk(KERN_DEBUG "maciisi_start: sending\n");
397#endif
398 /* Set state to active */
399 via[B] |= TIP;
400 /* ACK off */
401 via[B] &= ~TACK;
402 /* Delay */
403 udelay(ADB_DELAY);
404 /* Shift out and send */
405 via[ACR] |= SR_OUT;
406 via[SR] = req->data[0];
407 data_index = 1;
408 /* ACK on */
409 via[B] |= TACK;
410 maciisi_state = sending;
411
412 return 0;
413}
414
415void
416maciisi_poll(void)
417{
418 unsigned long flags;
419
420 local_irq_save(flags);
421 if (via[IFR] & SR_INT) {
422 maciisi_interrupt(0, NULL);
423 }
424 else /* avoid calling this function too quickly in a loop */
425 udelay(ADB_DELAY);
426
427 local_irq_restore(flags);
428}
429
430/* Shift register interrupt - this is *supposed* to mean that the
431 register is either full or empty. In practice, I have no idea what
432 it means :( */
433static irqreturn_t
434maciisi_interrupt(int irq, void* arg)
435{
436 int status;
437 struct adb_request *req;
438#ifdef DEBUG_MACIISI_ADB
439 static int dump_reply = 0;
440#endif
441 int i;
442 unsigned long flags;
443
444 local_irq_save(flags);
445
446 status = via[B] & (TIP|TREQ);
447#ifdef DEBUG_MACIISI_ADB
448 printk(KERN_DEBUG "state %d status %x ifr %x\n", maciisi_state, status, via[IFR]);
449#endif
450
451 if (!(via[IFR] & SR_INT)) {
452 /* Shouldn't happen, we hope */
453 printk(KERN_ERR "maciisi_interrupt: called without interrupt flag set\n");
454 local_irq_restore(flags);
455 return IRQ_NONE;
456 }
457
458 /* Clear the interrupt */
459 /* via[IFR] = SR_INT; */
460
461 switch_start:
462 switch (maciisi_state) {
463 case idle:
464 if (status & TIP)
465 printk(KERN_ERR "maciisi_interrupt: state is idle but TIP asserted!\n");
466
467 if(!reading_reply)
468 udelay(ADB_DELAY);
469 /* Shift in */
470 via[ACR] &= ~SR_OUT;
471 /* Signal start of frame */
472 via[B] |= TIP;
473 /* Clear the interrupt (throw this value on the floor, it's useless) */
474 tmp = via[SR];
475 /* ACK adb chip, high-low */
476 via[B] |= TACK;
477 udelay(ADB_DELAY);
478 via[B] &= ~TACK;
479 reply_len = 0;
480 maciisi_state = reading;
481 if (reading_reply) {
482 reply_ptr = current_req->reply;
483 } else {
484 reply_ptr = maciisi_rbuf;
485 }
486 break;
487
488 case sending:
489 /* via[SR]; */
490 /* Set ACK off */
491 via[B] &= ~TACK;
492 req = current_req;
493
494 if (!(status & TREQ)) {
495 /* collision */
496 printk(KERN_ERR "maciisi_interrupt: send collision\n");
497 /* Set idle and input */
498 via[ACR] &= ~SR_OUT;
499 tmp = via[SR];
500 via[B] &= ~TIP;
501 /* Must re-send */
502 reading_reply = 0;
503 reply_len = 0;
504 maciisi_state = idle;
505 udelay(ADB_DELAY);
506 /* process this now, because the IFR has been cleared */
507 goto switch_start;
508 }
509
510 udelay(ADB_DELAY);
511
512 if (data_index >= req->nbytes) {
513 /* Sent the whole packet, put the bus back in idle state */
514 /* Shift in, we are about to read a reply (hopefully) */
515 via[ACR] &= ~SR_OUT;
516 tmp = via[SR];
517 /* End of frame */
518 via[B] &= ~TIP;
519 req->sent = 1;
520 maciisi_state = idle;
521 if (req->reply_expected) {
522 /* Note: only set this once we've
523 successfully sent the packet */
524 reading_reply = 1;
525 } else {
526 current_req = req->next;
527 if (req->done)
528 (*req->done)(req);
529 /* Do any queued requests now */
530 i = maciisi_start();
531 if(i == 0 && need_sync) {
532 /* Packet needs to be synced */
533 maciisi_sync(current_req);
534 }
535 if(i != -EBUSY)
536 need_sync = 0;
537 }
538 } else {
539 /* Sending more stuff */
540 /* Shift out */
541 via[ACR] |= SR_OUT;
542 /* Write */
543 via[SR] = req->data[data_index++];
544 /* Signal 'byte ready' */
545 via[B] |= TACK;
546 }
547 break;
548
549 case reading:
550 /* Shift in */
551 /* via[ACR] &= ~SR_OUT; */ /* Not in 2.2 */
552 if (reply_len++ > 16) {
553 printk(KERN_ERR "maciisi_interrupt: reply too long, aborting read\n");
554 via[B] |= TACK;
555 udelay(ADB_DELAY);
556 via[B] &= ~(TACK|TIP);
557 maciisi_state = idle;
558 i = maciisi_start();
559 if(i == 0 && need_sync) {
560 /* Packet needs to be synced */
561 maciisi_sync(current_req);
562 }
563 if(i != -EBUSY)
564 need_sync = 0;
565 break;
566 }
567 /* Read data */
568 *reply_ptr++ = via[SR];
569 status = via[B] & (TIP|TREQ);
570 /* ACK on/off */
571 via[B] |= TACK;
572 udelay(ADB_DELAY);
573 via[B] &= ~TACK;
574 if (!(status & TREQ))
575 break; /* more stuff to deal with */
576
577 /* end of frame */
578 via[B] &= ~TIP;
579 tmp = via[SR]; /* That's what happens in 2.2 */
580 udelay(ADB_DELAY); /* Give controller time to recover */
581
582 /* end of packet, deal with it */
583 if (reading_reply) {
584 req = current_req;
585 req->reply_len = reply_ptr - req->reply;
586 if (req->data[0] == ADB_PACKET) {
587 /* Have to adjust the reply from ADB commands */
588 if (req->reply_len <= 2 || (req->reply[1] & 2) != 0) {
589 /* the 0x2 bit indicates no response */
590 req->reply_len = 0;
591 } else {
592 /* leave just the command and result bytes in the reply */
593 req->reply_len -= 2;
594 memmove(req->reply, req->reply + 2, req->reply_len);
595 }
596 }
597#ifdef DEBUG_MACIISI_ADB
598 if (dump_reply) {
599 int i;
600 printk(KERN_DEBUG "maciisi_interrupt: reply is ");
601 for (i = 0; i < req->reply_len; ++i)
602 printk(" %.2x", req->reply[i]);
603 printk("\n");
604 }
605#endif
606 req->complete = 1;
607 current_req = req->next;
608 if (req->done)
609 (*req->done)(req);
610 /* Obviously, we got it */
611 reading_reply = 0;
612 } else {
613 maciisi_input(maciisi_rbuf, reply_ptr - maciisi_rbuf);
614 }
615 maciisi_state = idle;
616 status = via[B] & (TIP|TREQ);
617 if (!(status & TREQ)) {
618 /* Timeout?! More likely, another packet coming in already */
619#ifdef DEBUG_MACIISI_ADB
620 printk(KERN_DEBUG "extra data after packet: status %x ifr %x\n",
621 status, via[IFR]);
622#endif
623#if 0
624 udelay(ADB_DELAY);
625 via[B] |= TIP;
626
627 maciisi_state = reading;
628 reading_reply = 0;
629 reply_ptr = maciisi_rbuf;
630#else
631 /* Process the packet now */
632 reading_reply = 0;
633 goto switch_start;
634#endif
635 /* We used to do this... but the controller might actually have data for us */
636 /* maciisi_stfu(); */
637 }
638 else {
639 /* Do any queued requests now if possible */
640 i = maciisi_start();
641 if(i == 0 && need_sync) {
642 /* Packet needs to be synced */
643 maciisi_sync(current_req);
644 }
645 if(i != -EBUSY)
646 need_sync = 0;
647 }
648 break;
649
650 default:
651 printk("maciisi_interrupt: unknown maciisi_state %d?\n", maciisi_state);
652 }
653 local_irq_restore(flags);
654 return IRQ_HANDLED;
655}
656
657static void
658maciisi_input(unsigned char *buf, int nb)
659{
660#ifdef DEBUG_MACIISI_ADB
661 int i;
662#endif
663
664 switch (buf[0]) {
665 case ADB_PACKET:
666 adb_input(buf+2, nb-2, buf[1] & 0x40);
667 break;
668 default:
669#ifdef DEBUG_MACIISI_ADB
670 printk(KERN_DEBUG "data from IIsi ADB (%d bytes):", nb);
671 for (i = 0; i < nb; ++i)
672 printk(" %.2x", buf[i]);
673 printk("\n");
674#endif
675 break;
676 }
677}
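
One detail visible in the removed driver: the variadic copy loop in maciisi_request() incremented i both in the for header and in the subscript (req->data[i++] = va_arg(list, int);), so it filled only every other slot. For reference, the conventional form of such a copy, as cuda_request() uses, looks like this sketch (hypothetical helper, not from the patch):

	#include <stdarg.h>

	/* Corrected sketch of the variadic copy: one increment per argument. */
	static int pack_request(unsigned char *data, int nbytes, ...)
	{
		va_list list;
		int i;

		va_start(list, nbytes);
		for (i = 0; i < nbytes; i++)
			data[i] = va_arg(list, int);	/* not data[i++] */
		va_end(list);
		return nbytes;
	}
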
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index 56e9a4732ef0..c14fd6b65b5a 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -2,9 +2,10 @@ ccflags-y := $(call cc-disable-warning, unused-const-variable)
2ccflags-$(CONFIG_PPC_WERROR) += -Werror 2ccflags-$(CONFIG_PPC_WERROR) += -Werror
3 3
4cxl-y += main.o file.o irq.o fault.o native.o 4cxl-y += main.o file.o irq.o fault.o native.o
5cxl-y += context.o sysfs.o debugfs.o pci.o trace.o 5cxl-y += context.o sysfs.o pci.o trace.o
6cxl-y += vphb.o phb.o api.o 6cxl-y += vphb.o phb.o api.o
7cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o 7cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o
8cxl-$(CONFIG_DEBUG_FS) += debugfs.o
8obj-$(CONFIG_CXL) += cxl.o 9obj-$(CONFIG_CXL) += cxl.o
9obj-$(CONFIG_CXL_BASE) += base.o 10obj-$(CONFIG_CXL_BASE) += base.o
10 11
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 1b35e33d2434..bcc030eacab7 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -11,7 +11,6 @@
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/file.h> 12#include <linux/file.h>
13#include <misc/cxl.h> 13#include <misc/cxl.h>
14#include <asm/pnv-pci.h>
15#include <linux/msi.h> 14#include <linux/msi.h>
16#include <linux/module.h> 15#include <linux/module.h>
17#include <linux/mount.h> 16#include <linux/mount.h>
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b24d76723fb0..6c722d96b775 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -418,6 +418,8 @@ struct cxl_afu {
418 struct dentry *debugfs; 418 struct dentry *debugfs;
419 struct mutex contexts_lock; 419 struct mutex contexts_lock;
420 spinlock_t afu_cntl_lock; 420 spinlock_t afu_cntl_lock;
421 /* Used to block access to AFU config space while deconfigured */
422 struct rw_semaphore configured_rwsem;
421 423
422 /* AFU error buffer fields and bin attribute for sysfs */ 424 /* AFU error buffer fields and bin attribute for sysfs */
423 u64 eb_len, eb_offset; 425 u64 eb_len, eb_offset;
@@ -800,12 +802,67 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count);
800void afu_release_irqs(struct cxl_context *ctx, void *cookie); 802void afu_release_irqs(struct cxl_context *ctx, void *cookie);
801void afu_irq_name_free(struct cxl_context *ctx); 803void afu_irq_name_free(struct cxl_context *ctx);
802 804
805#ifdef CONFIG_DEBUG_FS
806
803int cxl_debugfs_init(void); 807int cxl_debugfs_init(void);
804void cxl_debugfs_exit(void); 808void cxl_debugfs_exit(void);
805int cxl_debugfs_adapter_add(struct cxl *adapter); 809int cxl_debugfs_adapter_add(struct cxl *adapter);
806void cxl_debugfs_adapter_remove(struct cxl *adapter); 810void cxl_debugfs_adapter_remove(struct cxl *adapter);
807int cxl_debugfs_afu_add(struct cxl_afu *afu); 811int cxl_debugfs_afu_add(struct cxl_afu *afu);
808void cxl_debugfs_afu_remove(struct cxl_afu *afu); 812void cxl_debugfs_afu_remove(struct cxl_afu *afu);
813void cxl_stop_trace(struct cxl *cxl);
814void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir);
815void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir);
816void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir);
817
818#else /* CONFIG_DEBUG_FS */
819
820static inline int __init cxl_debugfs_init(void)
821{
822 return 0;
823}
824
825static inline void cxl_debugfs_exit(void)
826{
827}
828
829static inline int cxl_debugfs_adapter_add(struct cxl *adapter)
830{
831 return 0;
832}
833
834static inline void cxl_debugfs_adapter_remove(struct cxl *adapter)
835{
836}
837
838static inline int cxl_debugfs_afu_add(struct cxl_afu *afu)
839{
840 return 0;
841}
842
843static inline void cxl_debugfs_afu_remove(struct cxl_afu *afu)
844{
845}
846
847static inline void cxl_stop_trace(struct cxl *cxl)
848{
849}
850
851static inline void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter,
852 struct dentry *dir)
853{
854}
855
856static inline void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter,
857 struct dentry *dir)
858{
859}
860
861static inline void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir)
862{
863}
864
865#endif /* CONFIG_DEBUG_FS */
809 866
810void cxl_handle_fault(struct work_struct *work); 867void cxl_handle_fault(struct work_struct *work);
811void cxl_prefault(struct cxl_context *ctx, u64 wed); 868void cxl_prefault(struct cxl_context *ctx, u64 wed);
@@ -870,12 +927,8 @@ int cxl_data_cache_flush(struct cxl *adapter);
870int cxl_afu_disable(struct cxl_afu *afu); 927int cxl_afu_disable(struct cxl_afu *afu);
871int cxl_psl_purge(struct cxl_afu *afu); 928int cxl_psl_purge(struct cxl_afu *afu);
872 929
873void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir);
874void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir);
875void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir);
876void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx); 930void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx);
877void cxl_native_err_irq_dump_regs(struct cxl *adapter); 931void cxl_native_err_irq_dump_regs(struct cxl *adapter);
878void cxl_stop_trace(struct cxl *cxl);
879int cxl_pci_vphb_add(struct cxl_afu *afu); 932int cxl_pci_vphb_add(struct cxl_afu *afu);
880void cxl_pci_vphb_remove(struct cxl_afu *afu); 933void cxl_pci_vphb_remove(struct cxl_afu *afu);
881void cxl_release_mapping(struct cxl_context *ctx); 934void cxl_release_mapping(struct cxl_context *ctx);
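
Paired with the Makefile change that builds debugfs.o only when CONFIG_DEBUG_FS is set, the cxl.h hunk above is the standard kernel pattern for making a subsystem optional: real prototypes under #ifdef CONFIG_DEBUG_FS, empty static inline stubs otherwise, so callers never need their own ifdefs and the compiler discards the no-op calls. The idiom in miniature, with hypothetical names:

	/* Minimal sketch of the ifdef/static-inline stub idiom (hypothetical
	 * names, not from the patch).
	 */
	#ifdef CONFIG_FOO_DEBUGFS
	int foo_debugfs_init(void);
	void foo_debugfs_exit(void);
	#else
	static inline int foo_debugfs_init(void)
	{
		return 0;	/* succeed silently so callers need no ifdefs */
	}
	static inline void foo_debugfs_exit(void)
	{
	}
	#endif
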
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 62e0dfb5f15b..2a6bf1d0a3a4 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -268,7 +268,8 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
268 idr_init(&afu->contexts_idr); 268 idr_init(&afu->contexts_idr);
269 mutex_init(&afu->contexts_lock); 269 mutex_init(&afu->contexts_lock);
270 spin_lock_init(&afu->afu_cntl_lock); 270 spin_lock_init(&afu->afu_cntl_lock);
271 271 init_rwsem(&afu->configured_rwsem);
272 down_write(&afu->configured_rwsem);
272 afu->prefault_mode = CXL_PREFAULT_NONE; 273 afu->prefault_mode = CXL_PREFAULT_NONE;
273 afu->irqs_max = afu->adapter->user_irqs; 274 afu->irqs_max = afu->adapter->user_irqs;
274 275
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 80a87ab25b83..cca938845ffd 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1129,6 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
1129 if ((rc = cxl_native_register_psl_irq(afu))) 1129 if ((rc = cxl_native_register_psl_irq(afu)))
1130 goto err2; 1130 goto err2;
1131 1131
1132 up_write(&afu->configured_rwsem);
1132 return 0; 1133 return 0;
1133 1134
1134err2: 1135err2:
@@ -1141,6 +1142,7 @@ err1:
1141 1142
1142static void pci_deconfigure_afu(struct cxl_afu *afu) 1143static void pci_deconfigure_afu(struct cxl_afu *afu)
1143{ 1144{
1145 down_write(&afu->configured_rwsem);
1144 cxl_native_release_psl_irq(afu); 1146 cxl_native_release_psl_irq(afu);
1145 if (afu->adapter->native->sl_ops->release_serr_irq) 1147 if (afu->adapter->native->sl_ops->release_serr_irq)
1146 afu->adapter->native->sl_ops->release_serr_irq(afu); 1148 afu->adapter->native->sl_ops->release_serr_irq(afu);
@@ -1610,6 +1612,9 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
1610 cxl_sysfs_adapter_remove(adapter); 1612 cxl_sysfs_adapter_remove(adapter);
1611 cxl_debugfs_adapter_remove(adapter); 1613 cxl_debugfs_adapter_remove(adapter);
1612 1614
 1615 /* Flush adapter data cache as it's about to be removed */
1616 cxl_data_cache_flush(adapter);
1617
1613 cxl_deconfigure_adapter(adapter); 1618 cxl_deconfigure_adapter(adapter);
1614 1619
1615 device_unregister(&adapter->dev); 1620 device_unregister(&adapter->dev);
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index 3519acebfdab..639a343b7836 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -76,23 +76,22 @@ static int cxl_pcie_cfg_record(u8 bus, u8 devfn)
76 return (bus << 8) + devfn; 76 return (bus << 8) + devfn;
77} 77}
78 78
79static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, 79static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
80 struct cxl_afu **_afu, int *_record)
81{ 80{
82 struct pci_controller *phb; 81 struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL;
83 struct cxl_afu *afu;
84 int record;
85 82
86 phb = pci_bus_to_host(bus); 83 return phb ? phb->private_data : NULL;
87 if (phb == NULL) 84}
88 return PCIBIOS_DEVICE_NOT_FOUND; 85
86static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
87 struct cxl_afu *afu, int *_record)
88{
89 int record;
89 90
90 afu = (struct cxl_afu *)phb->private_data;
91 record = cxl_pcie_cfg_record(bus->number, devfn); 91 record = cxl_pcie_cfg_record(bus->number, devfn);
92 if (record > afu->crs_num) 92 if (record > afu->crs_num)
93 return PCIBIOS_DEVICE_NOT_FOUND; 93 return PCIBIOS_DEVICE_NOT_FOUND;
94 94
95 *_afu = afu;
96 *_record = record; 95 *_record = record;
97 return 0; 96 return 0;
98} 97}
@@ -106,9 +105,14 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
106 u16 val16; 105 u16 val16;
107 u32 val32; 106 u32 val32;
108 107
109 rc = cxl_pcie_config_info(bus, devfn, &afu, &record); 108 afu = pci_bus_to_afu(bus);
109 /* Grab a reader lock on afu. */
110 if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
111 return PCIBIOS_DEVICE_NOT_FOUND;
112
113 rc = cxl_pcie_config_info(bus, devfn, afu, &record);
110 if (rc) 114 if (rc)
111 return rc; 115 goto out;
112 116
113 switch (len) { 117 switch (len) {
114 case 1: 118 case 1:
@@ -127,10 +131,9 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
127 WARN_ON(1); 131 WARN_ON(1);
128 } 132 }
129 133
130 if (rc) 134out:
131 return PCIBIOS_DEVICE_NOT_FOUND; 135 up_read(&afu->configured_rwsem);
132 136 return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
133 return PCIBIOS_SUCCESSFUL;
134} 137}
135 138
136static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, 139static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
@@ -139,9 +142,14 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
139 int rc, record; 142 int rc, record;
140 struct cxl_afu *afu; 143 struct cxl_afu *afu;
141 144
142 rc = cxl_pcie_config_info(bus, devfn, &afu, &record); 145 afu = pci_bus_to_afu(bus);
146 /* Grab a reader lock on afu. */
147 if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
148 return PCIBIOS_DEVICE_NOT_FOUND;
149
150 rc = cxl_pcie_config_info(bus, devfn, afu, &record);
143 if (rc) 151 if (rc)
144 return rc; 152 goto out;
145 153
146 switch (len) { 154 switch (len) {
147 case 1: 155 case 1:
@@ -157,10 +165,9 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
157 WARN_ON(1); 165 WARN_ON(1);
158 } 166 }
159 167
160 if (rc) 168out:
161 return PCIBIOS_SET_FAILED; 169 up_read(&afu->configured_rwsem);
162 170 return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
163 return PCIBIOS_SUCCESSFUL;
164} 171}
165 172
166static struct pci_ops cxl_pcie_pci_ops = 173static struct pci_ops cxl_pcie_pci_ops =
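
Taken together, the main.c, pci.c and vphb.c hunks implement a read/write-semaphore gate: the AFU is created with configured_rwsem held for write, pci_configure_afu() releases the writer once the AFU is usable, pci_deconfigure_afu() retakes it, and the config-space accessors use down_read_trylock() so they fail fast with PCIBIOS_DEVICE_NOT_FOUND instead of blocking while the AFU is deconfigured. A condensed sketch of the pattern, with hypothetical names and error paths elided:

	#include <linux/rwsem.h>

	struct dev_state {
		struct rw_semaphore configured_rwsem;
	};

	static void dev_alloc(struct dev_state *d)
	{
		init_rwsem(&d->configured_rwsem);
		down_write(&d->configured_rwsem);	/* start deconfigured */
	}

	static void dev_configure(struct dev_state *d)
	{
		/* ... bring the device up ... */
		up_write(&d->configured_rwsem);		/* open the gate to readers */
	}

	static void dev_deconfigure(struct dev_state *d)
	{
		down_write(&d->configured_rwsem);	/* wait out readers, close gate */
		/* ... tear the device down ... */
	}

	static int dev_access(struct dev_state *d)
	{
		if (!down_read_trylock(&d->configured_rwsem))
			return -ENODEV;			/* deconfigured: fail fast */
		/* ... touch config space safely ... */
		up_read(&d->configured_rwsem);
		return 0;
	}
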
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index da346f2817a8..fc1e5d7fc1c7 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -62,6 +62,7 @@ struct cpuidle_state {
62}; 62};
63 63
64/* Idle State Flags */ 64/* Idle State Flags */
65#define CPUIDLE_FLAG_NONE (0x00)
65#define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */ 66#define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */
66#define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */ 67#define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */
67 68
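
CPUIDLE_FLAG_NONE gives idle-state tables an explicit named zero instead of a bare 0. A hedged sketch of where it lands (field values are placeholders; the .enter callback and other fields are omitted for brevity):

	#include <linux/cpuidle.h>

	/* Illustrative cpuidle state entry, not from the patch. */
	static struct cpuidle_state snooze_state = {
		.name			= "snooze",
		.desc			= "idle polling state",
		.flags			= CPUIDLE_FLAG_NONE,	/* no special handling */
		.exit_latency		= 0,
		.target_residency	= 0,
	};
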
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cac48eda1075..e0035808c814 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -871,6 +871,8 @@ struct kvm_ppc_smmu_info {
871#define KVM_CAP_S390_USER_INSTR0 130 871#define KVM_CAP_S390_USER_INSTR0 130
872#define KVM_CAP_MSI_DEVID 131 872#define KVM_CAP_MSI_DEVID 131
873#define KVM_CAP_PPC_HTM 132 873#define KVM_CAP_PPC_HTM 132
874#define KVM_CAP_PPC_MMU_RADIX 134
875#define KVM_CAP_PPC_MMU_HASH_V3 135
874 876
875#ifdef KVM_CAP_IRQ_ROUTING 877#ifdef KVM_CAP_IRQ_ROUTING
876 878
@@ -1187,6 +1189,10 @@ struct kvm_s390_ucas_mapping {
1187#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) 1189#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
1188/* Available with KVM_CAP_PPC_RTAS */ 1190/* Available with KVM_CAP_PPC_RTAS */
1189#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) 1191#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
1192/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
1193#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
1194/* Available with KVM_CAP_PPC_RADIX_MMU */
1195#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
1190 1196
1191/* ioctl for vm fd */ 1197/* ioctl for vm fd */
1192#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) 1198#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
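
Note that the /* Available with ... */ comments refer to KVM_CAP_PPC_RADIX_MMU and KVM_CAP_PPC_HASH_MMU_V3, while the constants above are spelled KVM_CAP_PPC_MMU_RADIX and KVM_CAP_PPC_MMU_HASH_V3. From userspace, the new ioctl is issued against a VM file descriptor. A sketch, with the caveat that the kvm_ppc_mmuv3_cfg field names and the KVM_PPC_MMUV3_RADIX flag below are assumptions about the uapi introduced alongside these capabilities:

	/* Userspace sketch: ask KVM to run the guest with the radix MMU.
	 * Assumes struct kvm_ppc_mmuv3_cfg has .flags and .process_table and
	 * that KVM_PPC_MMUV3_RADIX exists; vm_fd is a VM fd from KVM_CREATE_VM.
	 */
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int enable_radix(int vm_fd, unsigned long process_table)
	{
		struct kvm_ppc_mmuv3_cfg cfg = {
			.flags		= KVM_PPC_MMUV3_RADIX,
			.process_table	= process_table,
		};

		if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_MMU_RADIX) <= 0)
			return -1;	/* radix not supported by this kernel/CPU */
		return ioctl(vm_fd, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
	}
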
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ebb4dadca66b..699c5bc51a92 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1740,6 +1740,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
1740} 1740}
1741EXPORT_SYMBOL_GPL(unregister_kprobes); 1741EXPORT_SYMBOL_GPL(unregister_kprobes);
1742 1742
1743int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
1744 unsigned long val, void *data)
1745{
1746 return NOTIFY_DONE;
1747}
1748
1743static struct notifier_block kprobe_exceptions_nb = { 1749static struct notifier_block kprobe_exceptions_nb = {
1744 .notifier_call = kprobe_exceptions_notify, 1750 .notifier_call = kprobe_exceptions_notify,
1745 .priority = 0x7fffffff /* we need to be notified first */ 1751 .priority = 0x7fffffff /* we need to be notified first */
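
The kprobes.c hunk introduces a weak default so that architectures with nothing to do in kprobe_exceptions_notify() (powerpc, after this series) can simply delete their empty copies; any architecture that still defines the symbol overrides the weak stub at link time. The idiom in miniature, with hypothetical names (the kernel spells the attribute __weak):

	/* common.c -- common code provides a weak default, used when no
	 * strong definition exists anywhere in the link.
	 */
	int __attribute__((weak)) arch_hook(int event)
	{
		(void)event;
		return 0;	/* "nothing to do", like NOTIFY_DONE */
	}

	/* arch_hook.c (optional, separate translation unit) -- a strong
	 * definition here silently replaces the weak one at link time.
	 */
	int arch_hook(int event)
	{
		return event + 1;
	}
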
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index a084f7a511d8..82335533620e 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -8,7 +8,7 @@ ifdef CONFIG_GCC_PLUGINS
8 8
9 gcc-plugin-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) += latent_entropy_plugin.so 9 gcc-plugin-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) += latent_entropy_plugin.so
10 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) += -DLATENT_ENTROPY_PLUGIN 10 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) += -DLATENT_ENTROPY_PLUGIN
11 ifdef CONFIG_PAX_LATENT_ENTROPY 11 ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
12 DISABLE_LATENT_ENTROPY_PLUGIN += -fplugin-arg-latent_entropy_plugin-disable 12 DISABLE_LATENT_ENTROPY_PLUGIN += -fplugin-arg-latent_entropy_plugin-disable
13 endif 13 endif
14 14
@@ -51,6 +51,14 @@ gcc-plugins-check: FORCE
51ifdef CONFIG_GCC_PLUGINS 51ifdef CONFIG_GCC_PLUGINS
52 ifeq ($(PLUGINCC),) 52 ifeq ($(PLUGINCC),)
53 ifneq ($(GCC_PLUGINS_CFLAGS),) 53 ifneq ($(GCC_PLUGINS_CFLAGS),)
54 # Various gccs between 4.5 and 5.1 have bugs on powerpc due to missing
55 # header files. gcc <= 4.6 doesn't work at all, gccs from 4.8 to 5.1 have
56 # issues with 64-bit targets.
57 ifeq ($(ARCH),powerpc)
58 ifeq ($(call cc-ifversion, -le, 0501, y), y)
59 @echo "Cannot use CONFIG_GCC_PLUGINS: plugin support on gcc <= 5.1 is buggy on powerpc, please upgrade to gcc 5.2 or newer" >&2 && exit 1
60 endif
61 endif
54 ifeq ($(call cc-ifversion, -ge, 0405, y), y) 62 ifeq ($(call cc-ifversion, -ge, 0405, y), y)
55 $(Q)$(srctree)/scripts/gcc-plugin.sh --show-error "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)" || true 63 $(Q)$(srctree)/scripts/gcc-plugin.sh --show-error "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)" || true
56 @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc installation does not support plugins, perhaps the necessary headers are missing?" >&2 && exit 1 64 @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc installation does not support plugins, perhaps the necessary headers are missing?" >&2 && exit 1