author	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-02 12:50:09 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-02 12:50:09 -0400
commit	b68e7e952f24527de62f4768b1cead91f92f5f6e (patch)
tree	c9c1dbc333becac5396eaef4d5971d3f4ca337e3
parent	d3b5d35290d729a2518af00feca867385a1b08fa (diff)
parent	d0790fb6e5bc5a6bb923de9c2be7fc210d6b689b (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:

 - three merges for KVM/s390 with changes for vfio-ccw and cpacf. The
   patches are included in the KVM tree as well, let git sort it out.

 - add the new 'trng' random number generator

 - provide the secure key verification API for the pkey interface

 - introduce the z13 cpu counters to perf

 - add a new system call to set up the guarded storage facility

 - simplify TASK_SIZE and arch_get_unmapped_area

 - export the raw STSI data related to CPU topology to user space

 - ... and the usual churn of bug-fixes and cleanups.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (74 commits)
  s390/crypt: use the correct module alias for paes_s390.
  s390/cpacf: Introduce kma instruction
  s390/cpacf: query instructions use unique parameters for compatibility with KMA
  s390/trng: Introduce s390 TRNG device driver.
  s390/crypto: Provide s390 specific arch random functionality.
  s390/crypto: Add new subfunctions to the cpacf PRNO function.
  s390/crypto: Renaming PPNO to PRNO.
  s390/pageattr: avoid unnecessary page table splitting
  s390/mm: simplify arch_get_unmapped_area[_topdown]
  s390/mm: make TASK_SIZE independent from the number of page table levels
  s390/gs: add regset for the guarded storage broadcast control block
  s390/kvm: Add use_cmma field to mm_context_t
  s390/kvm: Add PGSTE manipulation functions
  vfio: ccw: improve error handling for vfio_ccw_mdev_remove
  vfio: ccw: remove unnecessary NULL checks of a pointer
  s390/spinlock: remove compare and delay instruction
  s390/spinlock: use atomic primitives for spinlocks
  s390/cpumf: simplify detection of guest samples
  s390/pci: remove forward declaration
  s390/pci: increase the PCI_NR_FUNCTIONS default
  ...
-rw-r--r--  Documentation/s390/00-INDEX | 2
-rw-r--r--  Documentation/s390/vfio-ccw.txt | 303
-rw-r--r--  MAINTAINERS | 11
-rw-r--r--  arch/s390/Kbuild | 2
-rw-r--r--  arch/s390/Kconfig | 39
-rw-r--r--  arch/s390/configs/default_defconfig | 1
-rw-r--r--  arch/s390/configs/gcov_defconfig | 1
-rw-r--r--  arch/s390/configs/performance_defconfig | 1
-rw-r--r--  arch/s390/configs/zfcpdump_defconfig | 1
-rw-r--r--  arch/s390/crypto/Makefile | 1
-rw-r--r--  arch/s390/crypto/arch_random.c | 31
-rw-r--r--  arch/s390/crypto/paes_s390.c | 2
-rw-r--r--  arch/s390/crypto/prng.c | 42
-rw-r--r--  arch/s390/include/asm/Kbuild | 6
-rw-r--r--  arch/s390/include/asm/archrandom.h | 69
-rw-r--r--  arch/s390/include/asm/atomic_ops.h | 22
-rw-r--r--  arch/s390/include/asm/bitops.h | 13
-rw-r--r--  arch/s390/include/asm/cio.h | 18
-rw-r--r--  arch/s390/include/asm/cpacf.h | 56
-rw-r--r--  arch/s390/include/asm/cpu_mf.h | 6
-rw-r--r--  arch/s390/include/asm/div64.h | 1
-rw-r--r--  arch/s390/include/asm/elf.h | 1
-rw-r--r--  arch/s390/include/asm/emergency-restart.h | 6
-rw-r--r--  arch/s390/include/asm/facility.h | 6
-rw-r--r--  arch/s390/include/asm/irq_regs.h | 1
-rw-r--r--  arch/s390/include/asm/isc.h | 1
-rw-r--r--  arch/s390/include/asm/kmap_types.h | 6
-rw-r--r--  arch/s390/include/asm/local.h | 1
-rw-r--r--  arch/s390/include/asm/local64.h | 1
-rw-r--r--  arch/s390/include/asm/lowcore.h | 9
-rw-r--r--  arch/s390/include/asm/mman.h | 4
-rw-r--r--  arch/s390/include/asm/mmu.h | 2
-rw-r--r--  arch/s390/include/asm/mmu_context.h | 1
-rw-r--r--  arch/s390/include/asm/nmi.h | 12
-rw-r--r--  arch/s390/include/asm/page-states.h | 19
-rw-r--r--  arch/s390/include/asm/perf_event.h | 4
-rw-r--r--  arch/s390/include/asm/pgtable.h | 16
-rw-r--r--  arch/s390/include/asm/pkey.h | 21
-rw-r--r--  arch/s390/include/asm/processor.h | 14
-rw-r--r--  arch/s390/include/asm/setup.h | 6
-rw-r--r--  arch/s390/include/asm/sparsemem.h | 2
-rw-r--r--  arch/s390/include/asm/spinlock.h | 45
-rw-r--r--  arch/s390/include/asm/spinlock_types.h | 6
-rw-r--r--  arch/s390/include/asm/switch_to.h | 3
-rw-r--r--  arch/s390/include/asm/sysinfo.h | 12
-rw-r--r--  arch/s390/include/asm/thread_info.h | 12
-rw-r--r--  arch/s390/include/uapi/asm/Kbuild | 19
-rw-r--r--  arch/s390/include/uapi/asm/errno.h | 11
-rw-r--r--  arch/s390/include/uapi/asm/fcntl.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/guarded_storage.h | 77
-rw-r--r--  arch/s390/include/uapi/asm/ioctl.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/mman.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/param.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/pkey.h | 19
-rw-r--r--  arch/s390/include/uapi/asm/poll.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/resource.h | 13
-rw-r--r--  arch/s390/include/uapi/asm/sockios.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/termbits.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/unistd.h | 2
-rw-r--r--  arch/s390/kernel/Makefile | 6
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 2
-rw-r--r--  arch/s390/kernel/compat_wrapper.c | 1
-rw-r--r--  arch/s390/kernel/crash_dump.c | 15
-rw-r--r--  arch/s390/kernel/early.c | 66
-rw-r--r--  arch/s390/kernel/entry.S | 28
-rw-r--r--  arch/s390/kernel/entry.h | 2
-rw-r--r--  arch/s390/kernel/guarded_storage.c | 128
-rw-r--r--  arch/s390/kernel/head.S | 1
-rw-r--r--  arch/s390/kernel/head64.S | 2
-rw-r--r--  arch/s390/kernel/kdebugfs.c | 15
-rw-r--r--  arch/s390/kernel/machine_kexec.c | 13
-rw-r--r--  arch/s390/kernel/nmi.c | 19
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 128
-rw-r--r--  arch/s390/kernel/perf_cpum_cf_events.c | 148
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 7
-rw-r--r--  arch/s390/kernel/process.c | 7
-rw-r--r--  arch/s390/kernel/processor.c | 16
-rw-r--r--  arch/s390/kernel/ptrace.c | 132
-rw-r--r--  arch/s390/kernel/setup.c | 18
-rw-r--r--  arch/s390/kernel/smp.c | 43
-rw-r--r--  arch/s390/kernel/syscalls.S | 2
-rw-r--r--  arch/s390/kernel/sysinfo.c | 98
-rw-r--r--  arch/s390/kernel/topology.c | 6
-rw-r--r--  arch/s390/kvm/interrupt.c | 4
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 6
-rw-r--r--  arch/s390/lib/spinlock.c | 84
-rw-r--r--  arch/s390/mm/gmap.c | 37
-rw-r--r--  arch/s390/mm/gup.c | 2
-rw-r--r--  arch/s390/mm/mmap.c | 84
-rw-r--r--  arch/s390/mm/page-states.c | 3
-rw-r--r--  arch/s390/mm/pageattr.c | 10
-rw-r--r--  arch/s390/mm/pgalloc.c | 4
-rw-r--r--  arch/s390/mm/pgtable.c | 153
-rw-r--r--  arch/s390/pci/pci.c | 22
-rw-r--r--  drivers/char/hw_random/Kconfig | 14
-rw-r--r--  drivers/char/hw_random/Makefile | 1
-rw-r--r--  drivers/char/hw_random/s390-trng.c | 268
-rw-r--r--  drivers/iommu/Kconfig | 8
-rw-r--r--  drivers/s390/block/dasd_3990_erp.c | 5
-rw-r--r--  drivers/s390/block/dasd_eckd.c | 16
-rw-r--r--  drivers/s390/block/dasd_int.h | 2
-rw-r--r--  drivers/s390/cio/Makefile | 3
-rw-r--r--  drivers/s390/cio/cio.c | 69
-rw-r--r--  drivers/s390/cio/cio.h | 1
-rw-r--r--  drivers/s390/cio/device_fsm.c | 54
-rw-r--r--  drivers/s390/cio/vfio_ccw_cp.c | 842
-rw-r--r--  drivers/s390/cio/vfio_ccw_cp.h | 42
-rw-r--r--  drivers/s390/cio/vfio_ccw_drv.c | 308
-rw-r--r--  drivers/s390/cio/vfio_ccw_fsm.c | 203
-rw-r--r--  drivers/s390/cio/vfio_ccw_ops.c | 425
-rw-r--r--  drivers/s390/cio/vfio_ccw_private.h | 96
-rw-r--r--  drivers/s390/crypto/pkey_api.c | 64
-rw-r--r--  include/uapi/linux/elf.h | 2
-rw-r--r--  include/uapi/linux/vfio.h | 18
-rw-r--r--  include/uapi/linux/vfio_ccw.h | 24
115 files changed, 4223 insertions, 561 deletions
diff --git a/Documentation/s390/00-INDEX b/Documentation/s390/00-INDEX
index 9189535f6cd2..317f0378ae01 100644
--- a/Documentation/s390/00-INDEX
+++ b/Documentation/s390/00-INDEX
@@ -22,5 +22,7 @@ qeth.txt
 	- HiperSockets Bridge Port Support.
 s390dbf.txt
 	- information on using the s390 debug feature.
+vfio-ccw.txt
+	information on the vfio-ccw I/O subchannel driver.
 zfcpdump.txt
 	- information on the s390 SCSI dump tool.
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.txt
new file mode 100644
index 000000000000..90b3dfead81b
--- /dev/null
+++ b/Documentation/s390/vfio-ccw.txt
@@ -0,0 +1,303 @@
vfio-ccw: the basic infrastructure
==================================

Introduction
------------

Here we describe the vfio support for I/O subchannel devices for
Linux/s390. The motivation for vfio-ccw is to pass subchannels through
to a virtual machine, with vfio as the means.

Unlike other hardware architectures, s390 has defined a unified I/O
access method, the so-called Channel I/O. It has its own access
patterns:
- Channel programs run asynchronously on a separate (co)processor.
- The channel subsystem will access any memory designated by the caller
  in the channel program directly, i.e. there is no iommu involved.
Thus when we introduce vfio support for these devices, we realize it
with a mediated device (mdev) implementation. The vfio mdev will be
added to an iommu group, so as to make itself able to be managed by the
vfio framework. And we add read/write callbacks for special vfio I/O
regions to pass the channel programs from the mdev to its parent device
(the real I/O subchannel device) to do further address translation and
to perform the I/O instructions.

This document does not intend to explain the s390 I/O architecture in
every detail. More information/references can be found here:
- A good starting point for Channel I/O in general:
  https://en.wikipedia.org/wiki/Channel_I/O
- s390 architecture:
  s390 Principles of Operation manual (IBM Form. No. SA22-7832)
- The existing Qemu code which implements a simple emulated channel
  subsystem can also be a good reference. It makes it easier to follow
  the flow.
  qemu/hw/s390x/css.c

For the vfio mediated device framework:
- Documentation/vfio-mediated-device.txt

Motivation of vfio-ccw
----------------------

Currently, a guest virtualized via qemu/kvm on s390 only sees
paravirtualized virtio devices via the "Virtio Over Channel I/O
(virtio-ccw)" transport. This makes virtio devices discoverable via
standard operating system algorithms for handling channel devices.

However, this is not enough. On s390, for the majority of devices,
which use the standard Channel I/O based mechanism, we also need to
provide the functionality of passing them through to a Qemu virtual
machine. This includes devices that don't have a virtio counterpart
(e.g. tape drives) or that have specific characteristics which guests
want to exploit.

For passing a device to a guest, we want to use the same interface as
everybody else, namely vfio. Thus, we would like to introduce vfio
support for channel devices. And we would like to name this new vfio
device "vfio-ccw".

Access patterns of CCW devices
------------------------------

The s390 architecture implements a so-called channel subsystem that
provides a unified view of the devices physically attached to the
system. The s390 hardware platform knows about a huge variety of
different peripheral attachments, such as disk devices (aka DASDs),
tapes, communication controllers, etc., but they can all be accessed
through a well-defined access method, and they all present I/O
completion in a unified way: I/O interruptions.

All I/O requires the use of channel command words (CCWs). A CCW is an
instruction to a specialized I/O channel processor. A channel program is
a sequence of CCWs which are executed by the I/O channel subsystem. To
issue a channel program to the channel subsystem, it is required to
build an operation request block (ORB), which can be used to point out
the format of the CCW and other control information to the system. The
operating system signals the I/O channel subsystem to begin executing
the channel program with an SSCH (start subchannel) instruction. The
central processor is then free to proceed with non-I/O instructions
until interrupted. The I/O completion result is received by the
interrupt handler in the form of an interrupt response block (IRB).

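To make the CCW terminology concrete, here is a minimal sketch (for
illustration only, not part of this series) of a one-CCW channel
program as a driver might build it with struct ccw1 from
arch/s390/include/asm/cio.h; the command code and buffer are made-up
example values, and real command codes are device specific:

#include <asm/cio.h>

/* Example only: one format-1 CCW that transfers 4KB to/from buf.
 * The buffer must be addressable within the 31-bit cda field.
 */
static struct ccw1 example_cp[1];

static void build_example_channel_program(void *buf)
{
	example_cp[0].cmd_code = 0x06;	/* placeholder command code */
	example_cp[0].flags = CCW_FLAG_SLI;	/* suppress incorrect length */
	example_cp[0].count = 4096;	/* byte count */
	example_cp[0].cda = (__u32)(unsigned long) buf;	/* data address */
}

The address of example_cp would then be placed into the ORB that is
handed to SSCH.
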
Back to vfio-ccw, in short:
- ORBs and channel programs are built in the guest kernel (with guest
  physical addresses).
- ORBs and channel programs are passed to the host kernel.
- The host kernel translates the guest physical addresses to real
  addresses and starts the I/O by issuing a privileged Channel I/O
  instruction (e.g. SSCH).
- Channel programs run asynchronously on a separate processor.
- I/O completion is signaled to the host with an I/O interruption, and
  the result is copied as an IRB to user space to pass it back to the
  guest.

Physical vfio ccw device and its child mdev
-------------------------------------------

As mentioned above, we realize vfio-ccw with an mdev implementation.

Channel I/O does not have IOMMU hardware support, so the physical
vfio-ccw device does not have an IOMMU level translation or isolation.

Subchannel I/O instructions are all privileged instructions. When
handling the I/O instruction interception, vfio-ccw performs software
policing and translation of the channel program before it gets sent to
the hardware.

Within this implementation, we have two drivers for two types of
devices:
- The vfio_ccw driver for the physical subchannel device.
  This is an I/O subchannel driver for the real subchannel device. It
  realizes a group of callbacks and registers to the mdev framework as a
  parent (physical) device. As a consequence, mdev provides vfio_ccw a
  generic interface (sysfs) to create mdev devices. A vfio mdev can then
  be created by vfio_ccw and added to the mediated bus. It is this vfio
  device that is added to an IOMMU group and a vfio group.
  vfio_ccw also provides an I/O region to accept channel program
  requests from user space and to store I/O interrupt results for user
  space to retrieve. To notify user space of an I/O completion, it
  offers an interface to set up an eventfd fd for asynchronous
  signaling.

- The vfio_mdev driver for the mediated vfio ccw device.
  This is provided by the mdev framework. It is a vfio device driver for
  the mdev created by vfio_ccw.
  It realizes a group of vfio device driver callbacks, adds itself to a
  vfio group, and registers itself to the mdev framework as an mdev
  driver.
  It uses a vfio iommu backend that uses the existing map and unmap
  ioctls, but rather than programming them into an IOMMU for a device,
  it simply stores the translations for use by later requests. This
  means that a device programmed in a VM with guest physical addresses
  can have the vfio kernel convert that address to a process virtual
  address, pin the page and program the hardware with the host physical
  address in one step.
  For an mdev, the vfio iommu backend will not pin the pages during the
  VFIO_IOMMU_MAP_DMA ioctl. The mdev framework will only maintain a
  database of the iova<->vaddr mappings in this operation. It also
  exports vfio_pin_pages and vfio_unpin_pages interfaces from the vfio
  iommu backend for the physical devices to pin and unpin pages on
  demand, as sketched below.

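As a rough sketch of the on-demand translation just described (not part
of this patch; the vfio_pin_pages() signature is assumed from the mdev
series this builds on and may differ in later kernels, and the function
and variable names here are made up):

#include <linux/iommu.h>
#include <linux/mm.h>
#include <linux/vfio.h>

/* Sketch: pin one guest page by iova and get the host pfn backing it. */
static int example_translate_page(struct device *mdev_dev,
				  unsigned long iova, unsigned long *pfn)
{
	unsigned long user_pfn = iova >> PAGE_SHIFT;
	int ret;

	/* pins the page in the mdev user's address space on demand */
	ret = vfio_pin_pages(mdev_dev, &user_pfn, 1,
			     IOMMU_READ | IOMMU_WRITE, pfn);
	/* the matching release would be vfio_unpin_pages(mdev_dev, &user_pfn, 1) */
	return ret < 0 ? ret : 0;
}
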
Below is a high-level block diagram.

 +-------------+
 |             |
 | +---------+ | mdev_register_driver() +--------------+
 | |  Mdev   | +<-----------------------+              |
 | |  bus    | |                        | vfio_mdev.ko |
 | | driver  | +----------------------->+              |<-> VFIO user
 | +---------+ |    probe()/remove()    +--------------+    APIs
 |             |
 |  MDEV CORE  |
 |   MODULE    |
 |   mdev.ko   |
 | +---------+ | mdev_register_device() +--------------+
 | |Physical | +<-----------------------+              |
 | | device  | |                        |  vfio_ccw.ko |<-> subchannel
 | |interface| +----------------------->+              |     device
 | +---------+ |        callback        +--------------+
 +-------------+

The process of how these work together:
1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
   physical device (with callbacks) to the mdev framework.
   When vfio_ccw probes the subchannel device, it registers a device
   pointer and callbacks to the mdev framework. Mdev related file nodes
   under the device node in sysfs are created for the subchannel
   device, namely 'mdev_create', 'mdev_destroy' and
   'mdev_supported_types'.
2. Create a mediated vfio ccw device.
   Using the 'mdev_create' sysfs file, we need to manually create one
   (and only one for our case) mediated device; see the sketch after
   this list.
3. vfio_mdev.ko drives the mediated ccw device.
   vfio_mdev is also the vfio device driver. It will probe the mdev and
   add it to an iommu_group and a vfio_group. Then we can pass the mdev
   through to a guest.

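A small user-space sketch of step 2, following the 'mdev_create' node
name used in this document (illustration only; the sysfs directory
argument and the UUID are example placeholders):

#include <stdio.h>

/* Sketch: create the (single) mdev for a subchannel by writing a UUID
 * to its 'mdev_create' sysfs node.
 */
int create_vfio_ccw_mdev(const char *sch_sysfs_dir, const char *uuid)
{
	char path[512];
	FILE *f;

	snprintf(path, sizeof(path), "%s/mdev_create", sch_sysfs_dir);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", uuid);	/* e.g. the output of uuidgen */
	return fclose(f);
}
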
vfio-ccw I/O region
-------------------

An I/O region is used to accept channel program requests from user
space and to store I/O interrupt results for user space to retrieve.
The definition of the region is:

struct ccw_io_region {
#define ORB_AREA_SIZE 12
	__u8	orb_area[ORB_AREA_SIZE];
#define SCSW_AREA_SIZE 12
	__u8	scsw_area[SCSW_AREA_SIZE];
#define IRB_AREA_SIZE 96
	__u8	irb_area[IRB_AREA_SIZE];
	__u32	ret_code;
} __packed;

When starting an I/O request, orb_area should be filled with the
guest ORB, and scsw_area should be filled with the SCSW of the virtual
subchannel.

irb_area stores the I/O result.

ret_code stores a return code for each access of the region.

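For illustration, a rough user-space sketch of driving this region
through the standard vfio device ioctls; the region index 0 and the
pread-back of ret_code are assumptions for the example, not a
description of the final interface:

#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vfio.h>
#include <linux/vfio_ccw.h>

/* Sketch: copy an ORB/SCSW pair into the I/O region and start the I/O. */
static int start_io(int device_fd, const void *orb, const void *scsw)
{
	struct vfio_region_info info = { .argsz = sizeof(info), .index = 0 };
	struct ccw_io_region region;

	/* ask vfio where the I/O region lives within the device fd */
	if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info) < 0)
		return -1;

	memset(&region, 0, sizeof(region));
	memcpy(region.orb_area, orb, ORB_AREA_SIZE);
	memcpy(region.scsw_area, scsw, SCSW_AREA_SIZE);
	if (pwrite(device_fd, &region, sizeof(region), info.offset) !=
	    sizeof(region))
		return -1;

	/* ret_code for this request can be read back from the region */
	if (pread(device_fd, &region, sizeof(region), info.offset) !=
	    sizeof(region))
		return -1;
	return region.ret_code;
}
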
vfio-ccw patches overview
-------------------------

For now, our patches are rebased on the latest mdev implementation.
vfio-ccw follows what vfio-pci did on the s390 platform and uses
vfio-iommu-type1 as the vfio iommu backend. It's a good starting point
for the code review of vfio-ccw. Note that the implementation is far
from complete yet, but we'd like to get feedback on the general
architecture.

* CCW translation APIs
- Description:
  These introduce a group of APIs (starting with 'cp_') to do CCW
  translation. The CCWs passed in by a user space program are
  organized with their guest physical memory addresses. These APIs
  will copy the CCWs into kernel space, and assemble a runnable
  kernel channel program by updating the guest physical addresses with
  their corresponding host physical addresses.
- Patches:
  vfio: ccw: introduce channel program interfaces

* vfio_ccw device driver
- Description:
  The following patches utilize the CCW translation APIs and introduce
  vfio_ccw, which is the driver for the I/O subchannel devices you want
  to pass through.
  vfio_ccw implements the following vfio ioctls:
    VFIO_DEVICE_GET_INFO
    VFIO_DEVICE_GET_IRQ_INFO
    VFIO_DEVICE_GET_REGION_INFO
    VFIO_DEVICE_RESET
    VFIO_DEVICE_SET_IRQS
  This provides an I/O region, so that the user space program can pass a
  channel program to the kernel, to do further CCW translation before
  issuing it to a real device.
  This also provides the SET_IRQ ioctl to set up an event notifier that
  notifies the user space program of I/O completion in an asynchronous
  way.
- Patches:
  vfio: ccw: basic implementation for vfio_ccw driver
  vfio: ccw: introduce ccw_io_region
  vfio: ccw: realize VFIO_DEVICE_GET_REGION_INFO ioctl
  vfio: ccw: realize VFIO_DEVICE_RESET ioctl
  vfio: ccw: realize VFIO_DEVICE_G(S)ET_IRQ_INFO ioctls

The user of vfio-ccw is not limited to Qemu, but Qemu is definitely a
good example for understanding how these patches work. Here is a bit
more detail on how an I/O request triggered by the Qemu guest is
handled (without error handling); a sketch of the event notifier setup
follows the list.

Explanation:
Q1-Q7: Qemu side process.
K1-K6: Kernel side process.

Q1. Get I/O region info during initialization.
Q2. Set up the event notifier and handler to handle I/O completion.

... ...

Q3. Intercept an ssch instruction.
Q4. Write the guest channel program and ORB to the I/O region.
    K1. Copy from guest to kernel.
    K2. Translate the guest channel program to a host kernel space
        channel program, which becomes runnable for a real device.
    K3. With the necessary information contained in the orb passed in
        by Qemu, issue the ccwchain to the device.
    K4. Return the ssch CC code.
Q5. Return the CC code to the guest.

... ...

    K5. The interrupt handler gets the I/O result and writes the result
        to the I/O region.
    K6. Signal Qemu to retrieve the result.
Q6. Get the signal; the event handler reads out the result from the I/O
    region.
Q7. Update the irb for the guest.

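A rough user-space sketch of Q2, wiring up an eventfd with
VFIO_DEVICE_SET_IRQS (illustration only; the IRQ index 0 is an
assumption for the example):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Sketch: register an eventfd that is signaled on I/O completion. */
static int setup_io_notifier(int device_fd)
{
	size_t argsz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
	struct vfio_irq_set *irq_set;
	int32_t efd = eventfd(0, 0);

	if (efd < 0)
		return -1;
	irq_set = calloc(1, argsz);
	if (!irq_set)
		return -1;
	irq_set->argsz = argsz;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = 0;	/* assumed index of the I/O interrupt */
	irq_set->start = 0;
	irq_set->count = 1;
	memcpy(irq_set->data, &efd, sizeof(efd));
	if (ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0)
		efd = -1;
	free(irq_set);
	return efd;	/* poll this fd, then read the IRB from the region */
}
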
Limitations
-----------

The current vfio-ccw implementation focuses on supporting the basic
commands needed to implement block device functionality (read/write)
of DASD/ECKD devices only. Some commands may need special handling in
the future; for example, anything related to path grouping.

DASD is a kind of storage device, while ECKD is a data recording
format. More information on DASD and ECKD can be found here:
https://en.wikipedia.org/wiki/Direct-access_storage_device
https://en.wikipedia.org/wiki/Count_key_data

Together with the corresponding work in Qemu, we can now bring a
passed-through DASD/ECKD device online in a guest and use it as a
block device.

Reference
---------
1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
3. https://en.wikipedia.org/wiki/Channel_I/O
4. Documentation/s390/cds.txt
5. Documentation/vfio.txt
6. Documentation/vfio-mediated-device.txt
diff --git a/MAINTAINERS b/MAINTAINERS
index 33ecf266570f..5f91365ebc0d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7201,6 +7201,7 @@ S: Supported
 F:	Documentation/s390/kvm.txt
 F:	arch/s390/include/asm/kvm*
 F:	arch/s390/kvm/
+F:	arch/s390/mm/gmap.c
 
 KERNEL VIRTUAL MACHINE (KVM) FOR ARM
 M:	Christoffer Dall <christoffer.dall@linaro.org>
@@ -10896,6 +10897,16 @@ W: http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 F:	drivers/iommu/s390-iommu.c
 
+S390 VFIO-CCW DRIVER
+M:	Cornelia Huck <cornelia.huck@de.ibm.com>
+M:	Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+L:	linux-s390@vger.kernel.org
+L:	kvm@vger.kernel.org
+S:	Supported
+F:	drivers/s390/cio/vfio_ccw*
+F:	Documentation/s390/vfio-ccw.txt
+F:	include/uapi/linux/vfio_ccw.h
+
 S3C24XX SD/MMC Driver
 M:	Ben Dooks <ben-linux@fluff.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index e256592eb66e..eae2c64cf69d 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -1,7 +1,7 @@
 obj-y				+= kernel/
 obj-y				+= mm/
 obj-$(CONFIG_KVM)		+= kvm/
-obj-$(CONFIG_CRYPTO_HW)	+= crypto/
+obj-y				+= crypto/
 obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b8b143432381..e161fafb495b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -105,6 +105,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
@@ -123,7 +124,6 @@ config S390
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
-	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
@@ -506,6 +506,21 @@ source kernel/Kconfig.preempt
 
 source kernel/Kconfig.hz
 
+config ARCH_RANDOM
+	def_bool y
+	prompt "s390 architectural random number generation API"
+	help
+	  Enable the s390 architectural random number generation API
+	  to provide random data for all consumers within the Linux
+	  kernel.
+
+	  When enabled the arch_random_* functions declared in linux/random.h
+	  are implemented. The implementation is based on the s390 CPACF
+	  instruction subfunction TRNG which provides a real true random
+	  number generator.
+
+	  If unsure, say Y.
+
 endmenu
 
 menu "Memory setup"
@@ -536,6 +551,16 @@ config FORCE_MAX_ZONEORDER
 
 source "mm/Kconfig"
 
+config MAX_PHYSMEM_BITS
+	int "Maximum size of supported physical memory in bits (42-53)"
+	range 42 53
+	default "46"
+	help
+	  This option specifies the maximum supported size of physical memory
+	  in bits. Supported is any size between 2^42 (4TB) and 2^53 (8PB).
+	  Increasing the number of bits also increases the kernel image size.
+	  By default 46 bits (64TB) are supported.
+
 config PACK_STACK
 	def_bool y
 	prompt "Pack kernel stack"
@@ -613,7 +638,7 @@ if PCI
 config PCI_NR_FUNCTIONS
 	int "Maximum number of PCI functions (1-4096)"
 	range 1 4096
-	default "64"
+	default "128"
 	help
 	  This allows you to specify the maximum number of PCI functions which
 	  this kernel will support.
@@ -671,6 +696,16 @@ config EADM_SCH
 	  To compile this driver as a module, choose M here: the
 	  module will be called eadm_sch.
 
+config VFIO_CCW
+	def_tristate n
+	prompt "Support for VFIO-CCW subchannels"
+	depends on S390_CCW_IOMMU && VFIO_MDEV
+	help
+	  This driver allows usage of I/O subchannels via VFIO-CCW.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vfio_ccw.
+
 endmenu
 
 menu "Dump support"
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 4b176fe83da4..a5039fa89314 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -73,6 +73,7 @@ CONFIG_ZSWAP=y
 CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PCI=y
 CONFIG_PCI_DEBUG=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 0de46cc397f6..83970b5afb2b 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -72,6 +72,7 @@ CONFIG_ZSWAP=y
 CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PCI=y
 CONFIG_HOTPLUG_PCI=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index e167557b434c..fbc6542aaf59 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -70,6 +70,7 @@ CONFIG_ZSWAP=y
 CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PCI=y
 CONFIG_HOTPLUG_PCI=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 4366a3e3e754..e23d97c13735 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -35,7 +35,6 @@ CONFIG_SCSI_ENCLOSURE=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_SRP_ATTRS=y
 CONFIG_ZFCP=y
 # CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 402c530c6da5..678d9863e3f0 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -10,5 +10,6 @@ obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
 obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
 obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
 
 crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
new file mode 100644
index 000000000000..9317b3e645e2
--- /dev/null
+++ b/arch/s390/crypto/arch_random.c
@@ -0,0 +1,31 @@
/*
 * s390 arch random implementation.
 *
 * Copyright IBM Corp. 2017
 * Author(s): Harald Freudenberger <freude@de.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/static_key.h>
#include <asm/cpacf.h>

DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);

atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
EXPORT_SYMBOL(s390_arch_random_counter);

static int __init s390_arch_random_init(void)
{
	/* check if subfunction CPACF_PRNO_TRNG is available */
	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
		static_branch_enable(&s390_arch_random_available);

	return 0;
}
arch_initcall(s390_arch_random_init);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 716b17238599..a4e903ed7e21 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -616,7 +616,7 @@ out_err:
 module_init(paes_s390_init);
 module_exit(paes_s390_fini);
 
-MODULE_ALIAS_CRYPTO("aes-all");
+MODULE_ALIAS_CRYPTO("paes");
 
 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 5a3ec04a7082..3e47c4a0f18b 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -81,7 +81,7 @@ struct prng_ws_s {
 	u64 byte_counter;
 };
 
-struct ppno_ws_s {
+struct prno_ws_s {
 	u32 res;
 	u32 reseed_counter;
 	u64 stream_bytes;
@@ -93,7 +93,7 @@ struct prng_data_s {
 	struct mutex mutex;
 	union {
 		struct prng_ws_s prngws;
-		struct ppno_ws_s ppnows;
+		struct prno_ws_s prnows;
 	};
 	u8 *buf;
 	u32 rest;
@@ -306,12 +306,12 @@ static int __init prng_sha512_selftest(void)
 		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
 
 	u8 buf[sizeof(random)];
-	struct ppno_ws_s ws;
+	struct prno_ws_s ws;
 
 	memset(&ws, 0, sizeof(ws));
 
 	/* initial seed */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
 		   &ws, NULL, 0, seed, sizeof(seed));
 
 	/* check working states V and C */
@@ -324,9 +324,9 @@ static int __init prng_sha512_selftest(void)
 	}
 
 	/* generate random bytes */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
 		   &ws, buf, sizeof(buf), NULL, 0);
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
 		   &ws, buf, sizeof(buf), NULL, 0);
 
 	/* check against expected data */
@@ -374,16 +374,16 @@ static int __init prng_sha512_instantiate(void)
 	/* followed by 16 bytes of unique nonce */
 	get_tod_clock_ext(seed + 64 + 32);
 
-	/* initial seed of the ppno drng */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
+	/* initial seed of the prno drng */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+		   &prng_data->prnows, NULL, 0, seed, sizeof(seed));
 
 	/* if fips mode is enabled, generate a first block of random
 	   bytes for the FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
 		prng_data->prev = prng_data->buf + prng_chunk_size;
-		cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			   &prng_data->ppnows,
+		cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+			   &prng_data->prnows,
 			   prng_data->prev, prng_chunk_size, NULL, 0);
 	}
 
@@ -412,9 +412,9 @@ static int prng_sha512_reseed(void)
 	if (ret != sizeof(seed))
 		return ret;
 
-	/* do a reseed of the ppno drng with this bytestring */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
+	/* do a reseed of the prno drng with this bytestring */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+		   &prng_data->prnows, NULL, 0, seed, sizeof(seed));
 
 	return 0;
 }
@@ -425,15 +425,15 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes)
 	int ret;
 
 	/* reseed needed ? */
-	if (prng_data->ppnows.reseed_counter > prng_reseed_limit) {
+	if (prng_data->prnows.reseed_counter > prng_reseed_limit) {
 		ret = prng_sha512_reseed();
 		if (ret)
 			return ret;
 	}
 
-	/* PPNO generate */
-	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-		   &prng_data->ppnows, buf, nbytes, NULL, 0);
+	/* PRNO generate */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+		   &prng_data->prnows, buf, nbytes, NULL, 0);
 
 	/* FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
@@ -653,7 +653,7 @@ static ssize_t prng_counter_show(struct device *dev,
 	if (mutex_lock_interruptible(&prng_data->mutex))
 		return -ERESTARTSYS;
 	if (prng_mode == PRNG_MODE_SHA512)
-		counter = prng_data->ppnows.stream_bytes;
+		counter = prng_data->prnows.stream_bytes;
 	else
 		counter = prng_data->prngws.byte_counter;
 	mutex_unlock(&prng_data->mutex);
@@ -774,8 +774,8 @@ static int __init prng_init(void)
 
 	/* choose prng mode */
 	if (prng_mode != PRNG_MODE_TDES) {
-		/* check for MSA5 support for PPNO operations */
-		if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
+		/* check for MSA5 support for PRNO operations */
+		if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
 			if (prng_mode == PRNG_MODE_SHA512) {
 				pr_err("The prng module cannot "
 				       "start in SHA-512 mode\n");
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 8aea32fe8bd2..7e3481eb2174 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,8 +1,14 @@
 generic-y += asm-offsets.h
 generic-y += clkdev.h
 generic-y += dma-contiguous.h
+generic-y += div64.h
+generic-y += emergency-restart.h
 generic-y += export.h
+generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kmap_types.h
+generic-y += local.h
+generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
new file mode 100644
index 000000000000..6033901a40b2
--- /dev/null
+++ b/arch/s390/include/asm/archrandom.h
@@ -0,0 +1,69 @@
/*
 * Kernel interface for the s390 arch_random_* functions
 *
 * Copyright IBM Corp. 2017
 *
 * Author: Harald Freudenberger <freude@de.ibm.com>
 *
 */

#ifndef _ASM_S390_ARCHRANDOM_H
#define _ASM_S390_ARCHRANDOM_H

#ifdef CONFIG_ARCH_RANDOM

#include <linux/static_key.h>
#include <linux/atomic.h>
#include <asm/cpacf.h>

DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
extern atomic64_t s390_arch_random_counter;

static void s390_arch_random_generate(u8 *buf, unsigned int nbytes)
{
	cpacf_trng(NULL, 0, buf, nbytes);
	atomic64_add(nbytes, &s390_arch_random_counter);
}

static inline bool arch_has_random(void)
{
	if (static_branch_likely(&s390_arch_random_available))
		return true;
	return false;
}

static inline bool arch_has_random_seed(void)
{
	return arch_has_random();
}

static inline bool arch_get_random_long(unsigned long *v)
{
	if (static_branch_likely(&s390_arch_random_available)) {
		s390_arch_random_generate((u8 *)v, sizeof(*v));
		return true;
	}
	return false;
}

static inline bool arch_get_random_int(unsigned int *v)
{
	if (static_branch_likely(&s390_arch_random_available)) {
		s390_arch_random_generate((u8 *)v, sizeof(*v));
		return true;
	}
	return false;
}

static inline bool arch_get_random_seed_long(unsigned long *v)
{
	return arch_get_random_long(v);
}

static inline bool arch_get_random_seed_int(unsigned int *v)
{
	return arch_get_random_int(v);
}

#endif /* CONFIG_ARCH_RANDOM */
#endif /* _ASM_S390_ARCHRANDOM_H */
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index ac9e2b939d04..ba6d29412344 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -111,20 +111,22 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
 
 static inline int __atomic_cmpxchg(int *ptr, int old, int new)
 {
-	asm volatile(
-		"	cs	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+Q" (*ptr)
-		: [new] "d" (new) : "cc", "memory");
-	return old;
+	return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline int __atomic_cmpxchg_bool(int *ptr, int old, int new)
+{
+	return __sync_bool_compare_and_swap(ptr, old, new);
 }
 
 static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
 {
-	asm volatile(
-		"	csg	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+Q" (*ptr)
-		: [new] "d" (new) : "cc", "memory");
-	return old;
+	return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline long __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+{
+	return __sync_bool_compare_and_swap(ptr, old, new);
 }
 
 #endif /* __ARCH_S390_ATOMIC_OPS__ */
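For reference, the GCC/clang __sync builtins that replace the inline
cs/csg assembly above come in a value-returning and a boolean-returning
form; a minimal user-space sketch of their semantics (illustration
only, not part of the patch):

#include <assert.h>

int main(void)
{
	int v = 5;

	/* value form: returns the old value, swaps only if it matched */
	assert(__sync_val_compare_and_swap(&v, 5, 7) == 5);

	/* boolean form: reports whether the swap happened */
	assert(__sync_bool_compare_and_swap(&v, 7, 9));
	assert(v == 9);
	return 0;
}
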
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index d92047da5ccb..99902b7b9f0c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -15,14 +15,6 @@
  * end up numbered:
  * |63..............0|127............64|191...........128|255...........192|
  *
- * There are a few little-endian macros used mostly for filesystem
- * bitmaps, these work on similar bit array layouts, but byte-oriented:
- * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
- *
- * The main difference is that bit 3-5 in the bit number field needs to be
- * reversed compared to the big-endian bit fields. This can be achieved by
- * XOR with 0x38.
- *
  * We also have special functions which work with an MSB0 encoding.
  * The bits are numbered:
  * |0..............63|64............127|128...........191|192...........255|
@@ -253,6 +245,11 @@ unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
 unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
 				unsigned long offset);
 
+#define for_each_set_bit_inv(bit, addr, size)				\
+	for ((bit) = find_first_bit_inv((addr), (size));		\
+	     (bit) < (size);						\
+	     (bit) = find_next_bit_inv((addr), (size), (bit) + 1))
+
 static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
 {
 	return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
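A short usage sketch of the new for_each_set_bit_inv() iterator,
assuming an MSB0-encoded bitmap; the bitmap and the per-bit handler are
hypothetical names for illustration:

/* Sketch: walk all set bits of an MSB0-encoded bitmap. */
static void example_walk(unsigned long *msb0_bitmap, unsigned long nbits)
{
	unsigned long bit;

	for_each_set_bit_inv(bit, msb0_bitmap, nbits)
		handle_bit(bit);	/* hypothetical per-bit handler */
}
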
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index f7ed88cc066e..7a38ca85190b 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -33,6 +33,24 @@ struct ccw1 {
 	__u32 cda;
 } __attribute__ ((packed,aligned(8)));
 
+/**
+ * struct ccw0 - channel command word
+ * @cmd_code: command code
+ * @cda: data address
+ * @flags: flags, like IDA addressing, etc.
+ * @reserved: will be ignored
+ * @count: byte count
+ *
+ * The format-0 ccw structure.
+ */
+struct ccw0 {
+	__u8 cmd_code;
+	__u32 cda : 24;
+	__u8 flags;
+	__u8 reserved;
+	__u16 count;
+} __packed __aligned(8);
+
 #define CCW_FLAG_DC	0x80
 #define CCW_FLAG_CC	0x40
 #define CCW_FLAG_SLI	0x20
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index e2dfbf280d12..e06f2556b316 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -25,7 +25,8 @@
 #define CPACF_KMO		0xb92b		/* MSA4 */
 #define CPACF_PCC		0xb92c		/* MSA4 */
 #define CPACF_KMCTR		0xb92d		/* MSA4 */
-#define CPACF_PPNO		0xb93c		/* MSA5 */
+#define CPACF_PRNO		0xb93c		/* MSA5 */
+#define CPACF_KMA		0xb929		/* MSA8 */
 
 /*
  * En/decryption modifier bits
@@ -123,12 +124,14 @@
 #define CPACF_PCKMO_ENC_AES_256_KEY	0x14
 
 /*
- * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
  * instruction
  */
-#define CPACF_PPNO_QUERY		0x00
-#define CPACF_PPNO_SHA512_DRNG_GEN	0x03
-#define CPACF_PPNO_SHA512_DRNG_SEED	0x83
+#define CPACF_PRNO_QUERY		0x00
+#define CPACF_PRNO_SHA512_DRNG_GEN	0x03
+#define CPACF_PRNO_SHA512_DRNG_SEED	0x83
+#define CPACF_PRNO_TRNG_Q_R2C_RATIO	0x70
+#define CPACF_PRNO_TRNG			0x72
 
 typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
 
@@ -149,8 +152,8 @@ static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 
 	asm volatile(
 		"	spm	0\n" /* pckmo doesn't change the cc */
-		/* Parameter registers are ignored, but may not be 0 */
-		"0:	.insn	rrf,%[opc] << 16,2,2,2,0\n"
+		/* Parameter regs are ignored, but must be nonzero and unique */
+		"0:	.insn	rrf,%[opc] << 16,2,4,6,0\n"
 		"	brc	1,0b\n"	/* handle partial completion */
 		: "=m" (*mask)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
@@ -173,7 +176,7 @@ static inline int __cpacf_check_opcode(unsigned int opcode)
 	case CPACF_PCC:
 	case CPACF_KMCTR:
 		return test_facility(77);	/* check for MSA4 */
-	case CPACF_PPNO:
+	case CPACF_PRNO:
 		return test_facility(57);	/* check for MSA5 */
 	default:
 		BUG();
@@ -373,18 +376,18 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
 }
 
 /**
- * cpacf_ppno() - executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * cpacf_prno() - executes the PRNO (PERFORM RANDOM NUMBER OPERATION)
  *		  instruction
- * @func: the function code passed to PPNO; see CPACF_PPNO_xxx defines
+ * @func: the function code passed to PRNO; see CPACF_PRNO_xxx defines
  * @param: address of parameter block; see POP for details on each func
  * @dest: address of destination memory area
  * @dest_len: size of destination memory area in bytes
  * @seed: address of seed data
  * @seed_len: size of seed data in bytes
  */
-static inline void cpacf_ppno(unsigned long func, void *param,
-			      u8 *dest, long dest_len,
-			      const u8 *seed, long seed_len)
+static inline void cpacf_prno(unsigned long func, void *param,
+			      u8 *dest, unsigned long dest_len,
+			      const u8 *seed, unsigned long seed_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -398,7 +401,32 @@ static inline void cpacf_ppno(unsigned long func, void *param,
 		"	brc	1,0b\n"	  /* handle partial completion */
 		: [dst] "+a" (r2), [dlen] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1),
-		  [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO)
+		  [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO)
+		: "cc", "memory");
+}
+
+/**
+ * cpacf_trng() - executes the TRNG subfunction of the PRNO instruction
+ * @ucbuf: buffer for unconditioned data
+ * @ucbuf_len: amount of unconditioned data to fetch in bytes
+ * @cbuf: buffer for conditioned data
+ * @cbuf_len: amount of conditioned data to fetch in bytes
+ */
+static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
+			      u8 *cbuf, unsigned long cbuf_len)
+{
+	register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG;
+	register unsigned long r2 asm("2") = (unsigned long) ucbuf;
+	register unsigned long r3 asm("3") = (unsigned long) ucbuf_len;
+	register unsigned long r4 asm("4") = (unsigned long) cbuf;
+	register unsigned long r5 asm("5") = (unsigned long) cbuf_len;
+
+	asm volatile (
+		"0:	.insn	rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
+		"	brc	1,0b\n"	  /* handle partial completion */
+		: [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3),
+		  [cbuf] "+a" (r4), [cbuflen] "+d" (r5)
+		: [fc] "d" (r0), [opc] "i" (CPACF_PRNO)
 		: "cc", "memory");
 }
 
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index d1e0707310fd..05480e4cc5ca 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -20,9 +20,11 @@
 #define CPU_MF_INT_SF_PRA	(1 << 29)	/* program request alert */
 #define CPU_MF_INT_SF_SACA	(1 << 23)	/* sampler auth. change alert */
 #define CPU_MF_INT_SF_LSDA	(1 << 22)	/* loss of sample data alert */
+#define CPU_MF_INT_CF_MTDA	(1 << 15)	/* loss of MT ctr. data alert */
 #define CPU_MF_INT_CF_CACA	(1 <<  7)	/* counter auth. change alert */
 #define CPU_MF_INT_CF_LCDA	(1 <<  6)	/* loss of counter data alert */
-#define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
+#define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_MTDA|CPU_MF_INT_CF_CACA| \
+				 CPU_MF_INT_CF_LCDA)
 #define CPU_MF_INT_SF_MASK	(CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \
 				 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \
 				 CPU_MF_INT_SF_LSDA)
@@ -172,7 +174,7 @@ static inline int lcctl(u64 ctl)
 /* Extract CPU counter */
 static inline int __ecctr(u64 ctr, u64 *content)
 {
-	register u64 _content asm("4") = 0;
+	u64 _content;
 	int cc;
 
 	asm volatile (
diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/s390/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1d48880b3cc1..e8f623041769 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -105,6 +105,7 @@
 #define HWCAP_S390_VXRS		2048
 #define HWCAP_S390_VXRS_BCD	4096
 #define HWCAP_S390_VXRS_EXT	8192
+#define HWCAP_S390_GS		16384
 
 /* Internal bits, not exposed via elf */
 #define HWCAP_INT_SIE		1UL
diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/arch/s390/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 09b406db7529..cb60d5c5755d 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -8,14 +8,11 @@
 #define __ASM_FACILITY_H
 
 #include <generated/facilities.h>
-
-#ifndef __ASSEMBLY__
-
 #include <linux/string.h>
 #include <linux/preempt.h>
 #include <asm/lowcore.h>
 
-#define MAX_FACILITY_BIT (256*8)	/* stfle_fac_list has 256 bytes */
+#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8)
 
 static inline int __test_facility(unsigned long nr, void *facilities)
 {
@@ -72,5 +69,4 @@ static inline void stfle(u64 *stfle_fac_list, int size)
 	preempt_enable();
 }
 
-#endif /* __ASSEMBLY__ */
 #endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/s390/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
index 68d7d68300f2..8a0b721a9b8d 100644
--- a/arch/s390/include/asm/isc.h
+++ b/arch/s390/include/asm/isc.h
@@ -16,6 +16,7 @@
 #define CONSOLE_ISC 1			/* console I/O subchannel */
 #define EADM_SCH_ISC 4			/* EADM subchannels */
 #define CHSC_SCH_ISC 7			/* CHSC subchannels */
+#define VFIO_CCW_ISC IO_SCH_ISC		/* VFIO-CCW I/O subchannels */
 /* Adapter interrupts. */
 #define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
 #define PCI_ISC 2			/* PCI I/O subchannels */
diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h
deleted file mode 100644
index 0a88622339ee..000000000000
--- a/arch/s390/include/asm/kmap_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-#include <asm-generic/kmap_types.h>
-
-#endif
diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/s390/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/s390/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 61261e0e95c0..8a5b082797f8 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,8 +157,8 @@ struct lowcore {
 	__u64	stfle_fac_list[32];		/* 0x0f00 */
 	__u8	pad_0x1000[0x11b0-0x1000];	/* 0x1000 */
 
-	/* Pointer to vector register save area */
-	__u64	vector_save_area_addr;		/* 0x11b0 */
+	/* Pointer to the machine check extended save area */
+	__u64	mcesad;				/* 0x11b0 */
 
 	/* 64 bit extparam used for pfault/diag 250: defined by architecture */
 	__u64	ext_params2;			/* 0x11B8 */
@@ -182,10 +182,7 @@ struct lowcore {
 
 	/* Transaction abort diagnostic block */
 	__u8	pgm_tdb[256];			/* 0x1800 */
-	__u8	pad_0x1900[0x1c00-0x1900];	/* 0x1900 */
-
-	/* Software defined save area for vector registers */
-	__u8	vector_save_area[1024];		/* 0x1c00 */
+	__u8	pad_0x1900[0x2000-0x1900];	/* 0x1900 */
 } __packed;
 
 #define S390_lowcore (*((struct lowcore *) 0))
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index b55a59e1d134..b79813d9cf68 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -8,8 +8,4 @@
8 8
9#include <uapi/asm/mman.h> 9#include <uapi/asm/mman.h>
10 10
11#ifndef __ASSEMBLY__
12int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
13#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags)
14#endif
15#endif /* __S390_MMAN_H__ */ 11#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bea785d7f853..bd6f30304518 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -22,6 +22,8 @@ typedef struct {
22 unsigned int has_pgste:1; 22 unsigned int has_pgste:1;
23 /* The mmu context uses storage keys. */ 23 /* The mmu context uses storage keys. */
24 unsigned int use_skey:1; 24 unsigned int use_skey:1;
25 /* The mmu context uses CMMA. */
26 unsigned int use_cmma:1;
25} mm_context_t; 27} mm_context_t;
26 28
27#define INIT_MM_CONTEXT(name) \ 29#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fa2bf69be182..8712e11bead4 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -28,6 +28,7 @@ static inline int init_new_context(struct task_struct *tsk,
28 mm->context.alloc_pgste = page_table_allocate_pgste; 28 mm->context.alloc_pgste = page_table_allocate_pgste;
29 mm->context.has_pgste = 0; 29 mm->context.has_pgste = 0;
30 mm->context.use_skey = 0; 30 mm->context.use_skey = 0;
31 mm->context.use_cmma = 0;
31#endif 32#endif
32 switch (mm->context.asce_limit) { 33 switch (mm->context.asce_limit) {
33 case 1UL << 42: 34 case 1UL << 42:
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b75fd910386a..e3e8895f5d3e 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -58,7 +58,9 @@ union mci {
58 u64 ie : 1; /* 32 indirect storage error */ 58 u64 ie : 1; /* 32 indirect storage error */
59 u64 ar : 1; /* 33 access register validity */ 59 u64 ar : 1; /* 33 access register validity */
60 u64 da : 1; /* 34 delayed access exception */ 60 u64 da : 1; /* 34 delayed access exception */
61 u64 : 7; /* 35-41 */ 61 u64 : 1; /* 35 */
62 u64 gs : 1; /* 36 guarded storage registers */
63 u64 : 5; /* 37-41 */
62 u64 pr : 1; /* 42 tod programmable register validity */ 64 u64 pr : 1; /* 42 tod programmable register validity */
63 u64 fc : 1; /* 43 fp control register validity */ 65 u64 fc : 1; /* 43 fp control register validity */
64 u64 ap : 1; /* 44 ancillary report */ 66 u64 ap : 1; /* 44 ancillary report */
@@ -69,6 +71,14 @@ union mci {
69 }; 71 };
70}; 72};
71 73
74#define MCESA_ORIGIN_MASK (~0x3ffUL)
75#define MCESA_LC_MASK (0xfUL)
76
77struct mcesa {
78 u8 vector_save_area[1024];
79 u8 guarded_storage_save_area[32];
80};
81
72struct pt_regs; 82struct pt_regs;
73 83
74extern void s390_handle_mcck(void); 84extern void s390_handle_mcck(void);
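
The new struct mcesa replaces the fixed vector save area that the lowcore hunk above removed. Its anchor, the lowcore mcesad field, packs a 1 KB-aligned origin address together with a length code in the low bits; the two masks split them apart. A small decode sketch with a made-up mcesad value:

	#include <stdio.h>

	#define MCESA_ORIGIN_MASK	(~0x3ffUL)
	#define MCESA_LC_MASK		(0xfUL)

	int main(void)
	{
		unsigned long mcesad = 0x2f4c00UL | 0xaUL; /* origin | length code */

		printf("origin=%#lx lc=%lu\n",
		       mcesad & MCESA_ORIGIN_MASK,	/* 0x2f4c00 */
		       mcesad & MCESA_LC_MASK);		/* 10 */
		return 0;
	}
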
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
new file mode 100644
index 000000000000..42267a2fe29e
--- /dev/null
+++ b/arch/s390/include/asm/page-states.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright IBM Corp. 2017
3 * Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
4 */
5
6#ifndef PAGE_STATES_H
7#define PAGE_STATES_H
8
9#define ESSA_GET_STATE 0
10#define ESSA_SET_STABLE 1
11#define ESSA_SET_UNUSED 2
12#define ESSA_SET_VOLATILE 3
13#define ESSA_SET_POT_VOLATILE 4
14#define ESSA_SET_STABLE_RESIDENT 5
15#define ESSA_SET_STABLE_IF_RESIDENT 6
16
17#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT
18
19#endif
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index c64c0befd3f3..dd32beb9d30c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Performance event support - s390 specific definitions. 2 * Performance event support - s390 specific definitions.
3 * 3 *
4 * Copyright IBM Corp. 2009, 2013 4 * Copyright IBM Corp. 2009, 2017
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 * Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 6 * Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
7 */ 7 */
@@ -47,7 +47,7 @@ struct perf_sf_sde_regs {
47}; 47};
48 48
49/* Perf PMU definitions for the counter facility */ 49/* Perf PMU definitions for the counter facility */
50#define PERF_CPUM_CF_MAX_CTR 256 50#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
51 51
52/* Perf PMU definitions for the sampling facility */ 52/* Perf PMU definitions for the sampling facility */
53#define PERF_CPUM_SF_MAX_CTR 2 53#define PERF_CPUM_SF_MAX_CTR 2
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ecec682bb516..e6e3b887bee3 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -372,10 +372,12 @@ static inline int is_module_addr(void *addr)
372#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ 372#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
373 373
374/* Guest Page State used for virtualization */ 374/* Guest Page State used for virtualization */
375#define _PGSTE_GPS_ZERO 0x0000000080000000UL 375#define _PGSTE_GPS_ZERO 0x0000000080000000UL
376#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL 376#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
377#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL 377#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
378#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL 378#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
379#define _PGSTE_GPS_USAGE_POT_VOLATILE 0x0000000002000000UL
380#define _PGSTE_GPS_USAGE_VOLATILE _PGSTE_GPS_USAGE_MASK
379 381
380/* 382/*
381 * A user page table pointer has the space-switch-event bit, the 383 * A user page table pointer has the space-switch-event bit, the
@@ -1041,6 +1043,12 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
1041int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, 1043int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
1042 unsigned char *key); 1044 unsigned char *key);
1043 1045
1046int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
1047 unsigned long bits, unsigned long value);
1048int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
1049int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
1050 unsigned long *oldpte, unsigned long *oldpgste);
1051
1044/* 1052/*
1045 * Certain architectures need to do special things when PTEs 1053 * Certain architectures need to do special things when PTEs
1046 * within a page table are directly modified. Thus, the following 1054 * within a page table are directly modified. Thus, the following
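
The three new PGSTE helpers back the KVM page-state (CMMA) handling added elsewhere in this merge. A hypothetical in-kernel caller, combining them with the ESSA operation codes from the new page-states.h; mm and hva come from the caller's context and the error values are only indicative:

	unsigned long oldpte, oldpgste;
	int rc;

	/* Ask ESSA to mark the guest page volatile and capture the
	 * previous PTE/PGSTE contents; negative rc signals failure. */
	rc = pgste_perform_essa(mm, hva, ESSA_SET_VOLATILE, &oldpte, &oldpgste);
	if (rc < 0)
		return rc;
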
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index b48aef4188f6..4c484590d858 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -87,4 +87,25 @@ int pkey_findcard(const struct pkey_seckey *seckey,
87int pkey_skey2pkey(const struct pkey_seckey *seckey, 87int pkey_skey2pkey(const struct pkey_seckey *seckey,
88 struct pkey_protkey *protkey); 88 struct pkey_protkey *protkey);
89 89
90/*
91 * Verify that the given secure key is usable with the pkey module.
92 * Check for the correct key type and for at least one crypto card
93 * able to handle this key (master key or old master key
94 * verification pattern matches).
95 * Returns some info about the key: key size in bits, key type
96 * (currently only AES), and a flag if the key is wrapped with an old MKVP.
97 * @param seckey pointer to buffer with the input secure key
98 * @param pcardnr pointer to cardnr, receives the card number on success
99 * @param pdomain pointer to domain, receives the domain number on success
100 * @param pkeysize pointer to keysize, receives the bitsize of the key
101 * @param pattributes pointer to attributes, receives additional info
102 * PKEY_VERIFY_ATTR_AES if the key is an AES key
103 * PKEY_VERIFY_ATTR_OLD_MKVP if key has old mkvp stored in
104 * @return 0 on success, negative errno value on failure. If no card could
105 * be found which is able to handle this key, -ENODEV is returned.
106 */
107int pkey_verifykey(const struct pkey_seckey *seckey,
108 u16 *pcardnr, u16 *pdomain,
109 u16 *pkeysize, u32 *pattributes);
110
90#endif /* _KAPI_PKEY_H */ 111#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e4988710aa86..60d395fdc864 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -91,14 +91,15 @@ extern void execve_tail(void);
 91 * User space process size: 2GB for 31 bit, 4TB or 8PB for 64 bit. 91 * User space process size: 2GB for 31 bit, 4TB or 8PB for 64 bit.
92 */ 92 */
93 93
94#define TASK_SIZE_OF(tsk) ((tsk)->mm ? \ 94#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
95 (tsk)->mm->context.asce_limit : TASK_MAX_SIZE) 95 (1UL << 31) : (1UL << 53))
96#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ 96#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
97 (1UL << 30) : (1UL << 41)) 97 (1UL << 30) : (1UL << 41))
98#define TASK_SIZE TASK_SIZE_OF(current) 98#define TASK_SIZE TASK_SIZE_OF(current)
99#define TASK_MAX_SIZE (1UL << 53) 99#define TASK_SIZE_MAX (1UL << 53)
100 100
101#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) 101#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
102 (1UL << 31) : (1UL << 42))
102#define STACK_TOP_MAX (1UL << 42) 103#define STACK_TOP_MAX (1UL << 42)
103 104
104#define HAVE_ARCH_PICK_MMAP_LAYOUT 105#define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -135,6 +136,8 @@ struct thread_struct {
135 struct list_head list; 136 struct list_head list;
136 /* cpu runtime instrumentation */ 137 /* cpu runtime instrumentation */
137 struct runtime_instr_cb *ri_cb; 138 struct runtime_instr_cb *ri_cb;
139 struct gs_cb *gs_cb; /* Current guarded storage cb */
140 struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
138 unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ 141 unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
139 /* 142 /*
140 * Warning: 'fpu' is dynamically-sized. It *MUST* be at 143 * Warning: 'fpu' is dynamically-sized. It *MUST* be at
@@ -215,6 +218,9 @@ void show_cacheinfo(struct seq_file *m);
215/* Free all resources held by a thread. */ 218/* Free all resources held by a thread. */
216extern void release_thread(struct task_struct *); 219extern void release_thread(struct task_struct *);
217 220
221/* Free guarded storage control block for current */
222void exit_thread_gs(void);
223
218/* 224/*
219 * Return saved PC of a blocked thread. 225 * Return saved PC of a blocked thread.
220 */ 226 */
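
With asce_limit out of the TASK_SIZE computation, the user address-space limits become fixed constants per personality. Spelled out for orientation:

	#include <stdio.h>

	int main(void)
	{
		printf("31-bit TASK_SIZE: %llu\n", 1ULL << 31);	/* 2 GB */
		printf("64-bit TASK_SIZE: %llu\n", 1ULL << 53);	/* 8 PB */
		printf("64-bit STACK_TOP: %llu\n", 1ULL << 42);	/* 4 TB */
		return 0;
	}
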
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 30bdb5a027f3..cd78155b1829 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,8 @@
29#define MACHINE_FLAG_TE _BITUL(11) 29#define MACHINE_FLAG_TE _BITUL(11)
30#define MACHINE_FLAG_TLB_LC _BITUL(12) 30#define MACHINE_FLAG_TLB_LC _BITUL(12)
31#define MACHINE_FLAG_VX _BITUL(13) 31#define MACHINE_FLAG_VX _BITUL(13)
32#define MACHINE_FLAG_CAD _BITUL(14) 32#define MACHINE_FLAG_NX _BITUL(14)
33#define MACHINE_FLAG_NX _BITUL(15) 33#define MACHINE_FLAG_GS _BITUL(15)
34 34
35#define LPP_MAGIC _BITUL(31) 35#define LPP_MAGIC _BITUL(31)
36#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) 36#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -68,8 +68,8 @@ extern void detect_memory_memblock(void);
68#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) 68#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
69#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) 69#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
70#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) 70#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
71#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
72#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) 71#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
72#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
73 73
74/* 74/*
75 * Console mode. Override with conmode= 75 * Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
index 487428b6d099..334e279f1bce 100644
--- a/arch/s390/include/asm/sparsemem.h
+++ b/arch/s390/include/asm/sparsemem.h
@@ -2,6 +2,6 @@
2#define _ASM_S390_SPARSEMEM_H 2#define _ASM_S390_SPARSEMEM_H
3 3
4#define SECTION_SIZE_BITS 28 4#define SECTION_SIZE_BITS 28
5#define MAX_PHYSMEM_BITS 46 5#define MAX_PHYSMEM_BITS CONFIG_MAX_PHYSMEM_BITS
6 6
7#endif /* _ASM_S390_SPARSEMEM_H */ 7#endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index ffc45048ea7d..f7838ecd83c6 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -10,6 +10,7 @@
10#define __ASM_SPINLOCK_H 10#define __ASM_SPINLOCK_H
11 11
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <asm/atomic_ops.h>
13#include <asm/barrier.h> 14#include <asm/barrier.h>
14#include <asm/processor.h> 15#include <asm/processor.h>
15 16
@@ -17,12 +18,6 @@
17 18
18extern int spin_retry; 19extern int spin_retry;
19 20
20static inline int
21_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
22{
23 return __sync_bool_compare_and_swap(lock, old, new);
24}
25
26#ifndef CONFIG_SMP 21#ifndef CONFIG_SMP
27static inline bool arch_vcpu_is_preempted(int cpu) { return false; } 22static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
28#else 23#else
@@ -40,7 +35,7 @@ bool arch_vcpu_is_preempted(int cpu);
40 * (the type definitions are in asm/spinlock_types.h) 35 * (the type definitions are in asm/spinlock_types.h)
41 */ 36 */
42 37
43void arch_lock_relax(unsigned int cpu); 38void arch_lock_relax(int cpu);
44 39
45void arch_spin_lock_wait(arch_spinlock_t *); 40void arch_spin_lock_wait(arch_spinlock_t *);
46int arch_spin_trylock_retry(arch_spinlock_t *); 41int arch_spin_trylock_retry(arch_spinlock_t *);
@@ -70,7 +65,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
70{ 65{
71 barrier(); 66 barrier();
72 return likely(arch_spin_value_unlocked(*lp) && 67 return likely(arch_spin_value_unlocked(*lp) &&
73 _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); 68 __atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
74} 69}
75 70
76static inline void arch_spin_lock(arch_spinlock_t *lp) 71static inline void arch_spin_lock(arch_spinlock_t *lp)
@@ -95,7 +90,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
95 90
96static inline void arch_spin_unlock(arch_spinlock_t *lp) 91static inline void arch_spin_unlock(arch_spinlock_t *lp)
97{ 92{
98 typecheck(unsigned int, lp->lock); 93 typecheck(int, lp->lock);
99 asm volatile( 94 asm volatile(
100 "st %1,%0\n" 95 "st %1,%0\n"
101 : "+Q" (lp->lock) 96 : "+Q" (lp->lock)
@@ -141,16 +136,16 @@ extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
141 136
142static inline int arch_read_trylock_once(arch_rwlock_t *rw) 137static inline int arch_read_trylock_once(arch_rwlock_t *rw)
143{ 138{
144 unsigned int old = ACCESS_ONCE(rw->lock); 139 int old = ACCESS_ONCE(rw->lock);
145 return likely((int) old >= 0 && 140 return likely(old >= 0 &&
146 _raw_compare_and_swap(&rw->lock, old, old + 1)); 141 __atomic_cmpxchg_bool(&rw->lock, old, old + 1));
147} 142}
148 143
149static inline int arch_write_trylock_once(arch_rwlock_t *rw) 144static inline int arch_write_trylock_once(arch_rwlock_t *rw)
150{ 145{
151 unsigned int old = ACCESS_ONCE(rw->lock); 146 int old = ACCESS_ONCE(rw->lock);
152 return likely(old == 0 && 147 return likely(old == 0 &&
153 _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); 148 __atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000));
154} 149}
155 150
156#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES 151#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -161,9 +156,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
161 156
162#define __RAW_LOCK(ptr, op_val, op_string) \ 157#define __RAW_LOCK(ptr, op_val, op_string) \
163({ \ 158({ \
164 unsigned int old_val; \ 159 int old_val; \
165 \ 160 \
166 typecheck(unsigned int *, ptr); \ 161 typecheck(int *, ptr); \
167 asm volatile( \ 162 asm volatile( \
168 op_string " %0,%2,%1\n" \ 163 op_string " %0,%2,%1\n" \
169 "bcr 14,0\n" \ 164 "bcr 14,0\n" \
@@ -175,9 +170,9 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
175 170
176#define __RAW_UNLOCK(ptr, op_val, op_string) \ 171#define __RAW_UNLOCK(ptr, op_val, op_string) \
177({ \ 172({ \
178 unsigned int old_val; \ 173 int old_val; \
179 \ 174 \
180 typecheck(unsigned int *, ptr); \ 175 typecheck(int *, ptr); \
181 asm volatile( \ 176 asm volatile( \
182 op_string " %0,%2,%1\n" \ 177 op_string " %0,%2,%1\n" \
183 : "=d" (old_val), "+Q" (*ptr) \ 178 : "=d" (old_val), "+Q" (*ptr) \
@@ -187,14 +182,14 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
187}) 182})
188 183
189extern void _raw_read_lock_wait(arch_rwlock_t *lp); 184extern void _raw_read_lock_wait(arch_rwlock_t *lp);
190extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev); 185extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev);
191 186
192static inline void arch_read_lock(arch_rwlock_t *rw) 187static inline void arch_read_lock(arch_rwlock_t *rw)
193{ 188{
194 unsigned int old; 189 int old;
195 190
196 old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); 191 old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD);
197 if ((int) old < 0) 192 if (old < 0)
198 _raw_read_lock_wait(rw); 193 _raw_read_lock_wait(rw);
199} 194}
200 195
@@ -205,7 +200,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
205 200
206static inline void arch_write_lock(arch_rwlock_t *rw) 201static inline void arch_write_lock(arch_rwlock_t *rw)
207{ 202{
208 unsigned int old; 203 int old;
209 204
210 old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); 205 old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
211 if (old != 0) 206 if (old != 0)
@@ -232,11 +227,11 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
232 227
233static inline void arch_read_unlock(arch_rwlock_t *rw) 228static inline void arch_read_unlock(arch_rwlock_t *rw)
234{ 229{
235 unsigned int old; 230 int old;
236 231
237 do { 232 do {
238 old = ACCESS_ONCE(rw->lock); 233 old = ACCESS_ONCE(rw->lock);
239 } while (!_raw_compare_and_swap(&rw->lock, old, old - 1)); 234 } while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1));
240} 235}
241 236
242static inline void arch_write_lock(arch_rwlock_t *rw) 237static inline void arch_write_lock(arch_rwlock_t *rw)
@@ -248,7 +243,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
248 243
249static inline void arch_write_unlock(arch_rwlock_t *rw) 244static inline void arch_write_unlock(arch_rwlock_t *rw)
250{ 245{
251 typecheck(unsigned int, rw->lock); 246 typecheck(int, rw->lock);
252 247
253 rw->owner = 0; 248 rw->owner = 0;
254 asm volatile( 249 asm volatile(
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index d84b6939237c..fe755eec275f 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -6,14 +6,14 @@
6#endif 6#endif
7 7
8typedef struct { 8typedef struct {
9 unsigned int lock; 9 int lock;
10} __attribute__ ((aligned (4))) arch_spinlock_t; 10} __attribute__ ((aligned (4))) arch_spinlock_t;
11 11
12#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } 12#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
13 13
14typedef struct { 14typedef struct {
15 unsigned int lock; 15 int lock;
16 unsigned int owner; 16 int owner;
17} arch_rwlock_t; 17} arch_rwlock_t;
18 18
19#define __ARCH_RW_LOCK_UNLOCKED { 0 } 19#define __ARCH_RW_LOCK_UNLOCKED { 0 }
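
Across both spinlock files the lock words change from unsigned int to int, and the open-coded _raw_compare_and_swap wrapper around __sync_bool_compare_and_swap gives way to __atomic_cmpxchg_bool from the new atomic_ops.h. The kernel version is s390 inline assembly, but semantically it behaves like this sketch built on the GCC atomic builtin:

	#include <stdbool.h>

	/* Semantics sketch only: atomically replace *ptr with new_val iff
	 * it still holds old; return true on success. */
	static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new_val)
	{
		return __atomic_compare_exchange_n(ptr, &old, new_val, false,
						   __ATOMIC_SEQ_CST,
						   __ATOMIC_SEQ_CST);
	}
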
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 12d45f0cfdd9..f6c2b5814ab0 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -10,6 +10,7 @@
10#include <linux/thread_info.h> 10#include <linux/thread_info.h>
11#include <asm/fpu/api.h> 11#include <asm/fpu/api.h>
12#include <asm/ptrace.h> 12#include <asm/ptrace.h>
13#include <asm/guarded_storage.h>
13 14
14extern struct task_struct *__switch_to(void *, void *); 15extern struct task_struct *__switch_to(void *, void *);
15extern void update_cr_regs(struct task_struct *task); 16extern void update_cr_regs(struct task_struct *task);
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs)
33 save_fpu_regs(); \ 34 save_fpu_regs(); \
34 save_access_regs(&prev->thread.acrs[0]); \ 35 save_access_regs(&prev->thread.acrs[0]); \
35 save_ri_cb(prev->thread.ri_cb); \ 36 save_ri_cb(prev->thread.ri_cb); \
37 save_gs_cb(prev->thread.gs_cb); \
36 } \ 38 } \
37 if (next->mm) { \ 39 if (next->mm) { \
38 update_cr_regs(next); \ 40 update_cr_regs(next); \
39 set_cpu_flag(CIF_FPU); \ 41 set_cpu_flag(CIF_FPU); \
40 restore_access_regs(&next->thread.acrs[0]); \ 42 restore_access_regs(&next->thread.acrs[0]); \
41 restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ 43 restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
44 restore_gs_cb(next->thread.gs_cb); \
42 } \ 45 } \
43 prev = __switch_to(prev,next); \ 46 prev = __switch_to(prev,next); \
44} while (0) 47} while (0)
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 229326c942c7..73bff45ced55 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -142,7 +142,15 @@ struct sysinfo_3_2_2 {
142 142
143extern int topology_max_mnest; 143extern int topology_max_mnest;
144 144
145#define TOPOLOGY_CORE_BITS 64 145/*
146 * Returns the maximum nesting level supported by the cpu topology code.
 147 * The current maximum level is 4, which is the drawer level.
148 */
149static inline int topology_mnest_limit(void)
150{
151 return min(topology_max_mnest, 4);
152}
153
146#define TOPOLOGY_NR_MAG 6 154#define TOPOLOGY_NR_MAG 6
147 155
148struct topology_core { 156struct topology_core {
@@ -152,7 +160,7 @@ struct topology_core {
152 unsigned char pp:2; 160 unsigned char pp:2;
153 unsigned char reserved1; 161 unsigned char reserved1;
154 unsigned short origin; 162 unsigned short origin;
155 unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG]; 163 unsigned long mask;
156}; 164};
157 165
158struct topology_container { 166struct topology_container {
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a5b54a445eb8..f36e6e2b73f0 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
54#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ 54#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
55#define TIF_SIGPENDING 1 /* signal pending */ 55#define TIF_SIGPENDING 1 /* signal pending */
56#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ 56#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
57#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ 57#define TIF_UPROBE 3 /* breakpointed or single-stepping */
58#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ 58#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
59#define TIF_SECCOMP 5 /* secure computing */ 59#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
60#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ 60#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
61#define TIF_UPROBE 7 /* breakpointed or single-stepping */ 61#define TIF_SECCOMP 10 /* secure computing */
62#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
62#define TIF_31BIT 16 /* 32bit process */ 63#define TIF_31BIT 16 /* 32bit process */
63#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ 64#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
64#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ 65#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
76#define _TIF_UPROBE _BITUL(TIF_UPROBE) 77#define _TIF_UPROBE _BITUL(TIF_UPROBE)
77#define _TIF_31BIT _BITUL(TIF_31BIT) 78#define _TIF_31BIT _BITUL(TIF_31BIT)
78#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) 79#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
80#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
79 81
80#endif /* _ASM_THREAD_INFO_H */ 82#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 6848ba5c1454..addb09cee0f5 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,6 +1,16 @@
1# UAPI Header export list 1# UAPI Header export list
2include include/uapi/asm-generic/Kbuild.asm 2include include/uapi/asm-generic/Kbuild.asm
3 3
4generic-y += errno.h
5generic-y += fcntl.h
6generic-y += ioctl.h
7generic-y += mman.h
8generic-y += param.h
9generic-y += poll.h
10generic-y += resource.h
11generic-y += sockios.h
12generic-y += termbits.h
13
4header-y += auxvec.h 14header-y += auxvec.h
5header-y += bitsperlong.h 15header-y += bitsperlong.h
6header-y += byteorder.h 16header-y += byteorder.h
@@ -11,25 +21,20 @@ header-y += cmb.h
11header-y += dasd.h 21header-y += dasd.h
12header-y += debug.h 22header-y += debug.h
13header-y += errno.h 23header-y += errno.h
14header-y += fcntl.h 24header-y += guarded_storage.h
15header-y += hypfs.h 25header-y += hypfs.h
16header-y += ioctl.h
17header-y += ioctls.h 26header-y += ioctls.h
18header-y += ipcbuf.h 27header-y += ipcbuf.h
19header-y += kvm.h 28header-y += kvm.h
20header-y += kvm_para.h 29header-y += kvm_para.h
21header-y += kvm_perf.h 30header-y += kvm_perf.h
22header-y += kvm_virtio.h 31header-y += kvm_virtio.h
23header-y += mman.h
24header-y += monwriter.h 32header-y += monwriter.h
25header-y += msgbuf.h 33header-y += msgbuf.h
26header-y += param.h
27header-y += pkey.h 34header-y += pkey.h
28header-y += poll.h
29header-y += posix_types.h 35header-y += posix_types.h
30header-y += ptrace.h 36header-y += ptrace.h
31header-y += qeth.h 37header-y += qeth.h
32header-y += resource.h
33header-y += schid.h 38header-y += schid.h
34header-y += sclp_ctl.h 39header-y += sclp_ctl.h
35header-y += sembuf.h 40header-y += sembuf.h
@@ -40,12 +45,10 @@ header-y += sigcontext.h
40header-y += siginfo.h 45header-y += siginfo.h
41header-y += signal.h 46header-y += signal.h
42header-y += socket.h 47header-y += socket.h
43header-y += sockios.h
44header-y += stat.h 48header-y += stat.h
45header-y += statfs.h 49header-y += statfs.h
46header-y += swab.h 50header-y += swab.h
47header-y += tape390.h 51header-y += tape390.h
48header-y += termbits.h
49header-y += termios.h 52header-y += termios.h
50header-y += types.h 53header-y += types.h
51header-y += ucontext.h 54header-y += ucontext.h
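
The per-arch stubs deleted in the hunks that follow are not lost: each header listed under generic-y is generated at build time into arch/s390/include/generated/uapi/asm/ as a one-line wrapper. Taking errno.h as the pattern, the generated file reduces to:

	#include <asm-generic/errno.h>
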
diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h
deleted file mode 100644
index 395e97d8005e..000000000000
--- a/arch/s390/include/uapi/asm/errno.h
+++ /dev/null
@@ -1,11 +0,0 @@
1/*
2 * S390 version
3 *
4 */
5
6#ifndef _S390_ERRNO_H
7#define _S390_ERRNO_H
8
9#include <asm-generic/errno.h>
10
11#endif
diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/s390/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/fcntl.h>
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 000000000000..852850e8e17e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,77 @@
1#ifndef _GUARDED_STORAGE_H
2#define _GUARDED_STORAGE_H
3
4#include <linux/types.h>
5
6struct gs_cb {
7 __u64 reserved;
8 __u64 gsd;
9 __u64 gssm;
10 __u64 gs_epl_a;
11};
12
13struct gs_epl {
14 __u8 pad1;
15 union {
16 __u8 gs_eam;
17 struct {
18 __u8 : 6;
19 __u8 e : 1;
20 __u8 b : 1;
21 };
22 };
23 union {
24 __u8 gs_eci;
25 struct {
26 __u8 tx : 1;
27 __u8 cx : 1;
28 __u8 : 5;
29 __u8 in : 1;
30 };
31 };
32 union {
33 __u8 gs_eai;
34 struct {
35 __u8 : 1;
36 __u8 t : 1;
37 __u8 as : 2;
38 __u8 ar : 4;
39 };
40 };
41 __u32 pad2;
42 __u64 gs_eha;
43 __u64 gs_eia;
44 __u64 gs_eoa;
45 __u64 gs_eir;
46 __u64 gs_era;
47};
48
49#define GS_ENABLE 0
50#define GS_DISABLE 1
51#define GS_SET_BC_CB 2
52#define GS_CLEAR_BC_CB 3
53#define GS_BROADCAST 4
54
55static inline void load_gs_cb(struct gs_cb *gs_cb)
56{
57 asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
58}
59
60static inline void store_gs_cb(struct gs_cb *gs_cb)
61{
62 asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
63}
64
65static inline void save_gs_cb(struct gs_cb *gs_cb)
66{
67 if (gs_cb)
68 store_gs_cb(gs_cb);
69}
70
71static inline void restore_gs_cb(struct gs_cb *gs_cb)
72{
73 if (gs_cb)
74 load_gs_cb(gs_cb);
75}
76
77#endif /* _GUARDED_STORAGE_H */
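
The two .insn encodings above correspond to the new load/store guarded-storage-controls instructions (opcodes 0xe3...4d and 0xe3...49), spelled out as raw encodings so that older binutils can still assemble the header. A hypothetical userspace setup for a single guarded section; the field values are illustrative, with gsd packing the origin and the guarded-storage characteristic (25 here, matching the kernel's default in gs_enable() later in this merge):

	__u64 origin = 0;			/* chosen by the application */
	unsigned int section = 0;		/* which section to guard */
	struct gs_epl epl = { 0 };		/* event parameter list */
	struct gs_cb cb = {
		.gsd	  = origin | 25,	/* designation: origin + GSC */
		.gssm	  = 1ULL << section,	/* section mask bit */
		.gs_epl_a = (__u64)(unsigned long)&epl,
	};

	load_gs_cb(&cb);	/* valid only after GS_ENABLE, see below */
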
diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/s390/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ioctl.h>
diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h
deleted file mode 100644
index de23da1f41b2..000000000000
--- a/arch/s390/include/uapi/asm/mman.h
+++ /dev/null
@@ -1,6 +0,0 @@
1/*
2 * S390 version
3 *
4 * Derived from "include/asm-i386/mman.h"
5 */
6#include <asm-generic/mman.h>
diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h
deleted file mode 100644
index c616821bf2ac..000000000000
--- a/arch/s390/include/uapi/asm/param.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASMS390_PARAM_H
2#define _ASMS390_PARAM_H
3
4#include <asm-generic/param.h>
5
6#endif /* _ASMS390_PARAM_H */
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index ed7f19c27ce5..e6c04faf8a6c 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -109,4 +109,23 @@ struct pkey_skey2pkey {
109}; 109};
110#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey) 110#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
111 111
112/*
 113 * Verify that the given secure key is usable with the pkey module.
 114 * Check for the correct key type and for at least one crypto card
 115 * able to handle this key (master key or old master key
 116 * verification pattern matches).
 117 * Returns some info about the key: key size in bits, key type
 118 * (currently only AES), and a flag if the key is wrapped with an old MKVP.
119 */
120struct pkey_verifykey {
121 struct pkey_seckey seckey; /* in: the secure key blob */
122 __u16 cardnr; /* out: card number */
123 __u16 domain; /* out: domain number */
124 __u16 keysize; /* out: key size in bits */
125 __u32 attributes; /* out: attribute bits */
126};
127#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey)
128#define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */
129#define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */
130
112#endif /* _UAPI_PKEY_H */ 131#endif /* _UAPI_PKEY_H */
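
A hedged userspace sketch of the new ioctl; the device node name /dev/pkey is assumed here, matching the existing pkey character device:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <asm/pkey.h>

	static int verify(const struct pkey_seckey *sk)
	{
		struct pkey_verifykey vk = { .seckey = *sk };
		int fd = open("/dev/pkey", O_RDWR);

		if (fd < 0 || ioctl(fd, PKEY_VERIFYKEY, &vk) != 0) {
			perror("PKEY_VERIFYKEY");
			if (fd >= 0)
				close(fd);
			return -1;
		}
		printf("card %u domain %u keysize %u%s\n",
		       vk.cardnr, vk.domain, vk.keysize,
		       (vk.attributes & PKEY_VERIFY_ATTR_OLD_MKVP) ?
		       " (old MKVP)" : "");
		close(fd);
		return 0;
	}
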
diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/s390/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/poll.h>
diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h
deleted file mode 100644
index ec23d1c73c92..000000000000
--- a/arch/s390/include/uapi/asm/resource.h
+++ /dev/null
@@ -1,13 +0,0 @@
1/*
2 * S390 version
3 *
4 * Derived from "include/asm-i386/resources.h"
5 */
6
7#ifndef _S390_RESOURCE_H
8#define _S390_RESOURCE_H
9
10#include <asm-generic/resource.h>
11
12#endif
13
diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h
deleted file mode 100644
index 6f60eee73242..000000000000
--- a/arch/s390/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_S390_SOCKIOS_H
2#define _ASM_S390_SOCKIOS_H
3
4#include <asm-generic/sockios.h>
5
6#endif
diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h
deleted file mode 100644
index 71bf6ac6a2b9..000000000000
--- a/arch/s390/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1,6 +0,0 @@
1#ifndef _ASM_S390_TERMBITS_H
2#define _ASM_S390_TERMBITS_H
3
4#include <asm-generic/termbits.h>
5
6#endif
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 152de9b796e1..ea42290e7d51 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,7 @@
313#define __NR_copy_file_range 375 313#define __NR_copy_file_range 375
314#define __NR_preadv2 376 314#define __NR_preadv2 376
315#define __NR_pwritev2 377 315#define __NR_pwritev2 377
316/* Number 378 is reserved for guarded storage */ 316#define __NR_s390_guarded_storage 378
317#define __NR_statx 379 317#define __NR_statx 379
318#define NR_syscalls 380 318#define NR_syscalls 380
319 319
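
With number 378 wired up, the call is reachable from userspace through syscall(2) until a libc wrapper exists. A minimal sketch:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/unistd.h>
	#include <asm/guarded_storage.h>

	int main(void)
	{
		/* Enable guarded storage for the calling thread; fails
		 * with EOPNOTSUPP when facility 133 is not installed. */
		if (syscall(__NR_s390_guarded_storage, GS_ENABLE, NULL) != 0) {
			perror("s390_guarded_storage");
			return 1;
		}
		return 0;
	}
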
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 060ce548fe8b..adb3fe2e3d42 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -51,14 +51,12 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
51# 51#
52CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' 52CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
53 53
54CFLAGS_sysinfo.o += -w
55
56obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o 54obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
57obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o 55obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
58obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o 56obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
59obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o 57obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
60obj-y += runtime_instr.o cache.o fpu.o dumpstack.o 58obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
61obj-y += entry.o reipl.o relocate_kernel.o 59obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o
62 60
63extra-y += head.o head64.o vmlinux.lds 61extra-y += head.o head64.o vmlinux.lds
64 62
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c4b3570ded5b..6bb29633e1f1 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,7 +175,7 @@ int main(void)
175 /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ 175 /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
176 OFFSET(__LC_DUMP_REIPL, lowcore, ipib); 176 OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
177 /* hardware defined lowcore locations 0x1000 - 0x18ff */ 177 /* hardware defined lowcore locations 0x1000 - 0x18ff */
178 OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); 178 OFFSET(__LC_MCESAD, lowcore, mcesad);
179 OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); 179 OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
180 OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); 180 OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
181 OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); 181 OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index e89cc2e71db1..986642a3543b 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
178COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); 178COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
179COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); 179COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
180COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); 180COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
181COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
181COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); 182COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index dd1d5c62c374..d628afc26708 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -429,6 +429,20 @@ static void *nt_vmcoreinfo(void *ptr)
429} 429}
430 430
431/* 431/*
432 * Initialize final note (needed for /proc/vmcore code)
433 */
434static void *nt_final(void *ptr)
435{
436 Elf64_Nhdr *note;
437
438 note = (Elf64_Nhdr *) ptr;
439 note->n_namesz = 0;
440 note->n_descsz = 0;
441 note->n_type = 0;
442 return PTR_ADD(ptr, sizeof(Elf64_Nhdr));
443}
444
445/*
432 * Initialize ELF header (new kernel) 446 * Initialize ELF header (new kernel)
433 */ 447 */
434static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) 448static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
@@ -515,6 +529,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
515 if (sa->prefix != 0) 529 if (sa->prefix != 0)
516 ptr = fill_cpu_elf_notes(ptr, cpu++, sa); 530 ptr = fill_cpu_elf_notes(ptr, cpu++, sa);
517 ptr = nt_vmcoreinfo(ptr); 531 ptr = nt_vmcoreinfo(ptr);
532 ptr = nt_final(ptr);
518 memset(phdr, 0, sizeof(*phdr)); 533 memset(phdr, 0, sizeof(*phdr));
519 phdr->p_type = PT_NOTE; 534 phdr->p_type = PT_NOTE;
520 phdr->p_offset = notes_offset; 535 phdr->p_offset = notes_offset;
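
The terminating note is nothing more than a header whose three fields are zero; tools reading the /proc/vmcore note segment stop at it. In struct form (needs <elf.h>):

	/* End-of-notes marker appended after the per-CPU and vmcoreinfo
	 * notes; no name or descriptor payload follows it. */
	Elf64_Nhdr final = { .n_namesz = 0, .n_descsz = 0, .n_type = 0 };
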
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 4e65c79cc5f2..5d20182ee8ae 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -231,9 +231,29 @@ static noinline __init void detect_machine_type(void)
231 S390_lowcore.machine_flags |= MACHINE_FLAG_VM; 231 S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
232} 232}
233 233
234/* Remove leading, trailing and double whitespace. */
235static inline void strim_all(char *str)
236{
237 char *s;
238
239 s = strim(str);
240 if (s != str)
241 memmove(str, s, strlen(s));
242 while (*str) {
243 if (!isspace(*str++))
244 continue;
245 if (isspace(*str)) {
246 s = skip_spaces(str);
247 memmove(str, s, strlen(s) + 1);
248 }
249 }
250}
251
234static noinline __init void setup_arch_string(void) 252static noinline __init void setup_arch_string(void)
235{ 253{
236 struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; 254 struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
255 struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page;
256 char mstr[80], hvstr[17];
237 257
238 if (stsi(mach, 1, 1, 1)) 258 if (stsi(mach, 1, 1, 1))
239 return; 259 return;
@@ -241,14 +261,21 @@ static noinline __init void setup_arch_string(void)
241 EBCASC(mach->type, sizeof(mach->type)); 261 EBCASC(mach->type, sizeof(mach->type));
242 EBCASC(mach->model, sizeof(mach->model)); 262 EBCASC(mach->model, sizeof(mach->model));
243 EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); 263 EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
244 dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", 264 sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s",
245 mach->manufacturer, 265 mach->manufacturer, mach->type,
246 mach->type, 266 mach->model, mach->model_capacity);
247 mach->model, 267 strim_all(mstr);
248 mach->model_capacity, 268 if (stsi(vm, 3, 2, 2) == 0 && vm->count) {
249 MACHINE_IS_LPAR ? "LPAR" : 269 EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi));
250 MACHINE_IS_VM ? "z/VM" : 270 sprintf(hvstr, "%-16.16s", vm->vm[0].cpi);
251 MACHINE_IS_KVM ? "KVM" : "unknown"); 271 strim_all(hvstr);
272 } else {
273 sprintf(hvstr, "%s",
274 MACHINE_IS_LPAR ? "LPAR" :
275 MACHINE_IS_VM ? "z/VM" :
276 MACHINE_IS_KVM ? "KVM" : "unknown");
277 }
278 dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
252} 279}
253 280
254static __init void setup_topology(void) 281static __init void setup_topology(void)
@@ -358,6 +385,8 @@ static __init void detect_machine_facilities(void)
358 S390_lowcore.machine_flags |= MACHINE_FLAG_NX; 385 S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
359 __ctl_set_bit(0, 20); 386 __ctl_set_bit(0, 20);
360 } 387 }
388 if (test_facility(133))
389 S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
361} 390}
362 391
363static inline void save_vector_registers(void) 392static inline void save_vector_registers(void)
@@ -375,7 +404,7 @@ static int __init topology_setup(char *str)
375 404
376 rc = kstrtobool(str, &enabled); 405 rc = kstrtobool(str, &enabled);
377 if (!rc && !enabled) 406 if (!rc && !enabled)
378 S390_lowcore.machine_flags &= ~MACHINE_HAS_TOPOLOGY; 407 S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
379 return rc; 408 return rc;
380} 409}
381early_param("topology", topology_setup); 410early_param("topology", topology_setup);
@@ -405,23 +434,16 @@ early_param("noexec", noexec_setup);
405 434
406static int __init cad_setup(char *str) 435static int __init cad_setup(char *str)
407{ 436{
408 int val; 437 bool enabled;
409 438 int rc;
410 get_option(&str, &val);
411 if (val && test_facility(128))
412 S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
413 return 0;
414}
415early_param("cad", cad_setup);
416 439
417static int __init cad_init(void) 440 rc = kstrtobool(str, &enabled);
418{ 441 if (!rc && enabled && test_facility(128))
419 if (MACHINE_HAS_CAD)
420 /* Enable problem state CAD. */ 442 /* Enable problem state CAD. */
421 __ctl_set_bit(2, 3); 443 __ctl_set_bit(2, 3);
422 return 0; 444 return rc;
423} 445}
424early_initcall(cad_init); 446early_param("cad", cad_setup);
425 447
426static __init void memmove_early(void *dst, const void *src, size_t n) 448static __init void memmove_early(void *dst, const void *src, size_t n)
427{ 449{
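
strim_all() is strim() plus inner-whitespace squashing, so the machine string assembled below prints with single spaces. A userspace approximation for illustration (the kernel helpers strim/skip_spaces are replaced by plain C):

	#include <ctype.h>
	#include <stdio.h>

	static void strim_all_demo(char *str)
	{
		char *src = str, *dst = str;

		while (isspace((unsigned char)*src))	/* leading blanks */
			src++;
		while (*src) {
			if (isspace((unsigned char)*src)) {
				while (isspace((unsigned char)*src))
					src++;
				if (*src)		/* collapse the run, */
					*dst++ = ' ';	/* drop a trailing one */
			} else {
				*dst++ = *src++;
			}
		}
		*dst = '\0';
	}

	int main(void)
	{
		char s[] = "  IBM     2964      N96  ";

		strim_all_demo(s);
		printf("\"%s\"\n", s);	/* "IBM 2964 N96" */
		return 0;
	}
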
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6a7d737d514c..c6cf338c9327 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT
47STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE 47STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
48 48
49_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ 49_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
50 _TIF_UPROBE) 50 _TIF_UPROBE | _TIF_GUARDED_STORAGE)
51_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ 51_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
52 _TIF_SYSCALL_TRACEPOINT) 52 _TIF_SYSCALL_TRACEPOINT)
53_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ 53_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -189,8 +189,6 @@ ENTRY(__switch_to)
189 stg %r3,__LC_CURRENT # store task struct of next 189 stg %r3,__LC_CURRENT # store task struct of next
190 stg %r15,__LC_KERNEL_STACK # store end of kernel stack 190 stg %r15,__LC_KERNEL_STACK # store end of kernel stack
191 lg %r15,__THREAD_ksp(%r1) # load kernel stack of next 191 lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
192 /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
193 lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
194 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next 192 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
195 lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task 193 lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
196 TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP 194 TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
@@ -332,6 +330,8 @@ ENTRY(system_call)
332 TSTMSK __TI_flags(%r12),_TIF_UPROBE 330 TSTMSK __TI_flags(%r12),_TIF_UPROBE
333 jo .Lsysc_uprobe_notify 331 jo .Lsysc_uprobe_notify
334#endif 332#endif
333 TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
334 jo .Lsysc_guarded_storage
335 TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP 335 TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
336 jo .Lsysc_singlestep 336 jo .Lsysc_singlestep
337 TSTMSK __TI_flags(%r12),_TIF_SIGPENDING 337 TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
@@ -409,6 +409,14 @@ ENTRY(system_call)
409#endif 409#endif
410 410
411# 411#
412# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
413#
414.Lsysc_guarded_storage:
415 lgr %r2,%r11 # pass pointer to pt_regs
416 larl %r14,.Lsysc_return
417 jg gs_load_bc_cb
418
419#
412# _PIF_PER_TRAP is set, call do_per_trap 420# _PIF_PER_TRAP is set, call do_per_trap
413# 421#
414.Lsysc_singlestep: 422.Lsysc_singlestep:
@@ -663,6 +671,8 @@ ENTRY(io_int_handler)
663 jo .Lio_sigpending 671 jo .Lio_sigpending
664 TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME 672 TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
665 jo .Lio_notify_resume 673 jo .Lio_notify_resume
674 TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
675 jo .Lio_guarded_storage
666 TSTMSK __LC_CPU_FLAGS,_CIF_FPU 676 TSTMSK __LC_CPU_FLAGS,_CIF_FPU
667 jo .Lio_vxrs 677 jo .Lio_vxrs
668 TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) 678 TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
@@ -697,6 +707,18 @@ ENTRY(io_int_handler)
697 jg load_fpu_regs 707 jg load_fpu_regs
698 708
699# 709#
710# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
711#
712.Lio_guarded_storage:
713 # TRACE_IRQS_ON already done at .Lio_return
714 ssm __LC_SVC_NEW_PSW # reenable interrupts
715 lgr %r2,%r11 # pass pointer to pt_regs
716 brasl %r14,gs_load_bc_cb
717 ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
718 TRACE_IRQS_OFF
719 j .Lio_return
720
721#
700# _TIF_NEED_RESCHED is set, call schedule 722# _TIF_NEED_RESCHED is set, call schedule
701# 723#
702.Lio_reschedule: 724.Lio_reschedule:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 33f901865326..dbf5f7e18246 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,12 +74,14 @@ long sys_sigreturn(void);
74 74
75long sys_s390_personality(unsigned int personality); 75long sys_s390_personality(unsigned int personality);
76long sys_s390_runtime_instr(int command, int signum); 76long sys_s390_runtime_instr(int command, int signum);
77long sys_s390_guarded_storage(int command, struct gs_cb __user *);
77long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); 78long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
78long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); 79long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
79 80
80DECLARE_PER_CPU(u64, mt_cycles[8]); 81DECLARE_PER_CPU(u64, mt_cycles[8]);
81 82
82void verify_facilities(void); 83void verify_facilities(void);
84void gs_load_bc_cb(struct pt_regs *regs);
83void set_fs_fixup(void); 85void set_fs_fixup(void);
84 86
85#endif /* _ENTRY_H */ 87#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 000000000000..6f064745c3b1
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
1/*
2 * Copyright IBM Corp. 2016
3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
4 */
5
6#include <linux/kernel.h>
7#include <linux/syscalls.h>
8#include <linux/signal.h>
9#include <linux/mm.h>
10#include <linux/slab.h>
11#include <asm/guarded_storage.h>
12#include "entry.h"
13
14void exit_thread_gs(void)
15{
16 kfree(current->thread.gs_cb);
17 kfree(current->thread.gs_bc_cb);
18 current->thread.gs_cb = current->thread.gs_bc_cb = NULL;
19}
20
21static int gs_enable(void)
22{
23 struct gs_cb *gs_cb;
24
25 if (!current->thread.gs_cb) {
26 gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
27 if (!gs_cb)
28 return -ENOMEM;
29 gs_cb->gsd = 25;
30 preempt_disable();
31 __ctl_set_bit(2, 4);
32 load_gs_cb(gs_cb);
33 current->thread.gs_cb = gs_cb;
34 preempt_enable();
35 }
36 return 0;
37}
38
39static int gs_disable(void)
40{
41 if (current->thread.gs_cb) {
42 preempt_disable();
43 kfree(current->thread.gs_cb);
44 current->thread.gs_cb = NULL;
45 __ctl_clear_bit(2, 4);
46 preempt_enable();
47 }
48 return 0;
49}
50
51static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
52{
53 struct gs_cb *gs_cb;
54
55 gs_cb = current->thread.gs_bc_cb;
56 if (!gs_cb) {
57 gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
58 if (!gs_cb)
59 return -ENOMEM;
60 current->thread.gs_bc_cb = gs_cb;
61 }
62 if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
63 return -EFAULT;
64 return 0;
65}
66
67static int gs_clear_bc_cb(void)
68{
69 struct gs_cb *gs_cb;
70
71 gs_cb = current->thread.gs_bc_cb;
72 current->thread.gs_bc_cb = NULL;
73 kfree(gs_cb);
74 return 0;
75}
76
77void gs_load_bc_cb(struct pt_regs *regs)
78{
79 struct gs_cb *gs_cb;
80
81 preempt_disable();
82 clear_thread_flag(TIF_GUARDED_STORAGE);
83 gs_cb = current->thread.gs_bc_cb;
84 if (gs_cb) {
85 kfree(current->thread.gs_cb);
86 current->thread.gs_bc_cb = NULL;
87 __ctl_set_bit(2, 4);
88 load_gs_cb(gs_cb);
89 current->thread.gs_cb = gs_cb;
90 }
91 preempt_enable();
92}
93
94static int gs_broadcast(void)
95{
96 struct task_struct *sibling;
97
98 read_lock(&tasklist_lock);
99 for_each_thread(current, sibling) {
100 if (!sibling->thread.gs_bc_cb)
101 continue;
102 if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
103 kick_process(sibling);
104 }
105 read_unlock(&tasklist_lock);
106 return 0;
107}
108
109SYSCALL_DEFINE2(s390_guarded_storage, int, command,
110 struct gs_cb __user *, gs_cb)
111{
112 if (!MACHINE_HAS_GS)
113 return -EOPNOTSUPP;
114 switch (command) {
115 case GS_ENABLE:
116 return gs_enable();
117 case GS_DISABLE:
118 return gs_disable();
119 case GS_SET_BC_CB:
120 return gs_set_bc_cb(gs_cb);
121 case GS_CLEAR_BC_CB:
122 return gs_clear_bc_cb();
123 case GS_BROADCAST:
124 return gs_broadcast();
125 default:
126 return -EINVAL;
127 }
128}
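
GS_BROADCAST pairs with GS_SET_BC_CB: a thread installs a broadcast control block and then kicks every sibling that also carries one; each of those picks the block up through TIF_GUARDED_STORAGE on its next return to userspace (gs_load_bc_cb above). A hypothetical flow, error handling elided:

	struct gs_cb cb = { 0 };	/* filled in by the application */

	syscall(__NR_s390_guarded_storage, GS_SET_BC_CB, &cb);
	syscall(__NR_s390_guarded_storage, GS_BROADCAST, NULL);
	/* Sibling threads with a broadcast block now load it the next
	 * time they cross the kernel/user boundary. */
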
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 0b5ebf8a3d30..eff5b31671d4 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -25,7 +25,6 @@
25#include <linux/linkage.h> 25#include <linux/linkage.h>
26#include <asm/asm-offsets.h> 26#include <asm/asm-offsets.h>
27#include <asm/thread_info.h> 27#include <asm/thread_info.h>
28#include <asm/facility.h>
29#include <asm/page.h> 28#include <asm/page.h>
30#include <asm/ptrace.h> 29#include <asm/ptrace.h>
31 30
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 482d3526e32b..31c91f24e562 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -52,7 +52,7 @@ ENTRY(startup_continue)
52 .quad 0 # cr1: primary space segment table 52 .quad 0 # cr1: primary space segment table
53 .quad .Lduct # cr2: dispatchable unit control table 53 .quad .Lduct # cr2: dispatchable unit control table
54 .quad 0 # cr3: instruction authorization 54 .quad 0 # cr3: instruction authorization
55 .quad 0 # cr4: instruction authorization 55 .quad 0xffff # cr4: instruction authorization
56 .quad .Lduct # cr5: primary-aste origin 56 .quad .Lduct # cr5: primary-aste origin
57 .quad 0 # cr6: I/O interrupts 57 .quad 0 # cr6: I/O interrupts
58 .quad 0 # cr7: secondary space segment table 58 .quad 0 # cr7: secondary space segment table
diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c
new file mode 100644
index 000000000000..ee85e17dd79d
--- /dev/null
+++ b/arch/s390/kernel/kdebugfs.c
@@ -0,0 +1,15 @@
1#include <linux/debugfs.h>
2#include <linux/export.h>
3#include <linux/init.h>
4
5struct dentry *arch_debugfs_dir;
6EXPORT_SYMBOL(arch_debugfs_dir);
7
8static int __init arch_kdebugfs_init(void)
9{
10 arch_debugfs_dir = debugfs_create_dir("s390", NULL);
11 if (IS_ERR(arch_debugfs_dir))
12 arch_debugfs_dir = NULL;
13 return 0;
14}
15postcore_initcall(arch_kdebugfs_init);
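
arch_debugfs_dir gives s390 code a common debugfs root under /sys/kernel/debug/s390/, mirroring the x86 arrangement. A hypothetical user (the file name is invented):

	/* Sketch: expose a counter as /sys/kernel/debug/s390/demo. */
	static u64 demo_counter;

	static int __init demo_debugfs_init(void)
	{
		if (!arch_debugfs_dir)
			return -ENODEV;
		debugfs_create_u64("demo", 0444, arch_debugfs_dir,
				   &demo_counter);
		return 0;
	}
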
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3074c1d83829..db5658daf994 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
27#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
28#include <asm/os_info.h> 28#include <asm/os_info.h>
29#include <asm/switch_to.h> 29#include <asm/switch_to.h>
30#include <asm/nmi.h>
30 31
31typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); 32typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
32 33
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image)
102 */ 103 */
103static noinline void __machine_kdump(void *image) 104static noinline void __machine_kdump(void *image)
104{ 105{
106 struct mcesa *mcesa;
107 unsigned long cr2_old, cr2_new;
105 int this_cpu, cpu; 108 int this_cpu, cpu;
106 109
107 lgr_info_log(); 110 lgr_info_log();
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image)
114 continue; 117 continue;
115 } 118 }
116 /* Store status of the boot CPU */ 119 /* Store status of the boot CPU */
120 mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
117 if (MACHINE_HAS_VX) 121 if (MACHINE_HAS_VX)
118 save_vx_regs((void *) &S390_lowcore.vector_save_area); 122 save_vx_regs((__vector128 *) mcesa->vector_save_area);
123 if (MACHINE_HAS_GS) {
124 __ctl_store(cr2_old, 2, 2);
125 cr2_new = cr2_old | (1UL << 4);
126 __ctl_load(cr2_new, 2, 2);
127 save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
128 __ctl_load(cr2_old, 2, 2);
129 }
119 /* 130 /*
120 * To create a good backchain for this CPU in the dump store_status 131 * To create a good backchain for this CPU in the dump store_status
121 * is passed the address of a function. The address is saved into 132 * is passed the address of a function. The address is saved into
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 9bf8327154ee..985589523970 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
106 int kill_task; 106 int kill_task;
107 u64 zero; 107 u64 zero;
108 void *fpt_save_area; 108 void *fpt_save_area;
109 struct mcesa *mcesa;
109 110
110 kill_task = 0; 111 kill_task = 0;
111 zero = 0; 112 zero = 0;
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
165 : : "Q" (S390_lowcore.fpt_creg_save_area)); 166 : : "Q" (S390_lowcore.fpt_creg_save_area));
166 } 167 }
167 168
169 mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
168 if (!MACHINE_HAS_VX) { 170 if (!MACHINE_HAS_VX) {
169 /* Validate floating point registers */ 171 /* Validate floating point registers */
170 asm volatile( 172 asm volatile(
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode)
209 " la 1,%0\n" 211 " la 1,%0\n"
210 " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ 212 " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
211 " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ 213 " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
212 : : "Q" (*(struct vx_array *) 214 : : "Q" (*(struct vx_array *) mcesa->vector_save_area)
213 &S390_lowcore.vector_save_area) : "1"); 215 : "1");
214 __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); 216 __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
215 } 217 }
216 /* Validate access registers */ 218 /* Validate access registers */
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
224 */ 226 */
225 kill_task = 1; 227 kill_task = 1;
226 } 228 }
229 /* Validate guarded storage registers */
230 if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) {
231 if (!mci.gs)
232 /*
233 * Guarded storage register can't be restored and
234 * the current processes uses guarded storage.
235 * It has to be terminated.
236 */
237 kill_task = 1;
238 else
239 load_gs_cb((struct gs_cb *)
240 mcesa->guarded_storage_save_area);
241 }
227 /* 242 /*
228 * We don't even try to validate the TOD register, since we simply 243 * We don't even try to validate the TOD register, since we simply
229 * can't write something sensible into that register. 244 * can't write something sensible into that register.
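Aside: the new guarded storage block follows the same shape as the other validity checks in s390_validate_registers(). A distilled sketch (names invented for illustration, not part of the patch):

/* Illustrative only. */
static int validate_one(int mci_valid, int task_uses_it, void (*reload)(void))
{
        if (!task_uses_it)
                return 0;       /* facility unused, nothing to restore */
        if (!mci_valid)
                return 1;       /* saved state lost, caller sets kill_task */
        reload();               /* state survived the machine check */
        return 0;
}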
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1aba10e90906..746d03423333 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1,7 +1,7 @@
 /*
  * Performance event support for s390x - CPU-measurement Counter Facility
  *
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012, 2017
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -22,19 +22,12 @@
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 
-/* CPU-measurement counter facility supports these CPU counter sets:
- * For CPU counter sets:
- *    Basic counter set: 0-31
- *    Problem-state counter set: 32-63
- *    Crypto-activity counter set: 64-127
- *    Extented counter set: 128-159
- */
 enum cpumf_ctr_set {
-	/* CPU counter sets */
-	CPUMF_CTR_SET_BASIC   = 0,
-	CPUMF_CTR_SET_USER    = 1,
-	CPUMF_CTR_SET_CRYPTO  = 2,
-	CPUMF_CTR_SET_EXT     = 3,
+	CPUMF_CTR_SET_BASIC   = 0,    /* Basic Counter Set */
+	CPUMF_CTR_SET_USER    = 1,    /* Problem-State Counter Set */
+	CPUMF_CTR_SET_CRYPTO  = 2,    /* Crypto-Activity Counter Set */
+	CPUMF_CTR_SET_EXT     = 3,    /* Extended Counter Set */
+	CPUMF_CTR_SET_MT_DIAG = 4,    /* MT-diagnostic Counter Set */
 
 	/* Maximum number of counter sets */
 	CPUMF_CTR_SET_MAX,
@@ -47,6 +40,7 @@ static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
 	[CPUMF_CTR_SET_USER]   = 0x04,
 	[CPUMF_CTR_SET_CRYPTO] = 0x08,
 	[CPUMF_CTR_SET_EXT]    = 0x01,
+	[CPUMF_CTR_SET_MT_DIAG] = 0x20,
 };
 
 static void ctr_set_enable(u64 *state, int ctr_set)
@@ -76,19 +70,20 @@ struct cpu_hw_events {
 };
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.ctr_set = {
 		[CPUMF_CTR_SET_BASIC]  = ATOMIC_INIT(0),
 		[CPUMF_CTR_SET_USER]   = ATOMIC_INIT(0),
 		[CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
 		[CPUMF_CTR_SET_EXT]    = ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
 	},
 	.state = 0,
 	.flags = 0,
 	.txn_flags = 0,
 };
 
-static int get_counter_set(u64 event)
+static enum cpumf_ctr_set get_counter_set(u64 event)
 {
-	int set = -1;
+	int set = CPUMF_CTR_SET_MAX;
 
 	if (event < 32)
 		set = CPUMF_CTR_SET_BASIC;
@@ -98,34 +93,17 @@ static int get_counter_set(u64 event)
 		set = CPUMF_CTR_SET_CRYPTO;
 	else if (event < 256)
 		set = CPUMF_CTR_SET_EXT;
+	else if (event >= 448 && event < 496)
+		set = CPUMF_CTR_SET_MT_DIAG;
 
 	return set;
 }
 
-static int validate_event(const struct hw_perf_event *hwc)
-{
-	switch (hwc->config_base) {
-	case CPUMF_CTR_SET_BASIC:
-	case CPUMF_CTR_SET_USER:
-	case CPUMF_CTR_SET_CRYPTO:
-	case CPUMF_CTR_SET_EXT:
-		/* check for reserved counters */
-		if ((hwc->config >= 6 && hwc->config <= 31) ||
-		    (hwc->config >= 38 && hwc->config <= 63) ||
-		    (hwc->config >= 80 && hwc->config <= 127))
-			return -EOPNOTSUPP;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int validate_ctr_version(const struct hw_perf_event *hwc)
 {
 	struct cpu_hw_events *cpuhw;
 	int err = 0;
+	u16 mtdiag_ctl;
 
 	cpuhw = &get_cpu_var(cpu_hw_events);
 
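For reference, the counter-number to counter-set mapping that get_counter_set() now implements, derived from the conditions above (illustrative comment only, not part of the patch):

/*
 *   event   0..31  -> CPUMF_CTR_SET_BASIC
 *   event  32..63  -> CPUMF_CTR_SET_USER
 *   event  64..127 -> CPUMF_CTR_SET_CRYPTO
 *   event 128..255 -> CPUMF_CTR_SET_EXT
 *   event 448..495 -> CPUMF_CTR_SET_MT_DIAG
 *   anything else  -> CPUMF_CTR_SET_MAX, rejected with -EINVAL later
 */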
@@ -145,6 +123,27 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
 		    (cpuhw->info.csvn > 2 && hwc->config > 255))
 			err = -EOPNOTSUPP;
 		break;
+	case CPUMF_CTR_SET_MT_DIAG:
+		if (cpuhw->info.csvn <= 3)
+			err = -EOPNOTSUPP;
+		/*
+		 * MT-diagnostic counters are read-only. The counter set
+		 * is automatically enabled and activated on all CPUs with
+		 * multithreading (SMT). Deactivation of multithreading
+		 * also disables the counter set. State changes are ignored
+		 * by lcctl(). Because Linux controls SMT enablement through
+		 * a kernel parameter only, the counter set is either disabled
+		 * or enabled and active.
+		 *
+		 * Thus, the counters can only be used if SMT is on and the
+		 * counter set is enabled and active.
+		 */
+		mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG];
+		if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
+		      (cpuhw->info.enable_ctl & mtdiag_ctl) &&
+		      (cpuhw->info.act_ctl & mtdiag_ctl)))
+			err = -EOPNOTSUPP;
+		break;
 	}
 
 	put_cpu_var(cpu_hw_events);
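The three state words tested above all come from the counter facility information block; a compact equivalent of the check, assuming the cpumf_ctr_info layout used throughout this file (sketch, not part of the patch):

/* Illustrative only. */
static bool mt_diag_usable(struct cpumf_ctr_info *info)
{
        u16 ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG];       /* 0x20 */

        return (info->auth_ctl & ctl) &&        /* authorized */
               (info->enable_ctl & ctl) &&      /* enabled... */
               (info->act_ctl & ctl);           /* ...and active, i.e. SMT on */
}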
@@ -250,6 +249,11 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 	/* loss of counter data alert */
 	if (alert & CPU_MF_INT_CF_LCDA)
 		pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+	/* loss of MT counter data alert */
+	if (alert & CPU_MF_INT_CF_MTDA)
+		pr_warn("CPU[%i] MT counter data was lost\n",
+			smp_processor_id());
 }
 
 #define PMC_INIT      0
@@ -330,6 +334,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
+	enum cpumf_ctr_set set;
 	int err;
 	u64 ev;
 
@@ -370,25 +375,30 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (ev == -1)
 		return -ENOENT;
 
-	if (ev >= PERF_CPUM_CF_MAX_CTR)
+	if (ev > PERF_CPUM_CF_MAX_CTR)
 		return -EINVAL;
 
-	/* Use the hardware perf event structure to store the counter number
-	 * in 'config' member and the counter set to which the counter belongs
-	 * in the 'config_base'. The counter set (config_base) is then used
-	 * to enable/disable the counters.
-	 */
-	hwc->config = ev;
-	hwc->config_base = get_counter_set(ev);
-
-	/* Validate the counter that is assigned to this event.
-	 * Because the counter facility can use numerous counters at the
-	 * same time without constraints, it is not necessary to explicitly
-	 * validate event groups (event->group_leader != event).
-	 */
-	err = validate_event(hwc);
-	if (err)
-		return err;
+	/* Obtain the counter set to which the specified counter belongs */
+	set = get_counter_set(ev);
+	switch (set) {
+	case CPUMF_CTR_SET_BASIC:
+	case CPUMF_CTR_SET_USER:
+	case CPUMF_CTR_SET_CRYPTO:
+	case CPUMF_CTR_SET_EXT:
+	case CPUMF_CTR_SET_MT_DIAG:
+		/*
+		 * Use the hardware perf event structure to store the
+		 * counter number in the 'config' member and the counter
+		 * set number in the 'config_base'. The counter set number
+		 * is then later used to enable/disable the counter(s).
+		 */
+		hwc->config = ev;
+		hwc->config_base = set;
+		break;
+	case CPUMF_CTR_SET_MAX:
+		/* The counter could not be associated to a counter set */
+		return -EINVAL;
+	};
 
 	/* Initialize for using the CPU-measurement counter facility */
 	if (!atomic_inc_not_zero(&num_events)) {
@@ -452,7 +462,7 @@ static int hw_perf_event_reset(struct perf_event *event)
 	return err;
 }
 
-static int hw_perf_event_update(struct perf_event *event)
+static void hw_perf_event_update(struct perf_event *event)
 {
 	u64 prev, new, delta;
 	int err;
@@ -461,14 +471,12 @@ static int hw_perf_event_update(struct perf_event *event)
 		prev = local64_read(&event->hw.prev_count);
 		err = ecctr(event->hw.config, &new);
 		if (err)
-			goto out;
+			return;
 	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
 
 	delta = (prev <= new) ? new - prev
 			      : (-1ULL - prev) + new + 1; /* overflow */
 	local64_add(delta, &event->count);
-out:
-	return err;
 }
 
 static void cpumf_pmu_read(struct perf_event *event)
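The delta computation kept above is the usual wrap-safe counter difference. A worked example with small numbers (illustrative only):

/*
 * Scaled down to 8-bit values for readability (the real counters are
 * 64 bit): prev = 250, new = 3 after one wrap gives
 *
 *      delta = (255 - 250) + 3 + 1 = 9
 *
 * i.e. five ticks up to 255, one tick for the wrap to 0, and three
 * more up to 3.  With u64 the same formula reads (-1ULL - prev) + new + 1.
 */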
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index c343ac2cf6c5..d3133285b7d1 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -114,8 +114,64 @@ CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
 CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
 CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
 CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, L1D_WRITES_RO_EXCL, 0x0080);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
 
-static struct attribute *cpumcf_pmu_event_attr[] = {
+static struct attribute *cpumcf_pmu_event_attr[] __initdata = {
 	CPUMF_EVENT_PTR(cf, CPU_CYCLES),
 	CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
 	CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
@@ -236,28 +292,87 @@ static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
 	NULL,
 };
 
+static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z13, L1D_WRITES_RO_EXCL),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
 /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
 
-static struct attribute_group cpumsf_pmu_events_group = {
+static struct attribute_group cpumcf_pmu_events_group = {
 	.name = "events",
-	.attrs = cpumcf_pmu_event_attr,
 };
 
 PMU_FORMAT_ATTR(event, "config:0-63");
 
-static struct attribute *cpumsf_pmu_format_attr[] = {
+static struct attribute *cpumcf_pmu_format_attr[] = {
 	&format_attr_event.attr,
 	NULL,
 };
 
-static struct attribute_group cpumsf_pmu_format_group = {
+static struct attribute_group cpumcf_pmu_format_group = {
 	.name = "format",
-	.attrs = cpumsf_pmu_format_attr,
+	.attrs = cpumcf_pmu_format_attr,
 };
 
-static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
-	&cpumsf_pmu_events_group,
-	&cpumsf_pmu_format_group,
+static const struct attribute_group *cpumcf_pmu_attr_groups[] = {
+	&cpumcf_pmu_events_group,
+	&cpumcf_pmu_format_group,
 	NULL,
 };
 
@@ -290,6 +405,7 @@ static __init struct attribute **merge_attr(struct attribute **a,
 __init const struct attribute_group **cpumf_cf_event_group(void)
 {
 	struct attribute **combined, **model;
+	struct attribute *none[] = { NULL };
 	struct cpuid cpu_id;
 
 	get_cpu_id(&cpu_id);
@@ -306,17 +422,17 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
 	case 0x2828:
 		model = cpumcf_zec12_pmu_event_attr;
 		break;
+	case 0x2964:
+	case 0x2965:
+		model = cpumcf_z13_pmu_event_attr;
+		break;
 	default:
-		model = NULL;
+		model = none;
 		break;
 	}
 
-	if (!model)
-		goto out;
-
 	combined = merge_attr(cpumcf_pmu_event_attr, model);
 	if (combined)
-		cpumsf_pmu_events_group.attrs = combined;
-out:
-	return cpumsf_pmu_attr_groups;
+		cpumcf_pmu_events_group.attrs = combined;
+	return cpumcf_pmu_attr_groups;
 }
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1c0b58545c04..9a4f279d25ca 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1009,8 +1009,8 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 	 * sample. Some early samples or samples from guests without
 	 * lpp usage would be misaccounted to the host. We use the asn
 	 * value as an addon heuristic to detect most of these guest samples.
-	 * If the value differs from the host hpp value, we assume to be a
-	 * KVM guest.
+	 * If the value differs from 0xffff (the host value), we assume to
+	 * be a KVM guest.
 	 */
 	switch (sfr->basic.CL) {
 	case 1: /* logical partition */
@@ -1020,8 +1020,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 		sde_regs->in_guest = 1;
 		break;
 	default: /* old machine, use heuristics */
-		if (sfr->basic.gpp ||
-		    sfr->basic.prim_asn != (u16)sfr->basic.hpp)
+		if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff)
 			sde_regs->in_guest = 1;
 		break;
 	}
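In short, for machines whose samples carry no classification, the heuristic now reads: a non-zero guest program parameter, or a primary ASN other than the fixed host value 0xffff, marks the sample as guest work. As a decision table (illustrative only):

/*
 *   gpp     prim_asn       attributed to
 *   0       0xffff         host
 *   0       != 0xffff      KVM guest
 *   != 0    any            KVM guest
 */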
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f29e41c5e2ec..999d7154bbdc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void);
  */
 void exit_thread(struct task_struct *tsk)
 {
-	if (tsk == current)
+	if (tsk == current) {
 		exit_thread_runtime_instr();
+		exit_thread_gs();
+	}
 }
 
 void flush_thread(void)
@@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
 	/* Don't copy runtime instrumentation info */
 	p->thread.ri_cb = NULL;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
+	/* Don't copy guarded storage control block */
+	p->thread.gs_cb = NULL;
+	p->thread.gs_bc_cb = NULL;
 
 	/* Set a new TLS ? */
 	if (clone_flags & CLONE_SETTLS) {
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 928b929a6261..778cd6536175 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -7,6 +7,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/cpufeature.h>
+#include <linux/bitops.h>
 #include <linux/kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/init.h>
@@ -91,11 +92,23 @@ int cpu_have_feature(unsigned int num)
 }
 EXPORT_SYMBOL(cpu_have_feature);
 
+static void show_facilities(struct seq_file *m)
+{
+	unsigned int bit;
+	long *facilities;
+
+	facilities = (long *)&S390_lowcore.stfle_fac_list;
+	seq_puts(m, "facilities :");
+	for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT)
+		seq_printf(m, " %d", bit);
+	seq_putc(m, '\n');
+}
+
 static void show_cpu_summary(struct seq_file *m, void *v)
 {
 	static const char *hwcap_str[] = {
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
-		"edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe"
+		"edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
 	};
 	static const char * const int_hwcap_str[] = {
 		"sie"
@@ -116,6 +129,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
 		if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
 			seq_printf(m, "%s ", int_hwcap_str[i]);
 	seq_puts(m, "\n");
+	show_facilities(m);
 	show_cacheinfo(m);
 	for_each_online_cpu(cpu) {
 		struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
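For illustration, the new /proc/cpuinfo line simply lists the bit numbers of all installed facilities; the exact set varies by machine and hypervisor, e.g.:

facilities : 0 1 2 3 ...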
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c14df0a1ec3c..488c5bb8dc77 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task)
 	struct pt_regs *regs = task_pt_regs(task);
 	struct thread_struct *thread = &task->thread;
 	struct per_regs old, new;
-
+	unsigned long cr0_old, cr0_new;
+	unsigned long cr2_old, cr2_new;
+	int cr0_changed, cr2_changed;
+
+	__ctl_store(cr0_old, 0, 0);
+	__ctl_store(cr2_old, 2, 2);
+	cr0_new = cr0_old;
+	cr2_new = cr2_old;
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE) {
-		unsigned long cr, cr_new;
-
-		__ctl_store(cr, 0, 0);
 		/* Set or clear transaction execution TXC bit 8. */
-		cr_new = cr | (1UL << 55);
+		cr0_new |= (1UL << 55);
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr_new &= ~(1UL << 55);
-		if (cr_new != cr)
-			__ctl_load(cr_new, 0, 0);
+			cr0_new &= ~(1UL << 55);
 		/* Set or clear transaction execution TDC bits 62 and 63. */
-		__ctl_store(cr, 2, 2);
-		cr_new = cr & ~3UL;
+		cr2_new &= ~3UL;
 		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
 			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
-				cr_new |= 1UL;
+				cr2_new |= 1UL;
 			else
-				cr_new |= 2UL;
+				cr2_new |= 2UL;
 		}
-		if (cr_new != cr)
-			__ctl_load(cr_new, 2, 2);
 	}
+	/* Take care of enable/disable of guarded storage. */
+	if (MACHINE_HAS_GS) {
+		cr2_new &= ~(1UL << 4);
+		if (task->thread.gs_cb)
+			cr2_new |= (1UL << 4);
+	}
+	/* Load control register 0/2 iff changed */
+	cr0_changed = cr0_new != cr0_old;
+	cr2_changed = cr2_new != cr2_old;
+	if (cr0_changed)
+		__ctl_load(cr0_new, 0, 0);
+	if (cr2_changed)
+		__ctl_load(cr2_new, 2, 2);
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
 	new.start = thread->per_user.start;
@@ -1137,6 +1149,74 @@ static int s390_system_call_set(struct task_struct *target,
 				  data, 0, sizeof(unsigned int));
 }
 
+static int s390_gs_cb_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data)
+		return -ENODATA;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_cb_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+		target->thread.gs_cb = data;
+	}
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_bc_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data)
+		return -ENODATA;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_bc_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+		target->thread.gs_bc_cb = data;
+	}
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  data, 0, sizeof(struct gs_cb));
+}
+
 static const struct user_regset s390_regsets[] = {
 	{
 		.core_note_type = NT_PRSTATUS,
@@ -1194,6 +1274,22 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_vxrs_high_get,
 		.set = s390_vxrs_high_set,
 	},
+	{
+		.core_note_type = NT_S390_GS_CB,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_gs_cb_get,
+		.set = s390_gs_cb_set,
+	},
+	{
+		.core_note_type = NT_S390_GS_BC,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_gs_bc_get,
+		.set = s390_gs_bc_set,
+	},
 };
 
 static const struct user_regset_view user_s390_view = {
@@ -1422,6 +1518,14 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_compat_regs_high_get,
 		.set = s390_compat_regs_high_set,
 	},
+	{
+		.core_note_type = NT_S390_GS_CB,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_gs_cb_get,
+		.set = s390_gs_cb_set,
+	},
 };
 
 static const struct user_regset_view user_s390_compat_view = {
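A hypothetical userspace sketch of reading a tracee's guarded storage control block through the new regset. PTRACE_GETREGSET with an iovec is the standard interface; the NT_S390_GS_CB value and the four-doubleword size of struct gs_cb are assumptions inferred from the regset descriptors above:

/* Illustrative only. */
#include <elf.h>
#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

#ifndef NT_S390_GS_CB
#define NT_S390_GS_CB 0x30b     /* assumed value */
#endif

static long read_gs_cb(pid_t pid, uint64_t gs_cb[4])
{
        struct iovec iov = {
                .iov_base = gs_cb,
                .iov_len  = 4 * sizeof(uint64_t),  /* assumed sizeof(struct gs_cb) */
        };

        return ptrace(PTRACE_GETREGSET, pid, NT_S390_GS_CB, &iov);
}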
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 911dc0b49be0..3ae756c0db3d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void)
 	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
 	       MAX_FACILITY_BIT/8);
-	if (MACHINE_HAS_VX)
-		lc->vector_save_area_addr =
-			(unsigned long) &lc->vector_save_area;
+	if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+		unsigned long bits, size;
+
+		bits = MACHINE_HAS_GS ? 11 : 10;
+		size = 1UL << bits;
+		lc->mcesad = (__u64) memblock_virt_alloc(size, size);
+		if (MACHINE_HAS_GS)
+			lc->mcesad |= bits;
+	}
 	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
 	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
 	lc->async_enter_timer = S390_lowcore.async_enter_timer;
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void)
 		elf_hwcap |= HWCAP_S390_VXRS_BCD;
 	}
 
+	/*
+	 * Guarded storage support HWCAP_S390_GS is bit 12.
+	 */
+	if (MACHINE_HAS_GS)
+		elf_hwcap |= HWCAP_S390_GS;
+
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
 	switch (cpu_id.machine) {
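The mcesad word packs two things: the machine-check extended save area (MCESA) origin in the high bits and, when guarded storage is available, the length characteristic (11, i.e. a 2KB area) in the low bits. Allocating with size equal to alignment keeps the low bits of the origin zero, so the OR cannot corrupt the address. A sketch of the unpacking side, assuming the MCESA_ORIGIN_MASK/MCESA_LC_MASK definitions from this series:

/* Illustrative only. */
static inline void *mcesa_origin(unsigned long mcesad)
{
        return (void *)(mcesad & MCESA_ORIGIN_MASK);
}

static inline unsigned long mcesa_size(unsigned long mcesad)
{
        unsigned long lc = mcesad & MCESA_LC_MASK;

        return 1UL << (lc ? lc : 10);   /* 0 means the 1KB default */
}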
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 5dab859b0d54..363000a77ffc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -51,6 +51,7 @@
 #include <asm/os_info.h>
 #include <asm/sigp.h>
 #include <asm/idle.h>
+#include <asm/nmi.h>
 #include "entry.h"
 
 enum {
@@ -78,6 +79,8 @@ struct pcpu {
 static u8 boot_core_type;
 static struct pcpu pcpu_devices[NR_CPUS];
 
+static struct kmem_cache *pcpu_mcesa_cache;
+
 unsigned int smp_cpu_mt_shift;
 EXPORT_SYMBOL(smp_cpu_mt_shift);
 
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 {
 	unsigned long async_stack, panic_stack;
+	unsigned long mcesa_origin, mcesa_bits;
 	struct lowcore *lc;
 
+	mcesa_origin = mcesa_bits = 0;
 	if (pcpu != &pcpu_devices[0]) {
 		pcpu->lowcore = (struct lowcore *)
 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 		panic_stack = __get_free_page(GFP_KERNEL);
 		if (!pcpu->lowcore || !panic_stack || !async_stack)
 			goto out;
+		if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+			mcesa_origin = (unsigned long)
+				kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
+			if (!mcesa_origin)
+				goto out;
+			mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
+		}
 	} else {
 		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
 		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+		mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+		mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK;
 	}
 	lc = pcpu->lowcore;
 	memcpy(lc, &S390_lowcore, 512);
 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
 	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
 	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+	lc->mcesad = mcesa_origin | mcesa_bits;
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
-	if (MACHINE_HAS_VX)
-		lc->vector_save_area_addr =
-			(unsigned long) &lc->vector_save_area;
 	if (vdso_alloc_per_cpu(lc))
 		goto out;
 	lowcore_ptr[cpu] = lc;
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	return 0;
 out:
 	if (pcpu != &pcpu_devices[0]) {
+		if (mcesa_origin)
+			kmem_cache_free(pcpu_mcesa_cache,
+					(void *) mcesa_origin);
 		free_page(panic_stack);
 		free_pages(async_stack, ASYNC_ORDER);
 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -229,11 +244,17 @@ out:
 
 static void pcpu_free_lowcore(struct pcpu *pcpu)
 {
+	unsigned long mcesa_origin;
+
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
 	lowcore_ptr[pcpu - pcpu_devices] = NULL;
 	vdso_free_per_cpu(pcpu->lowcore);
 	if (pcpu == &pcpu_devices[0])
 		return;
+	if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+		mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+		kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin);
+	}
 	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
 	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
 	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -550,9 +571,11 @@ int smp_store_status(int cpu)
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
 			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
 		return -EIO;
-	if (!MACHINE_HAS_VX)
+	if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
 		return 0;
-	pa = __pa(pcpu->lowcore->vector_save_area_addr);
+	pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
+	if (MACHINE_HAS_GS)
+		pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
 			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
 		return -EIO;
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void)
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+	unsigned long size;
+
 	/* request the 0x1201 emergency signal external interrupt */
 	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
 		panic("Couldn't request external interrupt 0x1201");
 	/* request the 0x1202 external call external interrupt */
 	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
 		panic("Couldn't request external interrupt 0x1202");
+	/* create slab cache for the machine-check-extended-save-areas */
+	if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+		size = 1UL << (MACHINE_HAS_GS ? 11 : 10);
+		pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas",
+						     size, size, 0, NULL);
+		if (!pcpu_mcesa_cache)
+			panic("Couldn't create nmi save area cache");
+	}
 }
 
 void __init smp_prepare_boot_cpu(void)
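Note on the cache creation above: passing align == size to kmem_cache_create() is what guarantees the 1KB (or, with guarded storage, 2KB) alignment that SIGP STORE ADDITIONAL STATUS requires, and it is also what leaves the low bits of every allocated origin free for the length characteristic. An illustrative invariant check, not part of the patch:

/* Illustrative only. */
static void mcesa_check(unsigned long origin, unsigned long size)
{
        WARN_ON(size != 1024 && size != 2048);  /* 1KB, or 2KB with GS */
        WARN_ON(origin & (size - 1));           /* size-aligned origin */
}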
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2659b5cfeddb..54fce7b065de 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
 SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
 SYSCALL(sys_preadv2,compat_sys_preadv2)
 SYSCALL(sys_pwritev2,compat_sys_pwritev2)
-NI_SYSCALL
+SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
 SYSCALL(sys_statx,compat_sys_statx)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 12b6b138e354..eefcb54872a5 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -4,6 +4,7 @@
  * Martin Schwidefsky <schwidefsky@de.ibm.com>,
  */
 
+#include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
@@ -13,6 +14,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <asm/ebcdic.h>
+#include <asm/debug.h>
 #include <asm/sysinfo.h>
 #include <asm/cpcmd.h>
 #include <asm/topology.h>
@@ -485,3 +487,99 @@ void calibrate_delay(void)
 	       "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
 	       (loops_per_jiffy/(5000/HZ)) % 100);
 }
+
+#ifdef CONFIG_DEBUG_FS
+
+#define STSI_FILE(fc, s1, s2) \
+static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\
+{ \
+	file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \
+	if (!file->private_data) \
+		return -ENOMEM; \
+	if (stsi(file->private_data, fc, s1, s2)) { \
+		free_page((unsigned long)file->private_data); \
+		file->private_data = NULL; \
+		return -EACCES; \
+	} \
+	return nonseekable_open(inode, file); \
+} \
+ \
+static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
+	.open = stsi_open_##fc##_##s1##_##s2, \
+	.release = stsi_release, \
+	.read = stsi_read, \
+	.llseek = no_llseek, \
+};
+
+static int stsi_release(struct inode *inode, struct file *file)
+{
+	free_page((unsigned long)file->private_data);
+	return 0;
+}
+
+static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{
+	return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE);
+}
+
+STSI_FILE( 1, 1, 1);
+STSI_FILE( 1, 2, 1);
+STSI_FILE( 1, 2, 2);
+STSI_FILE( 2, 2, 1);
+STSI_FILE( 2, 2, 2);
+STSI_FILE( 3, 2, 2);
+STSI_FILE(15, 1, 2);
+STSI_FILE(15, 1, 3);
+STSI_FILE(15, 1, 4);
+STSI_FILE(15, 1, 5);
+STSI_FILE(15, 1, 6);
+
+struct stsi_file {
+	const struct file_operations *fops;
+	char *name;
+};
+
+static struct stsi_file stsi_file[] __initdata = {
+	{.fops = &stsi_1_1_1_fs_ops,  .name = "1_1_1"},
+	{.fops = &stsi_1_2_1_fs_ops,  .name = "1_2_1"},
+	{.fops = &stsi_1_2_2_fs_ops,  .name = "1_2_2"},
+	{.fops = &stsi_2_2_1_fs_ops,  .name = "2_2_1"},
+	{.fops = &stsi_2_2_2_fs_ops,  .name = "2_2_2"},
+	{.fops = &stsi_3_2_2_fs_ops,  .name = "3_2_2"},
+	{.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"},
+	{.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"},
+	{.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"},
+	{.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"},
+	{.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"},
+};
+
+static u8 stsi_0_0_0;
+
+static __init int stsi_init_debugfs(void)
+{
+	struct dentry *stsi_root;
+	struct stsi_file *sf;
+	int lvl, i;
+
+	stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir);
+	if (IS_ERR_OR_NULL(stsi_root))
+		return 0;
+	lvl = stsi(NULL, 0, 0, 0);
+	if (lvl > 0)
+		stsi_0_0_0 = lvl;
+	debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0);
+	for (i = 0; i < ARRAY_SIZE(stsi_file); i++) {
+		sf = &stsi_file[i];
+		debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
+	}
+	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+		char link_to[10];
+
+		sprintf(link_to, "15_1_%d", topology_mnest_limit());
+		debugfs_create_symlink("topology", stsi_root, link_to);
+	}
+	return 0;
+}
+device_initcall(stsi_init_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
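A hypothetical userspace sketch of consuming the new files; the /sys/kernel/debug/s390 location is an assumption about where arch_debugfs_dir lands, and each read returns the raw zero-padded page from the stsi() call (or open fails with EACCES when the machine does not provide that level):

/* Illustrative only. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int dump_stsi_3_2_2(void)
{
        char buf[4096]; /* one zeroed page, as allocated in stsi_open_* */
        ssize_t n;
        int fd;

        fd = open("/sys/kernel/debug/s390/stsi/3_2_2", O_RDONLY); /* assumed path */
        if (fd < 0)
                return -1;
        n = read(fd, buf, sizeof(buf));
        if (n > 0)
                fwrite(buf, 1, (size_t) n, stdout);
        close(fd);
        return n < 0 ? -1 : 0;
}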
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 17660e800e74..bb47c92476f0 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -83,6 +83,8 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 	return mask;
 }
 
+#define TOPOLOGY_CORE_BITS 64
+
 static void add_cpus_to_mask(struct topology_core *tl_core,
 			     struct mask_info *drawer,
 			     struct mask_info *book,
@@ -91,7 +93,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
 	struct cpu_topology_s390 *topo;
 	unsigned int core;
 
-	for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
+	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
 		unsigned int rcore;
 		int lcpu, i;
 
@@ -244,7 +246,7 @@ static void update_cpu_masks(void)
 
 void store_topology(struct sysinfo_15_1_x *info)
 {
-	stsi(info, 15, 1, min(topology_max_mnest, 4));
+	stsi(info, 15, 1, topology_mnest_limit());
 }
 
 static int __arch_update_cpu_topology(void)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0f8f14199734..169558dc7daf 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	/* Extended save area */
-	rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr,
+	rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
 			   sizeof(unsigned long));
 	/* Only bits 0-53 are used for address formation */
 	ext_sa_addr &= ~0x3ffUL;
 	if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd6cd05bb6a7..d5c5c911821a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -273,7 +273,7 @@ static void kvm_s390_cpu_feat_init(void)
 			      kvm_s390_available_subfunc.pcc);
 	}
 	if (test_facility(57)) /* MSA5 */
-		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
+		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 			      kvm_s390_available_subfunc.ppno);
 
 	if (MACHINE_HAS_ESOP)
@@ -1512,9 +1512,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
 	} else {
 		if (sclp.hamax == U64_MAX)
-			kvm->arch.mem_limit = TASK_MAX_SIZE;
+			kvm->arch.mem_limit = TASK_SIZE_MAX;
 		else
-			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
+			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
 						    sclp.hamax + 1);
 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
 		if (!kvm->arch.gmap)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ba427eb6f14c..ffb15bd4c593 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -17,7 +17,7 @@ int spin_retry = -1;
17static int __init spin_retry_init(void) 17static int __init spin_retry_init(void)
18{ 18{
19 if (spin_retry < 0) 19 if (spin_retry < 0)
20 spin_retry = MACHINE_HAS_CAD ? 10 : 1000; 20 spin_retry = 1000;
21 return 0; 21 return 0;
22} 22}
23early_initcall(spin_retry_init); 23early_initcall(spin_retry_init);
@@ -32,23 +32,17 @@ static int __init spin_retry_setup(char *str)
32} 32}
33__setup("spin_retry=", spin_retry_setup); 33__setup("spin_retry=", spin_retry_setup);
34 34
35static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old)
36{
37 asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock));
38}
39
40void arch_spin_lock_wait(arch_spinlock_t *lp) 35void arch_spin_lock_wait(arch_spinlock_t *lp)
41{ 36{
42 unsigned int cpu = SPINLOCK_LOCKVAL; 37 int cpu = SPINLOCK_LOCKVAL;
43 unsigned int owner; 38 int owner, count, first_diag;
44 int count, first_diag;
45 39
46 first_diag = 1; 40 first_diag = 1;
47 while (1) { 41 while (1) {
48 owner = ACCESS_ONCE(lp->lock); 42 owner = ACCESS_ONCE(lp->lock);
49 /* Try to get the lock if it is free. */ 43 /* Try to get the lock if it is free. */
50 if (!owner) { 44 if (!owner) {
51 if (_raw_compare_and_swap(&lp->lock, 0, cpu)) 45 if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
52 return; 46 return;
53 continue; 47 continue;
54 } 48 }
@@ -61,8 +55,6 @@ void arch_spin_lock_wait(arch_spinlock_t *lp)
61 /* Loop for a while on the lock value. */ 55 /* Loop for a while on the lock value. */
62 count = spin_retry; 56 count = spin_retry;
63 do { 57 do {
64 if (MACHINE_HAS_CAD)
65 _raw_compare_and_delay(&lp->lock, owner);
66 owner = ACCESS_ONCE(lp->lock); 58 owner = ACCESS_ONCE(lp->lock);
67 } while (owner && count-- > 0); 59 } while (owner && count-- > 0);
68 if (!owner) 60 if (!owner)
@@ -82,9 +74,8 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
82 74
83void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) 75void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
84{ 76{
85 unsigned int cpu = SPINLOCK_LOCKVAL; 77 int cpu = SPINLOCK_LOCKVAL;
86 unsigned int owner; 78 int owner, count, first_diag;
87 int count, first_diag;
88 79
89 local_irq_restore(flags); 80 local_irq_restore(flags);
90 first_diag = 1; 81 first_diag = 1;
@@ -93,7 +84,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
93 /* Try to get the lock if it is free. */ 84 /* Try to get the lock if it is free. */
94 if (!owner) { 85 if (!owner) {
95 local_irq_disable(); 86 local_irq_disable();
96 if (_raw_compare_and_swap(&lp->lock, 0, cpu)) 87 if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
97 return; 88 return;
98 local_irq_restore(flags); 89 local_irq_restore(flags);
99 continue; 90 continue;
@@ -107,8 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
107 /* Loop for a while on the lock value. */ 98 /* Loop for a while on the lock value. */
108 count = spin_retry; 99 count = spin_retry;
109 do { 100 do {
110 if (MACHINE_HAS_CAD)
111 _raw_compare_and_delay(&lp->lock, owner);
112 owner = ACCESS_ONCE(lp->lock); 101 owner = ACCESS_ONCE(lp->lock);
113 } while (owner && count-- > 0); 102 } while (owner && count-- > 0);
114 if (!owner) 103 if (!owner)
@@ -128,18 +117,16 @@ EXPORT_SYMBOL(arch_spin_lock_wait_flags);
128 117
129int arch_spin_trylock_retry(arch_spinlock_t *lp) 118int arch_spin_trylock_retry(arch_spinlock_t *lp)
130{ 119{
131 unsigned int cpu = SPINLOCK_LOCKVAL; 120 int cpu = SPINLOCK_LOCKVAL;
132 unsigned int owner; 121 int owner, count;
133 int count;
134 122
135 for (count = spin_retry; count > 0; count--) { 123 for (count = spin_retry; count > 0; count--) {
136 owner = READ_ONCE(lp->lock); 124 owner = READ_ONCE(lp->lock);
137 /* Try to get the lock if it is free. */ 125 /* Try to get the lock if it is free. */
138 if (!owner) { 126 if (!owner) {
139 if (_raw_compare_and_swap(&lp->lock, 0, cpu)) 127 if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
140 return 1; 128 return 1;
141 } else if (MACHINE_HAS_CAD) 129 }
142 _raw_compare_and_delay(&lp->lock, owner);
143 } 130 }
144 return 0; 131 return 0;
145} 132}
@@ -147,8 +134,8 @@ EXPORT_SYMBOL(arch_spin_trylock_retry);
147 134
148void _raw_read_lock_wait(arch_rwlock_t *rw) 135void _raw_read_lock_wait(arch_rwlock_t *rw)
149{ 136{
150 unsigned int owner, old;
151 int count = spin_retry; 137 int count = spin_retry;
138 int owner, old;
152 139
153#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES 140#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
154 __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD); 141 __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
@@ -162,12 +149,9 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
162 } 149 }
163 old = ACCESS_ONCE(rw->lock); 150 old = ACCESS_ONCE(rw->lock);
164 owner = ACCESS_ONCE(rw->owner); 151 owner = ACCESS_ONCE(rw->owner);
165 if ((int) old < 0) { 152 if (old < 0)
166 if (MACHINE_HAS_CAD)
167 _raw_compare_and_delay(&rw->lock, old);
168 continue; 153 continue;
169 } 154 if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
170 if (_raw_compare_and_swap(&rw->lock, old, old + 1))
171 return; 155 return;
172 } 156 }
173} 157}
@@ -175,17 +159,14 @@ EXPORT_SYMBOL(_raw_read_lock_wait);
175 159
176int _raw_read_trylock_retry(arch_rwlock_t *rw) 160int _raw_read_trylock_retry(arch_rwlock_t *rw)
177{ 161{
178 unsigned int old;
179 int count = spin_retry; 162 int count = spin_retry;
163 int old;
180 164
181 while (count-- > 0) { 165 while (count-- > 0) {
182 old = ACCESS_ONCE(rw->lock); 166 old = ACCESS_ONCE(rw->lock);
183 if ((int) old < 0) { 167 if (old < 0)
184 if (MACHINE_HAS_CAD)
185 _raw_compare_and_delay(&rw->lock, old);
186 continue; 168 continue;
187 } 169 if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
188 if (_raw_compare_and_swap(&rw->lock, old, old + 1))
189 return 1; 170 return 1;
190 } 171 }
191 return 0; 172 return 0;
@@ -194,10 +175,10 @@ EXPORT_SYMBOL(_raw_read_trylock_retry);
194 175
195#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES 176#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
196 177
197void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) 178void _raw_write_lock_wait(arch_rwlock_t *rw, int prev)
198{ 179{
199 unsigned int owner, old;
200 int count = spin_retry; 180 int count = spin_retry;
181 int owner, old;
201 182
202 owner = 0; 183 owner = 0;
203 while (1) { 184 while (1) {
@@ -209,14 +190,12 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
209 old = ACCESS_ONCE(rw->lock); 190 old = ACCESS_ONCE(rw->lock);
210 owner = ACCESS_ONCE(rw->owner); 191 owner = ACCESS_ONCE(rw->owner);
211 smp_mb(); 192 smp_mb();
212 if ((int) old >= 0) { 193 if (old >= 0) {
213 prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); 194 prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
214 old = prev; 195 old = prev;
215 } 196 }
216 if ((old & 0x7fffffff) == 0 && (int) prev >= 0) 197 if ((old & 0x7fffffff) == 0 && prev >= 0)
217 break; 198 break;
218 if (MACHINE_HAS_CAD)
219 _raw_compare_and_delay(&rw->lock, old);
220 } 199 }
221} 200}
222EXPORT_SYMBOL(_raw_write_lock_wait); 201EXPORT_SYMBOL(_raw_write_lock_wait);
@@ -225,8 +204,8 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
225 204
226void _raw_write_lock_wait(arch_rwlock_t *rw) 205void _raw_write_lock_wait(arch_rwlock_t *rw)
227{ 206{
228 unsigned int owner, old, prev;
229 int count = spin_retry; 207 int count = spin_retry;
208 int owner, old, prev;
230 209
231 prev = 0x80000000; 210 prev = 0x80000000;
232 owner = 0; 211 owner = 0;
@@ -238,15 +217,13 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
238 } 217 }
239 old = ACCESS_ONCE(rw->lock); 218 old = ACCESS_ONCE(rw->lock);
240 owner = ACCESS_ONCE(rw->owner); 219 owner = ACCESS_ONCE(rw->owner);
241 if ((int) old >= 0 && 220 if (old >= 0 &&
242 _raw_compare_and_swap(&rw->lock, old, old | 0x80000000)) 221 __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000))
243 prev = old; 222 prev = old;
244 else 223 else
245 smp_mb(); 224 smp_mb();
246 if ((old & 0x7fffffff) == 0 && (int) prev >= 0) 225 if ((old & 0x7fffffff) == 0 && prev >= 0)
247 break; 226 break;
248 if (MACHINE_HAS_CAD)
249 _raw_compare_and_delay(&rw->lock, old);
250 } 227 }
251} 228}
252EXPORT_SYMBOL(_raw_write_lock_wait); 229EXPORT_SYMBOL(_raw_write_lock_wait);
@@ -255,24 +232,21 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
255 232
256int _raw_write_trylock_retry(arch_rwlock_t *rw) 233int _raw_write_trylock_retry(arch_rwlock_t *rw)
257{ 234{
258 unsigned int old;
259 int count = spin_retry; 235 int count = spin_retry;
236 int old;
260 237
261 while (count-- > 0) { 238 while (count-- > 0) {
262 old = ACCESS_ONCE(rw->lock); 239 old = ACCESS_ONCE(rw->lock);
263 if (old) { 240 if (old)
264 if (MACHINE_HAS_CAD)
265 _raw_compare_and_delay(&rw->lock, old);
266 continue; 241 continue;
267 } 242 if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000))
268 if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
269 return 1; 243 return 1;
270 } 244 }
271 return 0; 245 return 0;
272} 246}
273EXPORT_SYMBOL(_raw_write_trylock_retry); 247EXPORT_SYMBOL(_raw_write_trylock_retry);
274 248
275void arch_lock_relax(unsigned int cpu) 249void arch_lock_relax(int cpu)
276{ 250{
277 if (!cpu) 251 if (!cpu)
278 return; 252 return;
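
The spinlock conversion above swaps _raw_compare_and_swap() and the compare-and-delay hint for the new __atomic_cmpxchg_bool() primitive. As a rough illustration, the reader-acquire retry loop can be mirrored in userspace with GCC's __atomic builtins; everything below (the demo_* names, the retry constant) is an assumption for the sketch, not kernel API:

/*
 * Userspace analogue of the reader-acquire loop; __atomic_compare_exchange_n
 * plays the role of the kernel's __atomic_cmpxchg_bool().
 */
#include <stdbool.h>

#define DEMO_SPIN_RETRY 1000

typedef struct { int lock; } demo_rwlock_t;

static bool demo_read_trylock_retry(demo_rwlock_t *rw)
{
        int count = DEMO_SPIN_RETRY;
        int old;

        while (count-- > 0) {
                old = __atomic_load_n(&rw->lock, __ATOMIC_RELAXED);
                if (old < 0)            /* msb set: a writer holds the lock */
                        continue;
                /* try to bump the reader count from old to old + 1 */
                if (__atomic_compare_exchange_n(&rw->lock, &old, old + 1,
                                                false, __ATOMIC_ACQUIRE,
                                                __ATOMIC_RELAXED))
                        return true;
        }
        return false;
}

This is the same shape as _raw_read_trylock_retry() after the conversion: either the reader count is bumped atomically or the attempt is retried.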
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a07b1ec1391d..7f6db1e6c048 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -431,7 +431,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
431 if ((from | to | len) & (PMD_SIZE - 1)) 431 if ((from | to | len) & (PMD_SIZE - 1))
432 return -EINVAL; 432 return -EINVAL;
433 if (len == 0 || from + len < from || to + len < to || 433 if (len == 0 || from + len < from || to + len < to ||
434 from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) 434 from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
435 return -EINVAL; 435 return -EINVAL;
436 436
437 flush = 0; 437 flush = 0;
@@ -2004,20 +2004,12 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
2004 * Called with sg->parent->shadow_lock. 2004 * Called with sg->parent->shadow_lock.
2005 */ 2005 */
2006static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, 2006static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
2007 unsigned long offset, pte_t *pte) 2007 unsigned long gaddr, pte_t *pte)
2008{ 2008{
2009 struct gmap_rmap *rmap, *rnext, *head; 2009 struct gmap_rmap *rmap, *rnext, *head;
2010 unsigned long gaddr, start, end, bits, raddr; 2010 unsigned long start, end, bits, raddr;
2011 unsigned long *table;
2012 2011
2013 BUG_ON(!gmap_is_shadow(sg)); 2012 BUG_ON(!gmap_is_shadow(sg));
2014 spin_lock(&sg->parent->guest_table_lock);
2015 table = radix_tree_lookup(&sg->parent->host_to_guest,
2016 vmaddr >> PMD_SHIFT);
2017 gaddr = table ? __gmap_segment_gaddr(table) + offset : 0;
2018 spin_unlock(&sg->parent->guest_table_lock);
2019 if (!table)
2020 return;
2021 2013
2022 spin_lock(&sg->guest_table_lock); 2014 spin_lock(&sg->guest_table_lock);
2023 if (sg->removed) { 2015 if (sg->removed) {
@@ -2076,7 +2068,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
2076void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, 2068void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
2077 pte_t *pte, unsigned long bits) 2069 pte_t *pte, unsigned long bits)
2078{ 2070{
2079 unsigned long offset, gaddr; 2071 unsigned long offset, gaddr = 0;
2080 unsigned long *table; 2072 unsigned long *table;
2081 struct gmap *gmap, *sg, *next; 2073 struct gmap *gmap, *sg, *next;
2082 2074
@@ -2084,22 +2076,23 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
2084 offset = offset * (4096 / sizeof(pte_t)); 2076 offset = offset * (4096 / sizeof(pte_t));
2085 rcu_read_lock(); 2077 rcu_read_lock();
2086 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { 2078 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
2087 if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
2088 spin_lock(&gmap->shadow_lock);
2089 list_for_each_entry_safe(sg, next,
2090 &gmap->children, list)
2091 gmap_shadow_notify(sg, vmaddr, offset, pte);
2092 spin_unlock(&gmap->shadow_lock);
2093 }
2094 if (!(bits & PGSTE_IN_BIT))
2095 continue;
2096 spin_lock(&gmap->guest_table_lock); 2079 spin_lock(&gmap->guest_table_lock);
2097 table = radix_tree_lookup(&gmap->host_to_guest, 2080 table = radix_tree_lookup(&gmap->host_to_guest,
2098 vmaddr >> PMD_SHIFT); 2081 vmaddr >> PMD_SHIFT);
2099 if (table) 2082 if (table)
2100 gaddr = __gmap_segment_gaddr(table) + offset; 2083 gaddr = __gmap_segment_gaddr(table) + offset;
2101 spin_unlock(&gmap->guest_table_lock); 2084 spin_unlock(&gmap->guest_table_lock);
2102 if (table) 2085 if (!table)
2086 continue;
2087
2088 if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
2089 spin_lock(&gmap->shadow_lock);
2090 list_for_each_entry_safe(sg, next,
2091 &gmap->children, list)
2092 gmap_shadow_notify(sg, vmaddr, gaddr, pte);
2093 spin_unlock(&gmap->shadow_lock);
2094 }
2095 if (bits & PGSTE_IN_BIT)
2103 gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1); 2096 gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
2104 } 2097 }
2105 rcu_read_unlock(); 2098 rcu_read_unlock();
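
The gmap rework above hoists the host-to-guest translation out of gmap_shadow_notify() and performs it once per gmap in ptep_notify(), handing the resulting gaddr down to every shadow. A minimal sketch of that translate-once pattern, with all names hypothetical:

struct demo_gmap {
        struct demo_gmap *children;     /* first shadow */
        struct demo_gmap *sibling;      /* next shadow of the same parent */
};

static unsigned long demo_translate(struct demo_gmap *g, unsigned long vmaddr)
{
        return vmaddr;                  /* stands in for the radix-tree lookup */
}

static void demo_shadow_notify(struct demo_gmap *sg, unsigned long vmaddr,
                               unsigned long gaddr)
{
        /* act on the pre-translated guest address */
}

static void demo_notify_children(struct demo_gmap *gmap, unsigned long vmaddr)
{
        unsigned long gaddr = demo_translate(gmap, vmaddr);     /* one lookup */
        struct demo_gmap *sg;

        for (sg = gmap->children; sg; sg = sg->sibling)
                demo_shadow_notify(sg, vmaddr, gaddr);          /* reused */
}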
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 18d4107e10ee..b7b779c40a5b 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -211,7 +211,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
211 addr = start; 211 addr = start;
212 len = (unsigned long) nr_pages << PAGE_SHIFT; 212 len = (unsigned long) nr_pages << PAGE_SHIFT;
213 end = start + len; 213 end = start + len;
214 if ((end <= start) || (end > TASK_SIZE)) 214 if ((end <= start) || (end > mm->context.asce_limit))
215 return 0; 215 return 0;
216 /* 216 /*
217 * local_irq_save() doesn't prevent pagetable teardown, but does 217 * local_irq_save() doesn't prevent pagetable teardown, but does
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 50618614881f..b017daed6887 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -89,19 +89,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
89 struct mm_struct *mm = current->mm; 89 struct mm_struct *mm = current->mm;
90 struct vm_area_struct *vma; 90 struct vm_area_struct *vma;
91 struct vm_unmapped_area_info info; 91 struct vm_unmapped_area_info info;
92 int rc;
92 93
93 if (len > TASK_SIZE - mmap_min_addr) 94 if (len > TASK_SIZE - mmap_min_addr)
94 return -ENOMEM; 95 return -ENOMEM;
95 96
96 if (flags & MAP_FIXED) 97 if (flags & MAP_FIXED)
97 return addr; 98 goto check_asce_limit;
98 99
99 if (addr) { 100 if (addr) {
100 addr = PAGE_ALIGN(addr); 101 addr = PAGE_ALIGN(addr);
101 vma = find_vma(mm, addr); 102 vma = find_vma(mm, addr);
102 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && 103 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
103 (!vma || addr + len <= vma->vm_start)) 104 (!vma || addr + len <= vma->vm_start))
104 return addr; 105 goto check_asce_limit;
105 } 106 }
106 107
107 info.flags = 0; 108 info.flags = 0;
@@ -113,7 +114,18 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
113 else 114 else
114 info.align_mask = 0; 115 info.align_mask = 0;
115 info.align_offset = pgoff << PAGE_SHIFT; 116 info.align_offset = pgoff << PAGE_SHIFT;
116 return vm_unmapped_area(&info); 117 addr = vm_unmapped_area(&info);
118 if (addr & ~PAGE_MASK)
119 return addr;
120
121check_asce_limit:
122 if (addr + len > current->mm->context.asce_limit) {
123 rc = crst_table_upgrade(mm);
124 if (rc)
125 return (unsigned long) rc;
126 }
127
128 return addr;
117} 129}
118 130
119unsigned long 131unsigned long
@@ -125,13 +137,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
125 struct mm_struct *mm = current->mm; 137 struct mm_struct *mm = current->mm;
126 unsigned long addr = addr0; 138 unsigned long addr = addr0;
127 struct vm_unmapped_area_info info; 139 struct vm_unmapped_area_info info;
140 int rc;
128 141
129 /* requested length too big for entire address space */ 142 /* requested length too big for entire address space */
130 if (len > TASK_SIZE - mmap_min_addr) 143 if (len > TASK_SIZE - mmap_min_addr)
131 return -ENOMEM; 144 return -ENOMEM;
132 145
133 if (flags & MAP_FIXED) 146 if (flags & MAP_FIXED)
134 return addr; 147 goto check_asce_limit;
135 148
136 /* requesting a specific address */ 149 /* requesting a specific address */
137 if (addr) { 150 if (addr) {
@@ -139,7 +152,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
139 vma = find_vma(mm, addr); 152 vma = find_vma(mm, addr);
140 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && 153 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
141 (!vma || addr + len <= vma->vm_start)) 154 (!vma || addr + len <= vma->vm_start))
142 return addr; 155 goto check_asce_limit;
143 } 156 }
144 157
145 info.flags = VM_UNMAPPED_AREA_TOPDOWN; 158 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -165,65 +178,20 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
165 info.low_limit = TASK_UNMAPPED_BASE; 178 info.low_limit = TASK_UNMAPPED_BASE;
166 info.high_limit = TASK_SIZE; 179 info.high_limit = TASK_SIZE;
167 addr = vm_unmapped_area(&info); 180 addr = vm_unmapped_area(&info);
181 if (addr & ~PAGE_MASK)
182 return addr;
168 } 183 }
169 184
170 return addr; 185check_asce_limit:
171} 186 if (addr + len > current->mm->context.asce_limit) {
172
173int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
174{
175 if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
176 return 0;
177 if (!(flags & MAP_FIXED))
178 addr = 0;
179 if ((addr + len) >= TASK_SIZE)
180 return crst_table_upgrade(current->mm);
181 return 0;
182}
183
184static unsigned long
185s390_get_unmapped_area(struct file *filp, unsigned long addr,
186 unsigned long len, unsigned long pgoff, unsigned long flags)
187{
188 struct mm_struct *mm = current->mm;
189 unsigned long area;
190 int rc;
191
192 area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
193 if (!(area & ~PAGE_MASK))
194 return area;
195 if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
196 /* Upgrade the page table to 4 levels and retry. */
197 rc = crst_table_upgrade(mm); 187 rc = crst_table_upgrade(mm);
198 if (rc) 188 if (rc)
199 return (unsigned long) rc; 189 return (unsigned long) rc;
200 area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
201 } 190 }
202 return area;
203}
204
205static unsigned long
206s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
207 const unsigned long len, const unsigned long pgoff,
208 const unsigned long flags)
209{
210 struct mm_struct *mm = current->mm;
211 unsigned long area;
212 int rc;
213 191
214 area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); 192 return addr;
215 if (!(area & ~PAGE_MASK))
216 return area;
217 if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
218 /* Upgrade the page table to 4 levels and retry. */
219 rc = crst_table_upgrade(mm);
220 if (rc)
221 return (unsigned long) rc;
222 area = arch_get_unmapped_area_topdown(filp, addr, len,
223 pgoff, flags);
224 }
225 return area;
226} 193}
194
227/* 195/*
228 * This function, called very early during the creation of a new 196 * This function, called very early during the creation of a new
229 * process VM image, sets up which VM layout function to use: 197 * process VM image, sets up which VM layout function to use:
@@ -241,9 +209,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
241 */ 209 */
242 if (mmap_is_legacy()) { 210 if (mmap_is_legacy()) {
243 mm->mmap_base = mmap_base_legacy(random_factor); 211 mm->mmap_base = mmap_base_legacy(random_factor);
244 mm->get_unmapped_area = s390_get_unmapped_area; 212 mm->get_unmapped_area = arch_get_unmapped_area;
245 } else { 213 } else {
246 mm->mmap_base = mmap_base(random_factor); 214 mm->mmap_base = mmap_base(random_factor);
247 mm->get_unmapped_area = s390_get_unmapped_area_topdown; 215 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
248 } 216 }
249} 217}
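
With s390_mmap_check() and the s390_get_unmapped_area*() wrappers gone, the asce-limit handling lives inline in arch_get_unmapped_area[_topdown](): when the chosen address would exceed the current limit, the page table is upgraded in place instead of retrying the whole search. A compact userspace sketch of the new control flow; the demo_* names are stand-ins, and only the 1UL << 53 limit is taken from crst_table_upgrade() in the pgalloc.c hunk further down:

#define DEMO_PAGE_SIZE  4096UL
#define DEMO_PAGE_MASK  (~(DEMO_PAGE_SIZE - 1))

static unsigned long demo_asce_limit = 1UL << 42;       /* 3-level table */

static unsigned long demo_find_area(unsigned long len)
{
        return DEMO_PAGE_SIZE;          /* stands in for vm_unmapped_area() */
}

static int demo_crst_table_upgrade(void)
{
        demo_asce_limit = 1UL << 53;    /* 4-level table, as in pgalloc.c */
        return 0;
}

static unsigned long demo_get_unmapped_area(unsigned long addr,
                                            unsigned long len)
{
        int rc;

        if (!addr)
                addr = demo_find_area(len);
        if (addr & ~DEMO_PAGE_MASK)     /* an error code, not an address */
                return addr;

        /* upgrade the page table in place instead of retrying the search */
        if (addr + len > demo_asce_limit) {
                rc = demo_crst_table_upgrade();
                if (rc)
                        return (unsigned long) rc;
        }
        return addr;
}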
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 3330ea124eec..69a7b01ae746 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -13,8 +13,7 @@
13#include <linux/gfp.h> 13#include <linux/gfp.h>
14#include <linux/init.h> 14#include <linux/init.h>
15 15
16#define ESSA_SET_STABLE 1 16#include <asm/page-states.h>
17#define ESSA_SET_UNUSED 2
18 17
19static int cmma_flag = 1; 18static int cmma_flag = 1;
20 19
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index fc5dc33bb141..fc321c5ec30e 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -94,7 +94,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
94 new = pte_wrprotect(new); 94 new = pte_wrprotect(new);
95 else if (flags & SET_MEMORY_RW) 95 else if (flags & SET_MEMORY_RW)
96 new = pte_mkwrite(pte_mkdirty(new)); 96 new = pte_mkwrite(pte_mkdirty(new));
97 if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) 97 if (flags & SET_MEMORY_NX)
98 pte_val(new) |= _PAGE_NOEXEC; 98 pte_val(new) |= _PAGE_NOEXEC;
99 else if (flags & SET_MEMORY_X) 99 else if (flags & SET_MEMORY_X)
100 pte_val(new) &= ~_PAGE_NOEXEC; 100 pte_val(new) &= ~_PAGE_NOEXEC;
@@ -144,7 +144,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
144 new = pmd_wrprotect(new); 144 new = pmd_wrprotect(new);
145 else if (flags & SET_MEMORY_RW) 145 else if (flags & SET_MEMORY_RW)
146 new = pmd_mkwrite(pmd_mkdirty(new)); 146 new = pmd_mkwrite(pmd_mkdirty(new));
147 if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) 147 if (flags & SET_MEMORY_NX)
148 pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC; 148 pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
149 else if (flags & SET_MEMORY_X) 149 else if (flags & SET_MEMORY_X)
150 pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC; 150 pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
@@ -221,7 +221,7 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
221 new = pud_wrprotect(new); 221 new = pud_wrprotect(new);
222 else if (flags & SET_MEMORY_RW) 222 else if (flags & SET_MEMORY_RW)
223 new = pud_mkwrite(pud_mkdirty(new)); 223 new = pud_mkwrite(pud_mkdirty(new));
224 if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX) 224 if (flags & SET_MEMORY_NX)
225 pud_val(new) |= _REGION_ENTRY_NOEXEC; 225 pud_val(new) |= _REGION_ENTRY_NOEXEC;
226 else if (flags & SET_MEMORY_X) 226 else if (flags & SET_MEMORY_X)
227 pud_val(new) &= ~_REGION_ENTRY_NOEXEC; 227 pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
@@ -288,6 +288,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
288 288
289int __set_memory(unsigned long addr, int numpages, unsigned long flags) 289int __set_memory(unsigned long addr, int numpages, unsigned long flags)
290{ 290{
291 if (!MACHINE_HAS_NX)
292 flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
293 if (!flags)
294 return 0;
291 addr &= PAGE_MASK; 295 addr &= PAGE_MASK;
292 return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags); 296 return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
293} 297}
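
With the facility check centralized in __set_memory(), the per-level walkers no longer test MACHINE_HAS_NX individually; unsupported NX/X requests are masked once at the entry point, and the call becomes a no-op if nothing remains. A userspace sketch of that masking idea, all names illustrative:

#define DEMO_SET_MEMORY_NX      0x1UL
#define DEMO_SET_MEMORY_X       0x2UL

static int demo_machine_has_nx;         /* stands in for MACHINE_HAS_NX */

static int demo_set_memory(unsigned long addr, int numpages,
                           unsigned long flags)
{
        /* strip NX/X requests once, instead of testing at every level */
        if (!demo_machine_has_nx)
                flags &= ~(DEMO_SET_MEMORY_NX | DEMO_SET_MEMORY_X);
        if (!flags)
                return 0;               /* nothing left to change */
        /* ... walk the page tables and apply the remaining flags ... */
        return 0;
}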
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 995f78532cc2..f502cbe657af 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -95,7 +95,6 @@ int crst_table_upgrade(struct mm_struct *mm)
95 mm->context.asce_limit = 1UL << 53; 95 mm->context.asce_limit = 1UL << 53;
96 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 96 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
97 _ASCE_USER_BITS | _ASCE_TYPE_REGION2; 97 _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
98 mm->task_size = mm->context.asce_limit;
99 spin_unlock_bh(&mm->page_table_lock); 98 spin_unlock_bh(&mm->page_table_lock);
100 99
101 on_each_cpu(__crst_table_upgrade, mm, 0); 100 on_each_cpu(__crst_table_upgrade, mm, 0);
@@ -119,7 +118,6 @@ void crst_table_downgrade(struct mm_struct *mm)
119 mm->context.asce_limit = 1UL << 31; 118 mm->context.asce_limit = 1UL << 31;
120 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 119 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
121 _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; 120 _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
122 mm->task_size = mm->context.asce_limit;
123 crst_table_free(mm, (unsigned long *) pgd); 121 crst_table_free(mm, (unsigned long *) pgd);
124 122
125 if (current->active_mm == mm) 123 if (current->active_mm == mm)
@@ -144,7 +142,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
144 struct page *page; 142 struct page *page;
145 unsigned long *table; 143 unsigned long *table;
146 144
147 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 145 page = alloc_page(GFP_KERNEL);
148 if (page) { 146 if (page) {
149 table = (unsigned long *) page_to_phys(page); 147 table = (unsigned long *) page_to_phys(page);
150 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 148 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 463e5ef02304..947b66a5cdba 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
23#include <asm/tlb.h> 23#include <asm/tlb.h>
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25#include <asm/mmu_context.h> 25#include <asm/mmu_context.h>
26#include <asm/page-states.h>
26 27
27static inline pte_t ptep_flush_direct(struct mm_struct *mm, 28static inline pte_t ptep_flush_direct(struct mm_struct *mm,
28 unsigned long addr, pte_t *ptep) 29 unsigned long addr, pte_t *ptep)
@@ -787,4 +788,156 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
787 return 0; 788 return 0;
788} 789}
789EXPORT_SYMBOL(get_guest_storage_key); 790EXPORT_SYMBOL(get_guest_storage_key);
791
792/**
793 * pgste_perform_essa - perform ESSA actions on the PGSTE.
794 * @mm: the memory context. It must have PGSTEs, no check is performed here!
795 * @hva: the host virtual address of the page whose PGSTE is to be processed
796 * @orc: the specific action to perform, see the ESSA_SET_* macros.
797 * @oldpte: the PTE will be saved there if the pointer is not NULL.
798 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
799 *
800 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
801 * or < 0 in case of error. -EINVAL is returned for invalid values
802 * of orc, -EFAULT for invalid addresses.
803 */
804int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
805 unsigned long *oldpte, unsigned long *oldpgste)
806{
807 unsigned long pgstev;
808 spinlock_t *ptl;
809 pgste_t pgste;
810 pte_t *ptep;
811 int res = 0;
812
813 WARN_ON_ONCE(orc > ESSA_MAX);
814 if (unlikely(orc > ESSA_MAX))
815 return -EINVAL;
816 ptep = get_locked_pte(mm, hva, &ptl);
817 if (unlikely(!ptep))
818 return -EFAULT;
819 pgste = pgste_get_lock(ptep);
820 pgstev = pgste_val(pgste);
821 if (oldpte)
822 *oldpte = pte_val(*ptep);
823 if (oldpgste)
824 *oldpgste = pgstev;
825
826 switch (orc) {
827 case ESSA_GET_STATE:
828 break;
829 case ESSA_SET_STABLE:
830 pgstev &= ~_PGSTE_GPS_USAGE_MASK;
831 pgstev |= _PGSTE_GPS_USAGE_STABLE;
832 break;
833 case ESSA_SET_UNUSED:
834 pgstev &= ~_PGSTE_GPS_USAGE_MASK;
835 pgstev |= _PGSTE_GPS_USAGE_UNUSED;
836 if (pte_val(*ptep) & _PAGE_INVALID)
837 res = 1;
838 break;
839 case ESSA_SET_VOLATILE:
840 pgstev &= ~_PGSTE_GPS_USAGE_MASK;
841 pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
842 if (pte_val(*ptep) & _PAGE_INVALID)
843 res = 1;
844 break;
845 case ESSA_SET_POT_VOLATILE:
846 pgstev &= ~_PGSTE_GPS_USAGE_MASK;
847 if (!(pte_val(*ptep) & _PAGE_INVALID)) {
848 pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
849 break;
850 }
851 if (pgstev & _PGSTE_GPS_ZERO) {
852 pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
853 break;
854 }
855 if (!(pgstev & PGSTE_GC_BIT)) {
856 pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
857 res = 1;
858 break;
859 }
860 break;
861 case ESSA_SET_STABLE_RESIDENT:
862 pgstev &= ~_PGSTE_GPS_USAGE_MASK;
863 pgstev |= _PGSTE_GPS_USAGE_STABLE;
864 /*
865 * Since the resident state can go away any time after this
866 * call, we will not make this page resident. We can revisit
867 * this decision if a guest will ever start using this.
868 */
869 break;
870 case ESSA_SET_STABLE_IF_RESIDENT:
871 if (!(pte_val(*ptep) & _PAGE_INVALID)) {
872 pgstev &= ~_PGSTE_GPS_USAGE_MASK;
873 pgstev |= _PGSTE_GPS_USAGE_STABLE;
874 }
875 break;
876 default:
877 /* we should never get here! */
878 break;
879 }
880 /* If we are discarding a page, set it to logical zero */
881 if (res)
882 pgstev |= _PGSTE_GPS_ZERO;
883
884 pgste_val(pgste) = pgstev;
885 pgste_set_unlock(ptep, pgste);
886 pte_unmap_unlock(ptep, ptl);
887 return res;
888}
889EXPORT_SYMBOL(pgste_perform_essa);
890
891/**
892 * set_pgste_bits - set specific PGSTE bits.
893 * @mm: the memory context. It must have PGSTEs, no check is performed here!
894 * @hva: the host virtual address of the page whose PGSTE is to be processed
895 * @bits: a bitmask representing the bits that will be touched
896 * @value: the values of the bits to be written. Only the bits in the mask
897 * will be written.
898 *
899 * Return: 0 on success, < 0 in case of error.
900 */
901int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
902 unsigned long bits, unsigned long value)
903{
904 spinlock_t *ptl;
905 pgste_t new;
906 pte_t *ptep;
907
908 ptep = get_locked_pte(mm, hva, &ptl);
909 if (unlikely(!ptep))
910 return -EFAULT;
911 new = pgste_get_lock(ptep);
912
913 pgste_val(new) &= ~bits;
914 pgste_val(new) |= value & bits;
915
916 pgste_set_unlock(ptep, new);
917 pte_unmap_unlock(ptep, ptl);
918 return 0;
919}
920EXPORT_SYMBOL(set_pgste_bits);
921
922/**
923 * get_pgste - get the current PGSTE for the given address.
924 * @mm: the memory context. It must have PGSTEs, no check is performed here!
925 * @hva: the host virtual address of the page whose PGSTE is to be processed
926 * @pgstep: will be written with the current PGSTE for the given address.
927 *
928 * Return: 0 on success, < 0 in case of error.
929 */
930int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
931{
932 spinlock_t *ptl;
933 pte_t *ptep;
934
935 ptep = get_locked_pte(mm, hva, &ptl);
936 if (unlikely(!ptep))
937 return -EFAULT;
938 *pgstep = pgste_val(pgste_get(ptep));
939 pte_unmap_unlock(ptep, ptl);
940 return 0;
941}
942EXPORT_SYMBOL(get_pgste);
790#endif 943#endif
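
The three PGSTE helpers are exported for use by KVM. A hedged sketch of how a kernel-side caller might combine them; the wrapper itself is hypothetical, and only get_pgste() and set_pgste_bits() come from this patch:

/*
 * Hypothetical caller (e.g. from a KVM ioctl handler): read the current
 * PGSTE, then rewrite only the bits selected by the mask if they differ.
 */
static int demo_update_pgste(struct mm_struct *mm, unsigned long hva,
                             unsigned long mask, unsigned long value)
{
        unsigned long old;
        int rc;

        rc = get_pgste(mm, hva, &old);  /* -EFAULT if hva has no PTE */
        if (rc)
                return rc;
        if ((old & mask) == (value & mask))
                return 0;               /* bits already have the wanted value */
        /* set_pgste_bits() touches only the bits present in the mask */
        return set_pgste_bits(mm, hva, mask, value);
}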
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 364b9d824be3..8051df109db3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -60,16 +60,8 @@ static DEFINE_SPINLOCK(zpci_domain_lock);
60static struct airq_iv *zpci_aisb_iv; 60static struct airq_iv *zpci_aisb_iv;
61static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; 61static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
62 62
63/* Adapter interrupt definitions */
64static void zpci_irq_handler(struct airq_struct *airq);
65
66static struct airq_struct zpci_airq = {
67 .handler = zpci_irq_handler,
68 .isc = PCI_ISC,
69};
70
71#define ZPCI_IOMAP_ENTRIES \ 63#define ZPCI_IOMAP_ENTRIES \
72 min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT), \ 64 min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \
73 ZPCI_IOMAP_MAX_ENTRIES) 65 ZPCI_IOMAP_MAX_ENTRIES)
74 66
75static DEFINE_SPINLOCK(zpci_iomap_lock); 67static DEFINE_SPINLOCK(zpci_iomap_lock);
@@ -214,8 +206,6 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
214 return rc; 206 return rc;
215} 207}
216 208
217#define ZPCI_PCIAS_CFGSPC 15
218
219static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) 209static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
220{ 210{
221 u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len); 211 u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
@@ -507,6 +497,11 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
507 } 497 }
508} 498}
509 499
500static struct airq_struct zpci_airq = {
501 .handler = zpci_irq_handler,
502 .isc = PCI_ISC,
503};
504
510static int __init zpci_irq_init(void) 505static int __init zpci_irq_init(void)
511{ 506{
512 int rc; 507 int rc;
@@ -871,11 +866,6 @@ int zpci_report_error(struct pci_dev *pdev,
871} 866}
872EXPORT_SYMBOL(zpci_report_error); 867EXPORT_SYMBOL(zpci_report_error);
873 868
874static inline int barsize(u8 size)
875{
876 return (size) ? (1 << size) >> 10 : 0;
877}
878
879static int zpci_mem_init(void) 869static int zpci_mem_init(void)
880{ 870{
881 BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || 871 BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 0cafe08919c9..b9918fb9587d 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -423,6 +423,20 @@ config HW_RANDOM_CAVIUM
423 423
424 If unsure, say Y. 424 If unsure, say Y.
425 425
426config HW_RANDOM_S390
427 tristate "S390 True Random Number Generator support"
428 depends on S390
429 default HW_RANDOM
430 ---help---
431 This driver provides kernel-side support for the True
432 Random Number Generator available as CPACF extension
433 on modern s390 hardware platforms.
434
435 To compile this driver as a module, choose M here: the
436 module will be called s390-trng.
437
438 If unsure, say Y.
439
426endif # HW_RANDOM 440endif # HW_RANDOM
427 441
428config UML_RANDOM 442config UML_RANDOM
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 5f52b1e4e7be..dd1765246255 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
36obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o 36obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o
37obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o 37obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
38obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o 38obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
39obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c
new file mode 100644
index 000000000000..aca48e893fca
--- /dev/null
+++ b/drivers/char/hw_random/s390-trng.c
@@ -0,0 +1,268 @@
1/*
2 * s390 TRNG device driver
3 *
4 * Driver for the TRNG (true random number generation) command
5 * available via CPACF extension MSA 7 on the s390 arch.
6 *
7 * Copyright IBM Corp. 2017
8 * Author(s): Harald Freudenberger <freude@de.ibm.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License (version 2 only)
12 * as published by the Free Software Foundation.
13 *
14 */
15
16#define KMSG_COMPONENT "trng"
17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
19#include <linux/hw_random.h>
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/cpufeature.h>
23#include <linux/miscdevice.h>
24#include <linux/debugfs.h>
25#include <linux/atomic.h>
26#include <linux/random.h>
27#include <linux/sched/signal.h>
28#include <asm/debug.h>
29#include <asm/cpacf.h>
30
31MODULE_LICENSE("GPL v2");
32MODULE_AUTHOR("IBM Corporation");
33MODULE_DESCRIPTION("s390 CPACF TRNG device driver");
34
35
36/* trng related debug feature things */
37
38static debug_info_t *debug_info;
39
40#define DEBUG_DBG(...) debug_sprintf_event(debug_info, 6, ##__VA_ARGS__)
41#define DEBUG_INFO(...) debug_sprintf_event(debug_info, 5, ##__VA_ARGS__)
42#define DEBUG_WARN(...) debug_sprintf_event(debug_info, 4, ##__VA_ARGS__)
43#define DEBUG_ERR(...) debug_sprintf_event(debug_info, 3, ##__VA_ARGS__)
44
45
46/* trng helpers */
47
48static atomic64_t trng_dev_counter = ATOMIC64_INIT(0);
49static atomic64_t trng_hwrng_counter = ATOMIC64_INIT(0);
50
51
52/* file io functions */
53
54static int trng_open(struct inode *inode, struct file *file)
55{
56 return nonseekable_open(inode, file);
57}
58
59static ssize_t trng_read(struct file *file, char __user *ubuf,
60 size_t nbytes, loff_t *ppos)
61{
62 u8 buf[32];
63 u8 *p = buf;
64 unsigned int n;
65 ssize_t ret = 0;
66
67 /*
68 * use buf for requests <= sizeof(buf),
69 * otherwise allocate one page and fetch
70 * pagewise.
71 */
72
73 if (nbytes > sizeof(buf)) {
74 p = (u8 *) __get_free_page(GFP_KERNEL);
75 if (!p)
76 return -ENOMEM;
77 }
78
79 while (nbytes) {
80 if (need_resched()) {
81 if (signal_pending(current)) {
82 if (ret == 0)
83 ret = -ERESTARTSYS;
84 break;
85 }
86 schedule();
87 }
88 n = nbytes > PAGE_SIZE ? PAGE_SIZE : nbytes;
89 cpacf_trng(NULL, 0, p, n);
90 atomic64_add(n, &trng_dev_counter);
91 if (copy_to_user(ubuf, p, n)) {
92 ret = -EFAULT;
93 break;
94 }
95 nbytes -= n;
96 ubuf += n;
97 ret += n;
98 }
99
100 if (p != buf)
101 free_page((unsigned long) p);
102
103 DEBUG_DBG("trng_read()=%zd\n", ret);
104 return ret;
105}
106
107
108/* sysfs */
109
110static ssize_t trng_counter_show(struct device *dev,
111 struct device_attribute *attr, char *buf)
112{
113 u64 dev_counter = atomic64_read(&trng_dev_counter);
114 u64 hwrng_counter = atomic64_read(&trng_hwrng_counter);
115#if IS_ENABLED(CONFIG_ARCH_RANDOM)
116 u64 arch_counter = atomic64_read(&s390_arch_random_counter);
117
118 return snprintf(buf, PAGE_SIZE,
119 "trng: %llu\n"
120 "hwrng: %llu\n"
121 "arch: %llu\n"
122 "total: %llu\n",
123 dev_counter, hwrng_counter, arch_counter,
124 dev_counter + hwrng_counter + arch_counter);
125#else
126 return snprintf(buf, PAGE_SIZE,
127 "trng: %llu\n"
128 "hwrng: %llu\n"
129 "total: %llu\n",
130 dev_counter, hwrng_counter,
131 dev_counter + hwrng_counter);
132#endif
133}
134static DEVICE_ATTR(byte_counter, 0444, trng_counter_show, NULL);
135
136static struct attribute *trng_dev_attrs[] = {
137 &dev_attr_byte_counter.attr,
138 NULL
139};
140
141static const struct attribute_group trng_dev_attr_group = {
142 .attrs = trng_dev_attrs
143};
144
145static const struct attribute_group *trng_dev_attr_groups[] = {
146 &trng_dev_attr_group,
147 NULL
148};
149
150static const struct file_operations trng_fops = {
151 .owner = THIS_MODULE,
152 .open = &trng_open,
153 .release = NULL,
154 .read = &trng_read,
155 .llseek = noop_llseek,
156};
157
158static struct miscdevice trng_dev = {
159 .name = "trng",
160 .minor = MISC_DYNAMIC_MINOR,
161 .mode = 0444,
162 .fops = &trng_fops,
163 .groups = trng_dev_attr_groups,
164};
165
166
167/* hwrng_register */
168
169static inline void _trng_hwrng_read(u8 *buf, size_t len)
170{
171 cpacf_trng(NULL, 0, buf, len);
172 atomic64_add(len, &trng_hwrng_counter);
173}
174
175static int trng_hwrng_data_read(struct hwrng *rng, u32 *data)
176{
177 size_t len = sizeof(*data);
178
179 _trng_hwrng_read((u8 *) data, len);
180
181 DEBUG_DBG("trng_hwrng_data_read()=%zu\n", len);
182
183 return len;
184}
185
186static int trng_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
187{
188 size_t len = max <= PAGE_SIZE ? max : PAGE_SIZE;
189
190 _trng_hwrng_read((u8 *) data, len);
191
192 DEBUG_DBG("trng_hwrng_read()=%zu\n", len);
193
194 return len;
195}
196
197/*
198 * hwrng register struct
199 * The trng is supposed to have 100% entropy, and thus
200 * we register with a very high quality value.
201 */
202static struct hwrng trng_hwrng_dev = {
203 .name = "s390-trng",
204 .data_read = trng_hwrng_data_read,
205 .read = trng_hwrng_read,
206 .quality = 999,
207};
208
209
210/* init and exit */
211
212static void __init trng_debug_init(void)
213{
214 debug_info = debug_register("trng", 1, 1, 4 * sizeof(long));
215 debug_register_view(debug_info, &debug_sprintf_view);
216 debug_set_level(debug_info, 3);
217}
218
219static void trng_debug_exit(void)
220{
221 debug_unregister(debug_info);
222}
223
224static int __init trng_init(void)
225{
226 int ret;
227
228 trng_debug_init();
229
230 /* check if subfunction CPACF_PRNO_TRNG is available */
231 if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) {
232 DEBUG_INFO("trng_init CPACF_PRNO_TRNG not available\n");
233 ret = -ENODEV;
234 goto out_dbg;
235 }
236
237 ret = misc_register(&trng_dev);
238 if (ret) {
239 DEBUG_WARN("trng_init misc_register() failed rc=%d\n", ret);
240 goto out_dbg;
241 }
242
243 ret = hwrng_register(&trng_hwrng_dev);
244 if (ret) {
245 DEBUG_WARN("trng_init hwrng_register() failed rc=%d\n", ret);
246 goto out_misc;
247 }
248
249 DEBUG_DBG("trng_init successful\n");
250
251 return 0;
252
253out_misc:
254 misc_deregister(&trng_dev);
255out_dbg:
256 trng_debug_exit();
257 return ret;
258}
259
260static void __exit trng_exit(void)
261{
262 hwrng_unregister(&trng_hwrng_dev);
263 misc_deregister(&trng_dev);
264 trng_debug_exit();
265}
266
267module_cpu_feature_match(MSA, trng_init);
268module_exit(trng_exit);
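
The driver registers both a hwrng backend and a miscdevice named "trng", so the device node can be read directly. A minimal userspace consumer; the /dev/trng path assumes the usual udev-created node:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        unsigned char buf[16];
        ssize_t n, i;
        int fd = open("/dev/trng", O_RDONLY);

        if (fd < 0) {
                perror("open /dev/trng");
                return 1;
        }
        n = read(fd, buf, sizeof(buf)); /* served by trng_read() above */
        for (i = 0; i < n; i++)
                printf("%02x", buf[i]);
        if (n > 0)
                printf("\n");
        close(fd);
        return 0;
}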
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 37e204f3d9be..6ee3a25ae731 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -327,6 +327,14 @@ config S390_IOMMU
327 help 327 help
328 Support for the IOMMU API for s390 PCI devices. 328 Support for the IOMMU API for s390 PCI devices.
329 329
330config S390_CCW_IOMMU
331 bool "S390 CCW IOMMU Support"
332 depends on S390 && CCW
333 select IOMMU_API
334 help
 335	  Enables the parts of the IOMMU API required by VFIO. The
 336	  iommu_ops callbacks are not implemented, as VFIO does not need them.
337
330config MTK_IOMMU 338config MTK_IOMMU
331 bool "MTK IOMMU Support" 339 bool "MTK IOMMU Support"
332 depends on ARM || ARM64 340 depends on ARM || ARM64
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 774da20ceb58..107cd3361e29 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -1052,8 +1052,9 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
1052 } else { 1052 } else {
1053 /* fatal error - set status to FAILED 1053 /* fatal error - set status to FAILED
1054 internal error 09 - Command Reject */ 1054 internal error 09 - Command Reject */
1055 dev_err(&device->cdev->dev, "An error occurred in the DASD " 1055 if (!test_bit(DASD_CQR_SUPPRESS_CR, &erp->flags))
1056 "device driver, reason=%s\n", "09"); 1056 dev_err(&device->cdev->dev,
1057 "An error occurred in the DASD device driver, reason=09\n");
1057 1058
1058 erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); 1059 erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
1059 } 1060 }
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 0b38217f8147..122456e4db89 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -4927,10 +4927,14 @@ static void dasd_eckd_dump_sense(struct dasd_device *device,
4927 dasd_eckd_dump_sense_tcw(device, req, irb); 4927 dasd_eckd_dump_sense_tcw(device, req, irb);
4928 } else { 4928 } else {
4929 /* 4929 /*
4930 * In some cases the 'No Record Found' error might be expected 4930 * In some cases the 'Command Reject' or 'No Record Found'
4931 * and log messages shouldn't be written then. Check if the 4931 * error might be expected and log messages shouldn't be
4932 * according suppress bit is set. 4932 * written then. Check if the according suppress bit is set.
4933 */ 4933 */
4934 if (sense && sense[0] & SNS0_CMD_REJECT &&
4935 test_bit(DASD_CQR_SUPPRESS_CR, &req->flags))
4936 return;
4937
4934 if (sense && sense[1] & SNS1_NO_REC_FOUND && 4938 if (sense && sense[1] & SNS1_NO_REC_FOUND &&
4935 test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) 4939 test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags))
4936 return; 4940 return;
@@ -5172,6 +5176,10 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
5172 if (!device->block && private->lcu->pav == HYPER_PAV) 5176 if (!device->block && private->lcu->pav == HYPER_PAV)
5173 return -EOPNOTSUPP; 5177 return -EOPNOTSUPP;
5174 5178
5179 /* may not be supported by the storage server */
5180 if (!(private->features.feature[14] & 0x80))
5181 return -EOPNOTSUPP;
5182
5175 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, 5183 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
5176 sizeof(struct dasd_psf_prssd_data) + 1, 5184 sizeof(struct dasd_psf_prssd_data) + 1,
5177 device); 5185 device);
@@ -5219,6 +5227,8 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
5219 5227
5220 cqr->buildclk = get_tod_clock(); 5228 cqr->buildclk = get_tod_clock();
5221 cqr->status = DASD_CQR_FILLED; 5229 cqr->status = DASD_CQR_FILLED;
5230 /* the command might not be supported, suppress error message */
5231 __set_bit(DASD_CQR_SUPPRESS_CR, &cqr->flags);
5222 rc = dasd_sleep_on_interruptible(cqr); 5232 rc = dasd_sleep_on_interruptible(cqr);
5223 if (rc == 0) { 5233 if (rc == 0) {
5224 *data = *host_access; 5234 *data = *host_access;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 518dba2732d5..dca7cb1e6f65 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -239,11 +239,11 @@ struct dasd_ccw_req {
239 */ 239 */
240/* 240/*
241 * The following flags are used to suppress output of certain errors. 241 * The following flags are used to suppress output of certain errors.
242 * These flags should only be used for format checks!
243 */ 242 */
244#define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */ 243#define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */
245#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/ 244#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/
246#define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ 245#define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */
246#define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */
247 247
248/* Signature for error recovery functions. */ 248/* Signature for error recovery functions. */
249typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *); 249typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *);
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index 3ab9aedeb84a..bdf47526038a 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
17 17
18qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o 18qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
19obj-$(CONFIG_QDIO) += qdio.o 19obj-$(CONFIG_QDIO) += qdio.o
20
21vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
22obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 1b350665c823..89216174fcbb 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -170,12 +170,14 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
170 return ccode; 170 return ccode;
171 } 171 }
172} 172}
173EXPORT_SYMBOL_GPL(cio_start_key);
173 174
174int 175int
175cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm) 176cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm)
176{ 177{
177 return cio_start_key(sch, cpa, lpm, PAGE_DEFAULT_KEY); 178 return cio_start_key(sch, cpa, lpm, PAGE_DEFAULT_KEY);
178} 179}
180EXPORT_SYMBOL_GPL(cio_start);
179 181
180/* 182/*
181 * resume suspended I/O operation 183 * resume suspended I/O operation
@@ -208,6 +210,7 @@ cio_resume (struct subchannel *sch)
208 return -ENODEV; 210 return -ENODEV;
209 } 211 }
210} 212}
213EXPORT_SYMBOL_GPL(cio_resume);
211 214
212/* 215/*
213 * halt I/O operation 216 * halt I/O operation
@@ -241,6 +244,7 @@ cio_halt(struct subchannel *sch)
241 return -ENODEV; 244 return -ENODEV;
242 } 245 }
243} 246}
247EXPORT_SYMBOL_GPL(cio_halt);
244 248
245/* 249/*
246 * Clear I/O operation 250 * Clear I/O operation
@@ -271,6 +275,7 @@ cio_clear(struct subchannel *sch)
271 return -ENODEV; 275 return -ENODEV;
272 } 276 }
273} 277}
278EXPORT_SYMBOL_GPL(cio_clear);
274 279
275/* 280/*
276 * Function: cio_cancel 281 * Function: cio_cancel
@@ -308,7 +313,68 @@ cio_cancel (struct subchannel *sch)
308 return -ENODEV; 313 return -ENODEV;
309 } 314 }
310} 315}
316EXPORT_SYMBOL_GPL(cio_cancel);
311 317
318/**
319 * cio_cancel_halt_clear - Cancel running I/O by performing cancel, halt
 320 * and clear, in that order, if the subchannel is valid.
321 * @sch: subchannel on which to perform the cancel_halt_clear operation
 322 * @iretry: the number of retries remaining for the next operation
323 *
324 * This should be called repeatedly since halt/clear are asynchronous
325 * operations. We do one try with cio_cancel, three tries with cio_halt,
326 * 255 tries with cio_clear. The caller should initialize @iretry with
327 * the value 255 for its first call to this, and keep using the same
328 * @iretry in the subsequent calls until it gets a non -EBUSY return.
329 *
330 * Returns 0 if device now idle, -ENODEV for device not operational,
331 * -EBUSY if an interrupt is expected (either from halt/clear or from a
332 * status pending), and -EIO if out of retries.
333 */
334int cio_cancel_halt_clear(struct subchannel *sch, int *iretry)
335{
336 int ret;
337
338 if (cio_update_schib(sch))
339 return -ENODEV;
340 if (!sch->schib.pmcw.ena)
341 /* Not operational -> done. */
342 return 0;
343 /* Stage 1: cancel io. */
344 if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
345 !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
346 if (!scsw_is_tm(&sch->schib.scsw)) {
347 ret = cio_cancel(sch);
348 if (ret != -EINVAL)
349 return ret;
350 }
351 /*
352 * Cancel io unsuccessful or not applicable (transport mode).
353 * Continue with asynchronous instructions.
354 */
355 *iretry = 3; /* 3 halt retries. */
356 }
357 /* Stage 2: halt io. */
358 if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
359 if (*iretry) {
360 *iretry -= 1;
361 ret = cio_halt(sch);
362 if (ret != -EBUSY)
363 return (ret == 0) ? -EBUSY : ret;
364 }
365 /* Halt io unsuccessful. */
366 *iretry = 255; /* 255 clear retries. */
367 }
368 /* Stage 3: clear io. */
369 if (*iretry) {
370 *iretry -= 1;
371 ret = cio_clear(sch);
372 return (ret == 0) ? -EBUSY : ret;
373 }
374 /* Function was unsuccessful */
375 return -EIO;
376}
377EXPORT_SYMBOL_GPL(cio_cancel_halt_clear);
312 378
313static void cio_apply_config(struct subchannel *sch, struct schib *schib) 379static void cio_apply_config(struct subchannel *sch, struct schib *schib)
314{ 380{
@@ -382,6 +448,7 @@ int cio_commit_config(struct subchannel *sch)
382 } 448 }
383 return ret; 449 return ret;
384} 450}
451EXPORT_SYMBOL_GPL(cio_commit_config);
385 452
386/** 453/**
387 * cio_update_schib - Perform stsch and update schib if subchannel is valid. 454 * cio_update_schib - Perform stsch and update schib if subchannel is valid.
@@ -987,6 +1054,7 @@ int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key)
987 return cio_start_handle_notoper(sch, lpm); 1054 return cio_start_handle_notoper(sch, lpm);
988 } 1055 }
989} 1056}
1057EXPORT_SYMBOL_GPL(cio_tm_start_key);
990 1058
991/** 1059/**
992 * cio_tm_intrg - perform interrogate function 1060 * cio_tm_intrg - perform interrogate function
@@ -1012,3 +1080,4 @@ int cio_tm_intrg(struct subchannel *sch)
1012 return -ENODEV; 1080 return -ENODEV;
1013 } 1081 }
1014} 1082}
1083EXPORT_SYMBOL_GPL(cio_tm_intrg);
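
The kernel-doc above spells out the calling contract for cio_cancel_halt_clear(): initialize @iretry to 255 once, then keep calling until the result is something other than -EBUSY. A sketch of a conforming caller; demo_wait_for_irq() is a hypothetical stand-in for whatever the driver uses to wait for the expected interrupt, and only cio_cancel_halt_clear() is from this patch:

static int demo_stop_io(struct subchannel *sch)
{
        int iretry = 255;       /* initialized once, reused across calls */
        int ret;

        do {
                ret = cio_cancel_halt_clear(sch, &iretry);
                if (ret == -EBUSY)
                        demo_wait_for_irq(sch);
        } while (ret == -EBUSY);

        return ret;             /* 0, -ENODEV, or -EIO when out of retries */
}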
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index f0e57aefb5f2..939596d81b73 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -123,6 +123,7 @@ extern int cio_enable_subchannel(struct subchannel *, u32);
123extern int cio_disable_subchannel (struct subchannel *); 123extern int cio_disable_subchannel (struct subchannel *);
124extern int cio_cancel (struct subchannel *); 124extern int cio_cancel (struct subchannel *);
125extern int cio_clear (struct subchannel *); 125extern int cio_clear (struct subchannel *);
126extern int cio_cancel_halt_clear(struct subchannel *, int *);
126extern int cio_resume (struct subchannel *); 127extern int cio_resume (struct subchannel *);
127extern int cio_halt (struct subchannel *); 128extern int cio_halt (struct subchannel *);
128extern int cio_start (struct subchannel *, struct ccw1 *, __u8); 129extern int cio_start (struct subchannel *, struct ccw1 *, __u8);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 9afb5ce13007..12016e32e519 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -124,14 +124,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires)
124 add_timer(&cdev->private->timer); 124 add_timer(&cdev->private->timer);
125} 125}
126 126
127/*
128 * Cancel running i/o. This is called repeatedly since halt/clear are
129 * asynchronous operations. We do one try with cio_cancel, two tries
130 * with cio_halt, 255 tries with cio_clear. If everythings fails panic.
131 * Returns 0 if device now idle, -ENODEV for device not operational and
132 * -EBUSY if an interrupt is expected (either from halt/clear or from a
133 * status pending).
134 */
135int 127int
136ccw_device_cancel_halt_clear(struct ccw_device *cdev) 128ccw_device_cancel_halt_clear(struct ccw_device *cdev)
137{ 129{
@@ -139,44 +131,14 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
139 int ret; 131 int ret;
140 132
141 sch = to_subchannel(cdev->dev.parent); 133 sch = to_subchannel(cdev->dev.parent);
142 if (cio_update_schib(sch)) 134 ret = cio_cancel_halt_clear(sch, &cdev->private->iretry);
143 return -ENODEV; 135
144 if (!sch->schib.pmcw.ena) 136 if (ret == -EIO)
145 /* Not operational -> done. */ 137 CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
146 return 0; 138 cdev->private->dev_id.ssid,
147 /* Stage 1: cancel io. */ 139 cdev->private->dev_id.devno);
148 if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) && 140
149 !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) { 141 return ret;
150 if (!scsw_is_tm(&sch->schib.scsw)) {
151 ret = cio_cancel(sch);
152 if (ret != -EINVAL)
153 return ret;
154 }
155 /* cancel io unsuccessful or not applicable (transport mode).
156 * Continue with asynchronous instructions. */
157 cdev->private->iretry = 3; /* 3 halt retries. */
158 }
159 if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
160 /* Stage 2: halt io. */
161 if (cdev->private->iretry) {
162 cdev->private->iretry--;
163 ret = cio_halt(sch);
164 if (ret != -EBUSY)
165 return (ret == 0) ? -EBUSY : ret;
166 }
167 /* halt io unsuccessful. */
168 cdev->private->iretry = 255; /* 255 clear retries. */
169 }
170 /* Stage 3: clear io. */
171 if (cdev->private->iretry) {
172 cdev->private->iretry--;
173 ret = cio_clear (sch);
174 return (ret == 0) ? -EBUSY : ret;
175 }
176 /* Function was unsuccessful */
177 CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
178 cdev->private->dev_id.ssid, cdev->private->dev_id.devno);
179 return -EIO;
180} 142}
181 143
182void ccw_device_update_sense_data(struct ccw_device *cdev) 144void ccw_device_update_sense_data(struct ccw_device *cdev)
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
new file mode 100644
index 000000000000..ba6ac83a6c25
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -0,0 +1,842 @@
1/*
2 * channel program interfaces
3 *
4 * Copyright IBM Corp. 2017
5 *
6 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
7 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
8 */
9
10#include <linux/mm.h>
11#include <linux/slab.h>
12#include <linux/iommu.h>
13#include <linux/vfio.h>
14#include <asm/idals.h>
15
16#include "vfio_ccw_cp.h"
17
18/*
19 * Max length for ccw chain.
20 * XXX: Limit to 256, need to check more?
21 */
22#define CCWCHAIN_LEN_MAX 256
23
24struct pfn_array {
25 unsigned long pa_iova;
26 unsigned long *pa_iova_pfn;
27 unsigned long *pa_pfn;
28 int pa_nr;
29};
30
31struct pfn_array_table {
32 struct pfn_array *pat_pa;
33 int pat_nr;
34};
35
36struct ccwchain {
37 struct list_head next;
38 struct ccw1 *ch_ccw;
39 /* Guest physical address of the current chain. */
40 u64 ch_iova;
41 /* Count of the valid ccws in chain. */
42 int ch_len;
43 /* Pinned PAGEs for the original data. */
44 struct pfn_array_table *ch_pat;
45};
46
47/*
48 * pfn_array_pin() - pin user pages in memory
49 * @pa: pfn_array on which to perform the operation
50 * @mdev: the mediated device to perform pin/unpin operations
51 *
52 * Attempt to pin user pages in memory.
53 *
54 * Usage of pfn_array:
55 * @pa->pa_iova starting guest physical I/O address. Assigned by caller.
56 * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated
57 * by caller.
58 * @pa->pa_pfn array that receives PFNs of the pages pinned. Allocated by
59 * caller.
60 * @pa->pa_nr number of pages from @pa->pa_iova to pin. Assigned by
61 * caller.
 62 *		     On return: the number of pages actually pinned. Assigned by callee.
63 *
64 * Returns:
65 * Number of pages pinned on success.
66 * If @pa->pa_nr is 0 or negative, returns 0.
67 * If no pages were pinned, returns -errno.
68 */
69static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
70{
71 int i, ret;
72
73 if (pa->pa_nr <= 0) {
74 pa->pa_nr = 0;
75 return 0;
76 }
77
78 pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
79 for (i = 1; i < pa->pa_nr; i++)
80 pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
81
82 ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
83 IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
84
85 if (ret > 0 && ret != pa->pa_nr) {
86 vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
87 pa->pa_nr = 0;
88 return 0;
89 }
90
91 return ret;
92}
93
94/* Unpin the pages before releasing the memory. */
95static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
96{
97 vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
98 pa->pa_nr = 0;
99 kfree(pa->pa_iova_pfn);
100}
101
102/* Alloc memory for PFNs, then pin pages with them. */
103static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
104 u64 iova, unsigned int len)
105{
106 int ret = 0;
107
108 if (!len || pa->pa_nr)
109 return -EINVAL;
110
111 pa->pa_iova = iova;
112
113 pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
114 if (!pa->pa_nr)
115 return -EINVAL;
116
117 pa->pa_iova_pfn = kcalloc(pa->pa_nr,
118 sizeof(*pa->pa_iova_pfn) +
119 sizeof(*pa->pa_pfn),
120 GFP_KERNEL);
121 if (unlikely(!pa->pa_iova_pfn))
122 return -ENOMEM;
123 pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
124
125 ret = pfn_array_pin(pa, mdev);
126
127 if (ret > 0)
128 return ret;
129 else if (!ret)
130 ret = -EINVAL;
131
132 kfree(pa->pa_iova_pfn);
133
134 return ret;
135}
136
137static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
138{
139 pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
140 if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
141 pat->pat_nr = 0;
142 return -ENOMEM;
143 }
144
145 pat->pat_nr = nr;
146
147 return 0;
148}
149
150static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
151 struct device *mdev)
152{
153 int i;
154
155 for (i = 0; i < pat->pat_nr; i++)
156 pfn_array_unpin_free(pat->pat_pa + i, mdev);
157
158 if (pat->pat_nr) {
159 kfree(pat->pat_pa);
160 pat->pat_pa = NULL;
161 pat->pat_nr = 0;
162 }
163}
164
165static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
166 unsigned long iova)
167{
168 struct pfn_array *pa = pat->pat_pa;
169 unsigned long iova_pfn = iova >> PAGE_SHIFT;
170 int i, j;
171
172 for (i = 0; i < pat->pat_nr; i++, pa++)
173 for (j = 0; j < pa->pa_nr; j++)
174 if (pa->pa_iova_pfn[i] == iova_pfn)
175 return true;
176
177 return false;
178}
 179/* Create the list of idal words for a pfn_array_table. */
180static inline void pfn_array_table_idal_create_words(
181 struct pfn_array_table *pat,
182 unsigned long *idaws)
183{
184 struct pfn_array *pa;
185 int i, j, k;
186
187 /*
 188	 * Idal words (except the first one) rely on the memory being 4K
 189	 * aligned. If a user virtual address is 4K aligned, then its
 190	 * corresponding kernel physical address will also be 4K aligned.
 191	 * Thus it is safe here to simply use the physical address to
 192	 * create an idaw.
193 */
194 k = 0;
195 for (i = 0; i < pat->pat_nr; i++) {
196 pa = pat->pat_pa + i;
197 for (j = 0; j < pa->pa_nr; j++) {
198 idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
199 if (k == 0)
200 idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
201 k++;
202 }
203 }
204}
205
206
207/*
208 * Within the domain (@mdev), copy @n bytes from a guest physical
209 * address (@iova) to a host physical address (@to).
210 */
211static long copy_from_iova(struct device *mdev,
212 void *to, u64 iova,
213 unsigned long n)
214{
215 struct pfn_array pa = {0};
216 u64 from;
217 int i, ret;
218 unsigned long l, m;
219
220 ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
221 if (ret <= 0)
222 return ret;
223
224 l = n;
225 for (i = 0; i < pa.pa_nr; i++) {
226 from = pa.pa_pfn[i] << PAGE_SHIFT;
227 m = PAGE_SIZE;
228 if (i == 0) {
229 from += iova & (PAGE_SIZE - 1);
230 m -= iova & (PAGE_SIZE - 1);
231 }
232
233 m = min(l, m);
234 memcpy(to + (n - l), (void *)from, m);
235
236 l -= m;
237 if (l == 0)
238 break;
239 }
240
241 pfn_array_unpin_free(&pa, mdev);
242
243 return l;
244}
245
246static long copy_ccw_from_iova(struct channel_program *cp,
247 struct ccw1 *to, u64 iova,
248 unsigned long len)
249{
250 struct ccw0 ccw0;
251 struct ccw1 *pccw1;
252 int ret;
253 int i;
254
255 ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
256 if (ret)
257 return ret;
258
259 if (!cp->orb.cmd.fmt) {
260 pccw1 = to;
261 for (i = 0; i < len; i++) {
262 ccw0 = *(struct ccw0 *)pccw1;
263 if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
264 pccw1->cmd_code = CCW_CMD_TIC;
265 pccw1->flags = 0;
266 pccw1->count = 0;
267 } else {
268 pccw1->cmd_code = ccw0.cmd_code;
269 pccw1->flags = ccw0.flags;
270 pccw1->count = ccw0.count;
271 }
272 pccw1->cda = ccw0.cda;
273 pccw1++;
274 }
275 }
276
277 return ret;
278}
279
280/*
281 * Helpers to operate ccwchain.
282 */
283#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
284
285#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
286
287#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
288
289#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
290
291
292#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
293
294static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
295{
296 struct ccwchain *chain;
297 void *data;
298 size_t size;
299
300 /* Make ccw address aligned to 8. */
301 size = ((sizeof(*chain) + 7L) & -8L) +
302 sizeof(*chain->ch_ccw) * len +
303 sizeof(*chain->ch_pat) * len;
304 chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
305 if (!chain)
306 return NULL;
307
308 data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
309 chain->ch_ccw = (struct ccw1 *)data;
310
311 data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
312 chain->ch_pat = (struct pfn_array_table *)data;
313
314 chain->ch_len = len;
315
316 list_add_tail(&chain->next, &cp->ccwchain_list);
317
318 return chain;
319}
320
321static void ccwchain_free(struct ccwchain *chain)
322{
323 list_del(&chain->next);
324 kfree(chain);
325}
326
327/* Free resource for a ccw that allocated memory for its cda. */
328static void ccwchain_cda_free(struct ccwchain *chain, int idx)
329{
330 struct ccw1 *ccw = chain->ch_ccw + idx;
331
332 if (!ccw->count)
333 return;
334
335 kfree((void *)(u64)ccw->cda);
336}
337
338/* Unpin the pages then free the memory resources. */
339static void cp_unpin_free(struct channel_program *cp)
340{
341 struct ccwchain *chain, *temp;
342 int i;
343
344 list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
345 for (i = 0; i < chain->ch_len; i++) {
346 pfn_array_table_unpin_free(chain->ch_pat + i,
347 cp->mdev);
348 ccwchain_cda_free(chain, i);
349 }
350 ccwchain_free(chain);
351 }
352}
353
354/**
355 * ccwchain_calc_length - calculate the length of the ccw chain.
356 * @iova: guest physical address of the target ccw chain
357 * @cp: channel_program on which to perform the operation
358 *
359 * This is the chain length not considering any TICs.
360 * You need to do a new round for each TIC target.
361 *
362 * Returns: the length of the ccw chain or -errno.
363 */
364static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
365{
366 struct ccw1 *ccw, *p;
367 int cnt;
368
369 /*
370 * Copy current chain from guest to host kernel.
371 * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
372 * So copying 2K is enough (safe).
373 */
374 p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
375 if (!ccw)
376 return -ENOMEM;
377
378 cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
379 if (cnt) {
380 kfree(ccw);
381 return cnt;
382 }
383
384 cnt = 0;
385 do {
386 cnt++;
387
388 if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
389 break;
390
391 ccw++;
392 } while (cnt < CCWCHAIN_LEN_MAX + 1);
393
394 if (cnt == CCWCHAIN_LEN_MAX + 1)
395 cnt = -EINVAL;
396
397 kfree(p);
398 return cnt;
399}
400
401static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
402{
403 struct ccwchain *chain;
404 u32 ccw_head, ccw_tail;
405
406 list_for_each_entry(chain, &cp->ccwchain_list, next) {
407 ccw_head = chain->ch_iova;
408 ccw_tail = ccw_head + (chain->ch_len - 1) * sizeof(struct ccw1);
409
410 if ((ccw_head <= tic->cda) && (tic->cda <= ccw_tail))
411 return 1;
412 }
413
414 return 0;
415}
416
417static int ccwchain_loop_tic(struct ccwchain *chain,
418 struct channel_program *cp);
419
420static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
421{
422 struct ccwchain *chain;
423 int len, ret;
424
425 /* May transfer to an existing chain. */
426 if (tic_target_chain_exists(tic, cp))
427 return 0;
428
429 /* Get chain length. */
430 len = ccwchain_calc_length(tic->cda, cp);
431 if (len < 0)
432 return len;
433
434 /* Need to allocate a new chain for this one. */
435 chain = ccwchain_alloc(cp, len);
436 if (!chain)
437 return -ENOMEM;
438 chain->ch_iova = tic->cda;
439
440 /* Copy the new chain from the guest. */
441 ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
442 if (ret) {
443 ccwchain_free(chain);
444 return ret;
445 }
446
447 /* Loop for tics on this new chain. */
448 return ccwchain_loop_tic(chain, cp);
449}
450
451/* Loop for TICs. */
452static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
453{
454 struct ccw1 *tic;
455 int i, ret;
456
457 for (i = 0; i < chain->ch_len; i++) {
458 tic = chain->ch_ccw + i;
459
460 if (!ccw_is_tic(tic))
461 continue;
462
463 ret = ccwchain_handle_tic(tic, cp);
464 if (ret)
465 return ret;
466 }
467
468 return 0;
469}
470
471static int ccwchain_fetch_tic(struct ccwchain *chain,
472 int idx,
473 struct channel_program *cp)
474{
475 struct ccw1 *ccw = chain->ch_ccw + idx;
476 struct ccwchain *iter;
477 u32 ccw_head, ccw_tail;
478
479 list_for_each_entry(iter, &cp->ccwchain_list, next) {
480 ccw_head = iter->ch_iova;
481 ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1);
482
483 if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) {
484 ccw->cda = (__u32) (addr_t) (iter->ch_ccw +
485 (ccw->cda - ccw_head));
486 return 0;
487 }
488 }
489
490 return -EFAULT;
491}
492
493static int ccwchain_fetch_direct(struct ccwchain *chain,
494 int idx,
495 struct channel_program *cp)
496{
497 struct ccw1 *ccw;
498 struct pfn_array_table *pat;
499 unsigned long *idaws;
500 int idaw_nr;
501
502 ccw = chain->ch_ccw + idx;
503
504 /*
505 * Pin data page(s) in memory.
506	 * The number of pages is in fact the count of the idaws that will be
507	 * needed when translating a direct ccw to an idal ccw.
508 */
509 pat = chain->ch_pat + idx;
510 if (pfn_array_table_init(pat, 1))
511 return -ENOMEM;
512 idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev,
513 ccw->cda, ccw->count);
514 if (idaw_nr < 0)
515 return idaw_nr;
516
517	/* Translate this direct ccw to an idal ccw. */
518 idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
519 if (!idaws) {
520 pfn_array_table_unpin_free(pat, cp->mdev);
521 return -ENOMEM;
522 }
523 ccw->cda = (__u32) virt_to_phys(idaws);
524 ccw->flags |= CCW_FLAG_IDA;
525
526 pfn_array_table_idal_create_words(pat, idaws);
527
528 return 0;
529}
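
The idaw count returned by pfn_array_alloc_pin() is simply the number of 4K pages spanned by the guest buffer [cda, cda + count), since 4K IDAWs are the only format this driver accepts. A standalone sketch of that arithmetic (idaw_count is an illustrative helper, not the kernel's idal_nr_words()):

#include <assert.h>
#include <stdint.h>

/* Pages spanned by [iova, iova + count) with a 4K page/idaw size. */
static unsigned int idaw_count(uint64_t iova, uint32_t count)
{
	return ((iova & 0xFFFu) + count + 0xFFFu) >> 12;
}

int main(void)
{
	assert(idaw_count(0x1000, 4096) == 1);	/* aligned, one page */
	assert(idaw_count(0x1ffe, 4) == 2);	/* straddles a boundary */
	return 0;
}
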
530
531static int ccwchain_fetch_idal(struct ccwchain *chain,
532 int idx,
533 struct channel_program *cp)
534{
535 struct ccw1 *ccw;
536 struct pfn_array_table *pat;
537 unsigned long *idaws;
538 u64 idaw_iova;
539 unsigned int idaw_nr, idaw_len;
540 int i, ret;
541
542 ccw = chain->ch_ccw + idx;
543
544 /* Calculate size of idaws. */
545 ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
546 if (ret)
547 return ret;
548 idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
549 idaw_len = idaw_nr * sizeof(*idaws);
550
551 /* Pin data page(s) in memory. */
552 pat = chain->ch_pat + idx;
553 ret = pfn_array_table_init(pat, idaw_nr);
554 if (ret)
555 return ret;
556
557	/* Translate the idal ccw to use the newly allocated idaws. */
558 idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
559 if (!idaws) {
560 ret = -ENOMEM;
561 goto out_unpin;
562 }
563
564 ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
565 if (ret)
566 goto out_free_idaws;
567
568 ccw->cda = virt_to_phys(idaws);
569
570 for (i = 0; i < idaw_nr; i++) {
571 idaw_iova = *(idaws + i);
572 if (IS_ERR_VALUE(idaw_iova)) {
573 ret = -EFAULT;
574 goto out_free_idaws;
575 }
576
577 ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
578 idaw_iova, 1);
579 if (ret < 0)
580 goto out_free_idaws;
581 }
582
583 pfn_array_table_idal_create_words(pat, idaws);
584
585 return 0;
586
587out_free_idaws:
588 kfree(idaws);
589out_unpin:
590 pfn_array_table_unpin_free(pat, cp->mdev);
591 return ret;
592}
593
594/*
595 * Fetch one ccw.
596 * To reduce memory copying, we'll pin the cda page(s) in memory,
597 * and to get rid of the 2G cda limitation of ccw1, we'll translate
598 * direct ccws to idal ccws.
599 */
600static int ccwchain_fetch_one(struct ccwchain *chain,
601 int idx,
602 struct channel_program *cp)
603{
604 struct ccw1 *ccw = chain->ch_ccw + idx;
605
606 if (ccw_is_test(ccw) || ccw_is_noop(ccw))
607 return 0;
608
609 if (ccw_is_tic(ccw))
610 return ccwchain_fetch_tic(chain, idx, cp);
611
612 if (ccw_is_idal(ccw))
613 return ccwchain_fetch_idal(chain, idx, cp);
614
615 return ccwchain_fetch_direct(chain, idx, cp);
616}
617
618/**
619 * cp_init() - allocate ccwchains for a channel program.
620 * @cp: channel_program on which to perform the operation
621 * @mdev: the mediated device to perform pin/unpin operations
622 * @orb: control block for the channel program from the guest
623 *
624 * This creates one or more ccwchain(s), and copies the raw data of
625 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
626 *
627 * Limitations:
628 * 1. Supports only prefetch enabled mode.
629 * 2. Supports idal(c64) ccw chaining.
630 * 3. Supports 4k idaw.
631 *
632 * Returns:
633 * %0 on success and a negative error value on failure.
634 */
635int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
636{
637 u64 iova = orb->cmd.cpa;
638 struct ccwchain *chain;
639 int len, ret;
640
641 /*
642 * XXX:
643 * Only support prefetch enable mode now.
644 * Only support 64bit addressing idal.
645 * Only support 4k IDAW.
646 */
647 if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k)
648 return -EOPNOTSUPP;
649
650 INIT_LIST_HEAD(&cp->ccwchain_list);
651 memcpy(&cp->orb, orb, sizeof(*orb));
652 cp->mdev = mdev;
653
654 /* Get chain length. */
655 len = ccwchain_calc_length(iova, cp);
656 if (len < 0)
657 return len;
658
659 /* Alloc mem for the head chain. */
660 chain = ccwchain_alloc(cp, len);
661 if (!chain)
662 return -ENOMEM;
663 chain->ch_iova = iova;
664
665 /* Copy the head chain from guest. */
666 ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
667 if (ret) {
668 ccwchain_free(chain);
669 return ret;
670 }
671
672 /* Now loop for its TICs. */
673 ret = ccwchain_loop_tic(chain, cp);
674 if (ret)
675 cp_unpin_free(cp);
676
677 return ret;
678}
679
680
681/**
682 * cp_free() - free resources for channel program.
683 * @cp: channel_program on which to perform the operation
684 *
685 * This unpins the memory pages and frees the memory space occupied by
686 * @cp, which must have been returned by a previous call to cp_init().
687 * Otherwise, undefined behavior occurs.
688 */
689void cp_free(struct channel_program *cp)
690{
691 cp_unpin_free(cp);
692}
693
694/**
695 * cp_prefetch() - translate a guest physical address channel program to
696 * a real-device runnable channel program.
697 * @cp: channel_program on which to perform the operation
698 *
699 * This function translates the guest-physical-address channel program
700 * and stores the result in the ccwchain list. @cp must have been
701 * initialized by a previous call to cp_init(). Otherwise, undefined
702 * behavior occurs.
703 *
704 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
705 * as helpers to do ccw chain translation inside the kernel. Basically
706 * they accept a channel program issued by a virtual machine, and
707 * translate the channel program to a real-device runnable channel
708 * program.
709 *
710 * These APIs will copy the ccws into kernel-space buffers, and update
711 * the guest physical addresses with their corresponding host physical
712 * addresses. Then channel I/O device drivers could issue the
713 * translated channel program to real devices to perform an I/O
714 * operation.
715 *
716 * These interfaces are designed to support translation only for
717 * channel programs, which are generated and formatted by a
718 * guest. This makes it possible for things like VFIO to leverage
719 * these interfaces to pass a channel I/O mediated device through
720 * to QEMU.
721 *
722 * We support direct ccw chaining by translating them to idal ccws.
723 *
724 * Returns:
725 * %0 on success and a negative error value on failure.
726 */
727int cp_prefetch(struct channel_program *cp)
728{
729 struct ccwchain *chain;
730 int len, idx, ret;
731
732 list_for_each_entry(chain, &cp->ccwchain_list, next) {
733 len = chain->ch_len;
734 for (idx = 0; idx < len; idx++) {
735 ret = ccwchain_fetch_one(chain, idx, cp);
736 if (ret)
737 return ret;
738 }
739 }
740
741 return 0;
742}
743
744/**
745 * cp_get_orb() - get the orb of the channel program
746 * @cp: channel_program on which to perform the operation
747 * @intparm: new intparm for the returned orb
748 * @lpm: candidate value of the logical-path mask for the returned orb
749 *
750 * This function returns the address of the updated orb of the channel
751 * program. Channel I/O device drivers could use this orb to issue a
752 * ssch.
753 */
754union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
755{
756 union orb *orb;
757 struct ccwchain *chain;
758 struct ccw1 *cpa;
759
760 orb = &cp->orb;
761
762 orb->cmd.intparm = intparm;
763 orb->cmd.fmt = 1;
764 orb->cmd.key = PAGE_DEFAULT_KEY >> 4;
765
766 if (orb->cmd.lpm == 0)
767 orb->cmd.lpm = lpm;
768
769 chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
770 cpa = chain->ch_ccw;
771 orb->cmd.cpa = (__u32) __pa(cpa);
772
773 return orb;
774}
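
Taken together, cp_init(), cp_prefetch(), cp_get_orb() and cp_free() form the whole translation flow. A hedged sketch of the intended call sequence (kernel-style pseudocode against the in-tree cio API, error handling abbreviated; compare fsm_io_helper()/fsm_io_request() in vfio_ccw_fsm.c below):

static int run_guest_program(struct channel_program *cp, struct device *mdev,
			     struct subchannel *sch, union orb *guest_orb)
{
	union orb *orb;
	int ret;

	ret = cp_init(cp, mdev, guest_orb);	/* copy chains from the guest */
	if (ret)
		return ret;

	ret = cp_prefetch(cp);			/* pin pages, rewrite cdas */
	if (ret)
		goto out_free;

	orb = cp_get_orb(cp, (u32)(addr_t)sch, sch->lpm);
	ret = ssch(sch->schid, orb);		/* condition code, simplified
						 * to an error code here */
	if (ret)
		goto out_free;

	return 0;				/* cp_free() runs only after
						 * the solicited interrupt */
out_free:
	cp_free(cp);
	return ret;
}
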
775
776/**
777 * cp_update_scsw() - update scsw for a channel program.
778 * @cp: channel_program on which to perform the operation
779 * @scsw: I/O results of the channel program and also the target to be
780 * updated
781 *
782 * @scsw contains the I/O results of the channel program pointed
783 * to by @cp. However, what @scsw->cpa stores is a host physical
784 * address, which is meaningless for the guest that is waiting for
785 * the I/O results.
786 *
787 * This function updates @scsw->cpa to its corresponding guest physical
788 * address.
789 */
790void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
791{
792 struct ccwchain *chain;
793 u32 cpa = scsw->cmd.cpa;
794 u32 ccw_head, ccw_tail;
795
796 /*
797 * LATER:
798 * For now, only update the cmd.cpa part. We may need to deal with
799 * other portions of the schib as well, even if we don't return them
800 * in the ioctl directly. Path status changes etc.
801 */
802 list_for_each_entry(chain, &cp->ccwchain_list, next) {
803 ccw_head = (u32)(u64)chain->ch_ccw;
804 ccw_tail = (u32)(u64)(chain->ch_ccw + chain->ch_len - 1);
805
806 if ((ccw_head <= cpa) && (cpa <= ccw_tail)) {
807 /*
808 * (cpa - ccw_head) is the offset value of the host
809 * physical ccw to its chain head.
810 * Adding this value to the guest physical ccw chain
811 * head gets us the guest cpa.
812 */
813 cpa = chain->ch_iova + (cpa - ccw_head);
814 break;
815 }
816 }
817
818 scsw->cmd.cpa = cpa;
819}
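
With made-up numbers, the translation above works like this: if the host copy of a chain (ccw_head) sits at 0x3e004000, the guest's copy (ch_iova) at 0x00200000, and the channel stopped at host cpa 0x3e004018, the guest sees 0x00200018, i.e. the fourth ccw of the chain. A standalone sketch:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t ccw_head = 0x3e004000;	/* chain->ch_ccw (host, 31-bit) */
	uint32_t ch_iova  = 0x00200000;	/* same chain in the guest */
	uint32_t cpa      = 0x3e004018;	/* where the channel stopped */

	assert(cpa - ccw_head == 3 * 8);	/* 3 ccws past the head */
	assert(ch_iova + (cpa - ccw_head) == 0x00200018);
	return 0;
}
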
820
821/**
822 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
823 * @cp: channel_program on which to perform the operation
824 * @iova: the iova to check
825 *
826 * If the @iova is currently pinned for the ccw chain, return true;
827 * else return false.
828 */
829bool cp_iova_pinned(struct channel_program *cp, u64 iova)
830{
831 struct ccwchain *chain;
832 int i;
833
834 list_for_each_entry(chain, &cp->ccwchain_list, next) {
835 for (i = 0; i < chain->ch_len; i++)
836 if (pfn_array_table_iova_pinned(chain->ch_pat + i,
837 iova))
838 return true;
839 }
840
841 return false;
842}
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
new file mode 100644
index 000000000000..7a1996b3b36d
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -0,0 +1,42 @@
1/*
2 * channel program interfaces
3 *
4 * Copyright IBM Corp. 2017
5 *
6 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
7 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
8 */
9
10#ifndef _VFIO_CCW_CP_H_
11#define _VFIO_CCW_CP_H_
12
13#include <asm/cio.h>
14#include <asm/scsw.h>
15
16#include "orb.h"
17
18/**
19 * struct channel_program - manage information for channel program
20 * @ccwchain_list: list head of ccwchains
21 * @orb: orb for the currently processed ssch request
22 * @mdev: the mediated device to perform page pinning/unpinning
23 *
24 * @ccwchain_list is the head of a ccwchain list, which contains the
25 * translated result of the guest channel program pointed to by the
26 * iova parameter passed to cp_init().
27 */
28struct channel_program {
29 struct list_head ccwchain_list;
30 union orb orb;
31 struct device *mdev;
32};
33
34extern int cp_init(struct channel_program *cp, struct device *mdev,
35 union orb *orb);
36extern void cp_free(struct channel_program *cp);
37extern int cp_prefetch(struct channel_program *cp);
38extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm);
39extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw);
40extern bool cp_iova_pinned(struct channel_program *cp, u64 iova);
41
42#endif
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
new file mode 100644
index 000000000000..e90dd43d2a55
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -0,0 +1,308 @@
1/*
2 * VFIO based Physical Subchannel device driver
3 *
4 * Copyright IBM Corp. 2017
5 *
6 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
7 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/device.h>
13#include <linux/slab.h>
14#include <linux/uuid.h>
15#include <linux/mdev.h>
16
17#include <asm/isc.h>
18
19#include "ioasm.h"
20#include "css.h"
21#include "vfio_ccw_private.h"
22
23struct workqueue_struct *vfio_ccw_work_q;
24
25/*
26 * Helpers
27 */
28int vfio_ccw_sch_quiesce(struct subchannel *sch)
29{
30 struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
31 DECLARE_COMPLETION_ONSTACK(completion);
32 int iretry, ret = 0;
33
34 spin_lock_irq(sch->lock);
35 if (!sch->schib.pmcw.ena)
36 goto out_unlock;
37 ret = cio_disable_subchannel(sch);
38 if (ret != -EBUSY)
39 goto out_unlock;
40
41 do {
42 iretry = 255;
43
44 ret = cio_cancel_halt_clear(sch, &iretry);
45 while (ret == -EBUSY) {
46 /*
47 * Flush all I/O and wait for
48 * cancel/halt/clear completion.
49 */
50 private->completion = &completion;
51 spin_unlock_irq(sch->lock);
52
53 wait_for_completion_timeout(&completion, 3*HZ);
54
55 spin_lock_irq(sch->lock);
56 private->completion = NULL;
57 flush_workqueue(vfio_ccw_work_q);
58 ret = cio_cancel_halt_clear(sch, &iretry);
59 		}
60
61 ret = cio_disable_subchannel(sch);
62 } while (ret == -EBUSY);
63out_unlock:
64 private->state = VFIO_CCW_STATE_NOT_OPER;
65 spin_unlock_irq(sch->lock);
66 return ret;
67}
68
69static void vfio_ccw_sch_io_todo(struct work_struct *work)
70{
71 struct vfio_ccw_private *private;
72 struct subchannel *sch;
73 struct irb *irb;
74
75 private = container_of(work, struct vfio_ccw_private, io_work);
76 irb = &private->irb;
77 sch = private->sch;
78
79 if (scsw_is_solicited(&irb->scsw)) {
80 cp_update_scsw(&private->cp, &irb->scsw);
81 cp_free(&private->cp);
82 }
83 memcpy(private->io_region.irb_area, irb, sizeof(*irb));
84
85 if (private->io_trigger)
86 eventfd_signal(private->io_trigger, 1);
87
88 if (private->mdev)
89 private->state = VFIO_CCW_STATE_IDLE;
90}
91
92/*
93 * Sysfs interfaces
94 */
95static ssize_t chpids_show(struct device *dev,
96 struct device_attribute *attr,
97 char *buf)
98{
99 struct subchannel *sch = to_subchannel(dev);
100 struct chsc_ssd_info *ssd = &sch->ssd_info;
101 ssize_t ret = 0;
102 int chp;
103 int mask;
104
105 for (chp = 0; chp < 8; chp++) {
106 mask = 0x80 >> chp;
107 if (ssd->path_mask & mask)
108 ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
109 else
110 ret += sprintf(buf + ret, "00 ");
111 }
112 ret += sprintf(buf+ret, "\n");
113 return ret;
114}
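
For example, a subchannel whose ssd_info has path_mask 0xc0 and installed chpids 0x12 and 0x13 would read back as "12 13 00 00 00 00 00 00".
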
115
116static ssize_t pimpampom_show(struct device *dev,
117 struct device_attribute *attr,
118 char *buf)
119{
120 struct subchannel *sch = to_subchannel(dev);
121 struct pmcw *pmcw = &sch->schib.pmcw;
122
123 return sprintf(buf, "%02x %02x %02x\n",
124 pmcw->pim, pmcw->pam, pmcw->pom);
125}
126
127static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
128static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
129
130static struct attribute *vfio_subchannel_attrs[] = {
131 &dev_attr_chpids.attr,
132 &dev_attr_pimpampom.attr,
133 NULL,
134};
135
136static struct attribute_group vfio_subchannel_attr_group = {
137 .attrs = vfio_subchannel_attrs,
138};
139
140/*
141 * Css driver callbacks
142 */
143static void vfio_ccw_sch_irq(struct subchannel *sch)
144{
145 struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
146
147 inc_irq_stat(IRQIO_CIO);
148 vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
149}
150
151static int vfio_ccw_sch_probe(struct subchannel *sch)
152{
153 struct pmcw *pmcw = &sch->schib.pmcw;
154 struct vfio_ccw_private *private;
155 int ret;
156
157 if (pmcw->qf) {
158 dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
159 dev_name(&sch->dev));
160 return -ENODEV;
161 }
162
163 private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
164 if (!private)
165 return -ENOMEM;
166 private->sch = sch;
167 dev_set_drvdata(&sch->dev, private);
168
169 spin_lock_irq(sch->lock);
170 private->state = VFIO_CCW_STATE_NOT_OPER;
171 sch->isc = VFIO_CCW_ISC;
172 ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
173 spin_unlock_irq(sch->lock);
174 if (ret)
175 goto out_free;
176
177 ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
178 if (ret)
179 goto out_disable;
180
181 ret = vfio_ccw_mdev_reg(sch);
182 if (ret)
183 goto out_rm_group;
184
185 INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
186 atomic_set(&private->avail, 1);
187 private->state = VFIO_CCW_STATE_STANDBY;
188
189 return 0;
190
191out_rm_group:
192 sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
193out_disable:
194 cio_disable_subchannel(sch);
195out_free:
196 dev_set_drvdata(&sch->dev, NULL);
197 kfree(private);
198 return ret;
199}
200
201static int vfio_ccw_sch_remove(struct subchannel *sch)
202{
203 struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
204
205 vfio_ccw_sch_quiesce(sch);
206
207 vfio_ccw_mdev_unreg(sch);
208
209 sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
210
211 dev_set_drvdata(&sch->dev, NULL);
212
213 kfree(private);
214
215 return 0;
216}
217
218static void vfio_ccw_sch_shutdown(struct subchannel *sch)
219{
220 vfio_ccw_sch_quiesce(sch);
221}
222
223/**
224 * vfio_ccw_sch_event - process subchannel event
225 * @sch: subchannel
226 * @process: non-zero if function is called in process context
227 *
228 * An unspecified event occurred for this subchannel. Adjust data according
229 * to the current operational state of the subchannel. Return zero when the
230 * event has been handled sufficiently or -EAGAIN when this function should
231 * be called again in process context.
232 */
233static int vfio_ccw_sch_event(struct subchannel *sch, int process)
234{
235 struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
236 unsigned long flags;
237
238 spin_lock_irqsave(sch->lock, flags);
239 if (!device_is_registered(&sch->dev))
240 goto out_unlock;
241
242 if (work_pending(&sch->todo_work))
243 goto out_unlock;
244
245 if (cio_update_schib(sch)) {
246 vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
247 goto out_unlock;
248 }
249
250 private = dev_get_drvdata(&sch->dev);
251 if (private->state == VFIO_CCW_STATE_NOT_OPER) {
252 private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
253 VFIO_CCW_STATE_STANDBY;
254 }
255
256out_unlock:
257 spin_unlock_irqrestore(sch->lock, flags);
258
259 return 0;
260}
261
262static struct css_device_id vfio_ccw_sch_ids[] = {
263 { .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
264 { /* end of list */ },
265};
266MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids);
267
268static struct css_driver vfio_ccw_sch_driver = {
269 .drv = {
270 .name = "vfio_ccw",
271 .owner = THIS_MODULE,
272 },
273 .subchannel_type = vfio_ccw_sch_ids,
274 .irq = vfio_ccw_sch_irq,
275 .probe = vfio_ccw_sch_probe,
276 .remove = vfio_ccw_sch_remove,
277 .shutdown = vfio_ccw_sch_shutdown,
278 .sch_event = vfio_ccw_sch_event,
279};
280
281static int __init vfio_ccw_sch_init(void)
282{
283 int ret;
284
285 vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
286 if (!vfio_ccw_work_q)
287 return -ENOMEM;
288
289 isc_register(VFIO_CCW_ISC);
290 ret = css_driver_register(&vfio_ccw_sch_driver);
291 if (ret) {
292 isc_unregister(VFIO_CCW_ISC);
293 destroy_workqueue(vfio_ccw_work_q);
294 }
295
296 return ret;
297}
298
299static void __exit vfio_ccw_sch_exit(void)
300{
301 css_driver_unregister(&vfio_ccw_sch_driver);
302 isc_unregister(VFIO_CCW_ISC);
303 destroy_workqueue(vfio_ccw_work_q);
304}
305module_init(vfio_ccw_sch_init);
306module_exit(vfio_ccw_sch_exit);
307
308MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
new file mode 100644
index 000000000000..80a0559cd7ce
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -0,0 +1,203 @@
1/*
2 * Finite state machine for vfio-ccw device handling
3 *
4 * Copyright IBM Corp. 2017
5 *
6 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
7 */
8
9#include <linux/vfio.h>
10#include <linux/mdev.h>
11
12#include "ioasm.h"
13#include "vfio_ccw_private.h"
14
15static int fsm_io_helper(struct vfio_ccw_private *private)
16{
17 struct subchannel *sch;
18 union orb *orb;
19 int ccode;
20 __u8 lpm;
21 unsigned long flags;
22
23 sch = private->sch;
24
25 spin_lock_irqsave(sch->lock, flags);
26 private->state = VFIO_CCW_STATE_BUSY;
27 spin_unlock_irqrestore(sch->lock, flags);
28
29 orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
30
31 /* Issue "Start Subchannel" */
32 ccode = ssch(sch->schid, orb);
33
34 switch (ccode) {
35 case 0:
36 /*
37 * Initialize device status information
38 */
39 sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
40 return 0;
41 case 1: /* Status pending */
42 case 2: /* Busy */
43 return -EBUSY;
44 case 3: /* Device/path not operational */
45 {
46 lpm = orb->cmd.lpm;
47 if (lpm != 0)
48 sch->lpm &= ~lpm;
49 else
50 sch->lpm = 0;
51
52 if (cio_update_schib(sch))
53 return -ENODEV;
54
55 return sch->lpm ? -EACCES : -ENODEV;
56 }
57 default:
58 return ccode;
59 }
60}
61
62static void fsm_notoper(struct vfio_ccw_private *private,
63 enum vfio_ccw_event event)
64{
65 struct subchannel *sch = private->sch;
66
67 /*
68 * TODO:
69 * Probably we should send the machine check to the guest.
70 */
71 css_sched_sch_todo(sch, SCH_TODO_UNREG);
72 private->state = VFIO_CCW_STATE_NOT_OPER;
73}
74
75/*
76 * No operation action.
77 */
78static void fsm_nop(struct vfio_ccw_private *private,
79 enum vfio_ccw_event event)
80{
81}
82
83static void fsm_io_error(struct vfio_ccw_private *private,
84 enum vfio_ccw_event event)
85{
86 pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
87 private->io_region.ret_code = -EIO;
88}
89
90static void fsm_io_busy(struct vfio_ccw_private *private,
91 enum vfio_ccw_event event)
92{
93 private->io_region.ret_code = -EBUSY;
94}
95
96static void fsm_disabled_irq(struct vfio_ccw_private *private,
97 enum vfio_ccw_event event)
98{
99 struct subchannel *sch = private->sch;
100
101 /*
102 * An interrupt in a disabled state means a previous disable was not
103 * successful - should not happen, but we try to disable again.
104 */
105 cio_disable_subchannel(sch);
106}
107
108/*
109 * Deal with the ccw command request from the userspace.
110 */
111static void fsm_io_request(struct vfio_ccw_private *private,
112 enum vfio_ccw_event event)
113{
114 union orb *orb;
115 union scsw *scsw = &private->scsw;
116 struct ccw_io_region *io_region = &private->io_region;
117 struct mdev_device *mdev = private->mdev;
118
119 private->state = VFIO_CCW_STATE_BOXED;
120
121 memcpy(scsw, io_region->scsw_area, sizeof(*scsw));
122
123 if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) {
124 orb = (union orb *)io_region->orb_area;
125
126 io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
127 orb);
128 if (io_region->ret_code)
129 goto err_out;
130
131 io_region->ret_code = cp_prefetch(&private->cp);
132 if (io_region->ret_code) {
133 cp_free(&private->cp);
134 goto err_out;
135 }
136
137 /* Start channel program and wait for I/O interrupt. */
138 io_region->ret_code = fsm_io_helper(private);
139 if (io_region->ret_code) {
140 cp_free(&private->cp);
141 goto err_out;
142 }
143 return;
144 } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
145 /* XXX: Handle halt. */
146 io_region->ret_code = -EOPNOTSUPP;
147 goto err_out;
148 } else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
149 /* XXX: Handle clear. */
150 io_region->ret_code = -EOPNOTSUPP;
151 goto err_out;
152 }
153
154err_out:
155 private->state = VFIO_CCW_STATE_IDLE;
156}
157
158/*
159 * Got an interrupt for a normal io (state busy).
160 */
161static void fsm_irq(struct vfio_ccw_private *private,
162 enum vfio_ccw_event event)
163{
164 struct irb *irb = this_cpu_ptr(&cio_irb);
165
166 memcpy(&private->irb, irb, sizeof(*irb));
167
168 queue_work(vfio_ccw_work_q, &private->io_work);
169
170 if (private->completion)
171 complete(private->completion);
172}
173
174/*
175 * Device statemachine
176 */
177fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
178 [VFIO_CCW_STATE_NOT_OPER] = {
179 [VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
180 [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
181 [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
182 },
183 [VFIO_CCW_STATE_STANDBY] = {
184 [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
185 [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
186 [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
187 },
188 [VFIO_CCW_STATE_IDLE] = {
189 [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
190 [VFIO_CCW_EVENT_IO_REQ] = fsm_io_request,
191 [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
192 },
193 [VFIO_CCW_STATE_BOXED] = {
194 [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
195 [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
196 [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
197 },
198 [VFIO_CCW_STATE_BUSY] = {
199 [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
200 [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
201 [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
202 },
203};
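
So, for example, a write to the I/O region while the device is IDLE dispatches to fsm_io_request(), while an interrupt arriving in the NOT_OPER state lands in fsm_disabled_irq(); the dispatch itself is the one-line table lookup in vfio_ccw_fsm_event() (see vfio_ccw_private.h below).
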
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
new file mode 100644
index 000000000000..e72abbc18ee3
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -0,0 +1,425 @@
1/*
2 * Physical device callbacks for vfio_ccw
3 *
4 * Copyright IBM Corp. 2017
5 *
6 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
7 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
8 */
9
10#include <linux/vfio.h>
11#include <linux/mdev.h>
12
13#include "vfio_ccw_private.h"
14
15static int vfio_ccw_mdev_reset(struct mdev_device *mdev)
16{
17 struct vfio_ccw_private *private;
18 struct subchannel *sch;
19 int ret;
20
21 private = dev_get_drvdata(mdev_parent_dev(mdev));
22 sch = private->sch;
23 /*
24 * TODO:
25	 * At the current stage, some things like "no I/O running" and "no
26	 * interrupt pending" are clear, but we are not sure what other state
27	 * we need to care about.
28	 * There are still many more instructions that need to be handled. We
29	 * should come back to this later.
30 */
31 ret = vfio_ccw_sch_quiesce(sch);
32 if (ret)
33 return ret;
34
35 ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
36 if (!ret)
37 private->state = VFIO_CCW_STATE_IDLE;
38
39 return ret;
40}
41
42static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
43 unsigned long action,
44 void *data)
45{
46 struct vfio_ccw_private *private =
47 container_of(nb, struct vfio_ccw_private, nb);
48
49 /*
50 * Vendor drivers MUST unpin pages in response to an
51 * invalidation.
52 */
53 if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
54 struct vfio_iommu_type1_dma_unmap *unmap = data;
55
56 if (!cp_iova_pinned(&private->cp, unmap->iova))
57 return NOTIFY_OK;
58
59 if (vfio_ccw_mdev_reset(private->mdev))
60 return NOTIFY_BAD;
61
62 cp_free(&private->cp);
63 return NOTIFY_OK;
64 }
65
66 return NOTIFY_DONE;
67}
68
69static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
70{
71 return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
72}
73MDEV_TYPE_ATTR_RO(name);
74
75static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
76 char *buf)
77{
78 return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
79}
80MDEV_TYPE_ATTR_RO(device_api);
81
82static ssize_t available_instances_show(struct kobject *kobj,
83 struct device *dev, char *buf)
84{
85 struct vfio_ccw_private *private = dev_get_drvdata(dev);
86
87 return sprintf(buf, "%d\n", atomic_read(&private->avail));
88}
89MDEV_TYPE_ATTR_RO(available_instances);
90
91static struct attribute *mdev_types_attrs[] = {
92 &mdev_type_attr_name.attr,
93 &mdev_type_attr_device_api.attr,
94 &mdev_type_attr_available_instances.attr,
95 NULL,
96};
97
98static struct attribute_group mdev_type_group = {
99 .name = "io",
100 .attrs = mdev_types_attrs,
101};
102
103struct attribute_group *mdev_type_groups[] = {
104 &mdev_type_group,
105 NULL,
106};
107
108static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
109{
110 struct vfio_ccw_private *private =
111 dev_get_drvdata(mdev_parent_dev(mdev));
112
113 if (private->state == VFIO_CCW_STATE_NOT_OPER)
114 return -ENODEV;
115
116 if (atomic_dec_if_positive(&private->avail) < 0)
117 return -EPERM;
118
119 private->mdev = mdev;
120 private->state = VFIO_CCW_STATE_IDLE;
121
122 return 0;
123}
124
125static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
126{
127 struct vfio_ccw_private *private =
128 dev_get_drvdata(mdev_parent_dev(mdev));
129
130 if ((private->state != VFIO_CCW_STATE_NOT_OPER) &&
131 (private->state != VFIO_CCW_STATE_STANDBY)) {
132 if (!vfio_ccw_mdev_reset(mdev))
133 private->state = VFIO_CCW_STATE_STANDBY;
134 /* The state will be NOT_OPER on error. */
135 }
136
137 private->mdev = NULL;
138 atomic_inc(&private->avail);
139
140 return 0;
141}
142
143static int vfio_ccw_mdev_open(struct mdev_device *mdev)
144{
145 struct vfio_ccw_private *private =
146 dev_get_drvdata(mdev_parent_dev(mdev));
147 unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
148
149 private->nb.notifier_call = vfio_ccw_mdev_notifier;
150
151 return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
152 &events, &private->nb);
153}
154
155void vfio_ccw_mdev_release(struct mdev_device *mdev)
156{
157 struct vfio_ccw_private *private =
158 dev_get_drvdata(mdev_parent_dev(mdev));
159
160 vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
161 &private->nb);
162}
163
164static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev,
165 char __user *buf,
166 size_t count,
167 loff_t *ppos)
168{
169 struct vfio_ccw_private *private;
170 struct ccw_io_region *region;
171
172 if (*ppos + count > sizeof(*region))
173 return -EINVAL;
174
175 private = dev_get_drvdata(mdev_parent_dev(mdev));
176 region = &private->io_region;
177 if (copy_to_user(buf, (void *)region + *ppos, count))
178 return -EFAULT;
179
180 return count;
181}
182
183static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
184 const char __user *buf,
185 size_t count,
186 loff_t *ppos)
187{
188 struct vfio_ccw_private *private;
189 struct ccw_io_region *region;
190
191 if (*ppos + count > sizeof(*region))
192 return -EINVAL;
193
194 private = dev_get_drvdata(mdev_parent_dev(mdev));
195 if (private->state != VFIO_CCW_STATE_IDLE)
196 return -EACCES;
197
198 region = &private->io_region;
199 if (copy_from_user((void *)region + *ppos, buf, count))
200 return -EFAULT;
201
202 vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
203 if (region->ret_code != 0) {
204 private->state = VFIO_CCW_STATE_IDLE;
205 return region->ret_code;
206 }
207
208 return count;
209}
210
211static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info)
212{
213 info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET;
214 info->num_regions = VFIO_CCW_NUM_REGIONS;
215 info->num_irqs = VFIO_CCW_NUM_IRQS;
216
217 return 0;
218}
219
220static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
221 u16 *cap_type_id,
222 void **cap_type)
223{
224 switch (info->index) {
225 case VFIO_CCW_CONFIG_REGION_INDEX:
226 info->offset = 0;
227 info->size = sizeof(struct ccw_io_region);
228 info->flags = VFIO_REGION_INFO_FLAG_READ
229 | VFIO_REGION_INFO_FLAG_WRITE;
230 return 0;
231 default:
232 return -EINVAL;
233 }
234}
235
236int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
237{
238 if (info->index != VFIO_CCW_IO_IRQ_INDEX)
239 return -EINVAL;
240
241 info->count = 1;
242 info->flags = VFIO_IRQ_INFO_EVENTFD;
243
244 return 0;
245}
246
247static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
248 uint32_t flags,
249 void __user *data)
250{
251 struct vfio_ccw_private *private;
252 struct eventfd_ctx **ctx;
253
254 if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER))
255 return -EINVAL;
256
257 private = dev_get_drvdata(mdev_parent_dev(mdev));
258 ctx = &private->io_trigger;
259
260 switch (flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
261 case VFIO_IRQ_SET_DATA_NONE:
262 {
263 if (*ctx)
264 eventfd_signal(*ctx, 1);
265 return 0;
266 }
267 case VFIO_IRQ_SET_DATA_BOOL:
268 {
269 uint8_t trigger;
270
271 if (get_user(trigger, (uint8_t __user *)data))
272 return -EFAULT;
273
274 if (trigger && *ctx)
275 eventfd_signal(*ctx, 1);
276 return 0;
277 }
278 case VFIO_IRQ_SET_DATA_EVENTFD:
279 {
280 int32_t fd;
281
282 if (get_user(fd, (int32_t __user *)data))
283 return -EFAULT;
284
285 if (fd == -1) {
286 if (*ctx)
287 eventfd_ctx_put(*ctx);
288 *ctx = NULL;
289 } else if (fd >= 0) {
290 struct eventfd_ctx *efdctx;
291
292 efdctx = eventfd_ctx_fdget(fd);
293 if (IS_ERR(efdctx))
294 return PTR_ERR(efdctx);
295
296 if (*ctx)
297 eventfd_ctx_put(*ctx);
298
299 *ctx = efdctx;
300 } else
301 return -EINVAL;
302
303 return 0;
304 }
305 default:
306 return -EINVAL;
307 }
308}
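
From userspace, the eventfd is wired up through VFIO_DEVICE_SET_IRQS. A minimal sketch, assuming device_fd is an already opened vfio-ccw device file descriptor and using only the uapi definitions this series adds:

#include <stdint.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int set_io_eventfd(int device_fd)
{
	char buf[sizeof(struct vfio_irq_set) + sizeof(int32_t)];
	struct vfio_irq_set *irq_set = (struct vfio_irq_set *)buf;
	int32_t fd = eventfd(0, EFD_CLOEXEC);

	if (fd < 0)
		return -1;

	memset(buf, 0, sizeof(buf));
	irq_set->argsz = sizeof(buf);
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
	irq_set->start = 0;
	irq_set->count = 1;
	memcpy(irq_set->data, &fd, sizeof(fd));

	/* On success the kernel keeps a reference to the eventfd ctx. */
	return ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set) ? -1 : fd;
}
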
309
310static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
311 unsigned int cmd,
312 unsigned long arg)
313{
314 int ret = 0;
315 unsigned long minsz;
316
317 switch (cmd) {
318 case VFIO_DEVICE_GET_INFO:
319 {
320 struct vfio_device_info info;
321
322 minsz = offsetofend(struct vfio_device_info, num_irqs);
323
324 if (copy_from_user(&info, (void __user *)arg, minsz))
325 return -EFAULT;
326
327 if (info.argsz < minsz)
328 return -EINVAL;
329
330 ret = vfio_ccw_mdev_get_device_info(&info);
331 if (ret)
332 return ret;
333
334 return copy_to_user((void __user *)arg, &info, minsz);
335 }
336 case VFIO_DEVICE_GET_REGION_INFO:
337 {
338 struct vfio_region_info info;
339 u16 cap_type_id = 0;
340 void *cap_type = NULL;
341
342 minsz = offsetofend(struct vfio_region_info, offset);
343
344 if (copy_from_user(&info, (void __user *)arg, minsz))
345 return -EFAULT;
346
347 if (info.argsz < minsz)
348 return -EINVAL;
349
350 ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id,
351 &cap_type);
352 if (ret)
353 return ret;
354
355 return copy_to_user((void __user *)arg, &info, minsz);
356 }
357 case VFIO_DEVICE_GET_IRQ_INFO:
358 {
359 struct vfio_irq_info info;
360
361 minsz = offsetofend(struct vfio_irq_info, count);
362
363 if (copy_from_user(&info, (void __user *)arg, minsz))
364 return -EFAULT;
365
366 if (info.argsz < minsz || info.index >= VFIO_CCW_NUM_IRQS)
367 return -EINVAL;
368
369 ret = vfio_ccw_mdev_get_irq_info(&info);
370 if (ret)
371 return ret;
372
373 if (info.count == -1)
374 return -EINVAL;
375
376 return copy_to_user((void __user *)arg, &info, minsz);
377 }
378 case VFIO_DEVICE_SET_IRQS:
379 {
380 struct vfio_irq_set hdr;
381 size_t data_size;
382 void __user *data;
383
384 minsz = offsetofend(struct vfio_irq_set, count);
385
386 if (copy_from_user(&hdr, (void __user *)arg, minsz))
387 return -EFAULT;
388
389 ret = vfio_set_irqs_validate_and_prepare(&hdr, 1,
390 VFIO_CCW_NUM_IRQS,
391 &data_size);
392 if (ret)
393 return ret;
394
395 data = (void __user *)(arg + minsz);
396 return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, data);
397 }
398 case VFIO_DEVICE_RESET:
399 return vfio_ccw_mdev_reset(mdev);
400 default:
401 return -ENOTTY;
402 }
403}
404
405static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
406 .owner = THIS_MODULE,
407 .supported_type_groups = mdev_type_groups,
408 .create = vfio_ccw_mdev_create,
409 .remove = vfio_ccw_mdev_remove,
410 .open = vfio_ccw_mdev_open,
411 .release = vfio_ccw_mdev_release,
412 .read = vfio_ccw_mdev_read,
413 .write = vfio_ccw_mdev_write,
414 .ioctl = vfio_ccw_mdev_ioctl,
415};
416
417int vfio_ccw_mdev_reg(struct subchannel *sch)
418{
419 return mdev_register_device(&sch->dev, &vfio_ccw_mdev_ops);
420}
421
422void vfio_ccw_mdev_unreg(struct subchannel *sch)
423{
424 mdev_unregister_device(&sch->dev);
425}
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
new file mode 100644
index 000000000000..fc0f01c16ef9
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -0,0 +1,96 @@
1/*
2 * Private stuff for vfio_ccw driver
3 *
4 * Copyright IBM Corp. 2017
5 *
6 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
7 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
8 */
9
10#ifndef _VFIO_CCW_PRIVATE_H_
11#define _VFIO_CCW_PRIVATE_H_
12
13#include <linux/completion.h>
14#include <linux/eventfd.h>
15#include <linux/workqueue.h>
16#include <linux/vfio_ccw.h>
17
18#include "css.h"
19#include "vfio_ccw_cp.h"
20
21/**
22 * struct vfio_ccw_private
23 * @sch: pointer to the subchannel
24 * @state: internal state of the device
25 * @completion: synchronization helper of the I/O completion
26 * @avail: available for creating a mediated device
27 * @mdev: pointer to the mediated device
28 * @nb: notifier for vfio events
29 * @io_region: MMIO region to input/output I/O arguments/results
30 * @cp: channel program for the current I/O operation
31 * @irb: irb info received from interrupt
32 * @scsw: scsw info
33 * @io_trigger: eventfd ctx for signaling userspace I/O results
34 * @io_work: work item for deferred processing of I/O handling
35 */
36struct vfio_ccw_private {
37 struct subchannel *sch;
38 int state;
39 struct completion *completion;
40 atomic_t avail;
41 struct mdev_device *mdev;
42 struct notifier_block nb;
43 struct ccw_io_region io_region;
44
45 struct channel_program cp;
46 struct irb irb;
47 union scsw scsw;
48
49 struct eventfd_ctx *io_trigger;
50 struct work_struct io_work;
51} __aligned(8);
52
53extern int vfio_ccw_mdev_reg(struct subchannel *sch);
54extern void vfio_ccw_mdev_unreg(struct subchannel *sch);
55
56extern int vfio_ccw_sch_quiesce(struct subchannel *sch);
57
58/*
59 * States of the device statemachine.
60 */
61enum vfio_ccw_state {
62 VFIO_CCW_STATE_NOT_OPER,
63 VFIO_CCW_STATE_STANDBY,
64 VFIO_CCW_STATE_IDLE,
65 VFIO_CCW_STATE_BOXED,
66 VFIO_CCW_STATE_BUSY,
67 /* last element! */
68 NR_VFIO_CCW_STATES
69};
70
71/*
72 * Asynchronous events of the device statemachine.
73 */
74enum vfio_ccw_event {
75 VFIO_CCW_EVENT_NOT_OPER,
76 VFIO_CCW_EVENT_IO_REQ,
77 VFIO_CCW_EVENT_INTERRUPT,
78 /* last element! */
79 NR_VFIO_CCW_EVENTS
80};
81
82/*
83 * Action called through jumptable.
84 */
85typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
86extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
87
88static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
89 int event)
90{
91 vfio_ccw_jumptable[private->state][event](private, event);
92}
93
94extern struct workqueue_struct *vfio_ccw_work_q;
95
96#endif
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 058db724b5a2..ea86da8c75f9 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -80,7 +80,7 @@ struct secaeskeytoken {
80 * token. If keybitsize is given, the bitsize of the key is
81 * also checked. Returns 0 on success or errno value on failure.
82 */
-static int check_secaeskeytoken(u8 *token, int keybitsize)
+static int check_secaeskeytoken(const u8 *token, int keybitsize)
84{
85	struct secaeskeytoken *t = (struct secaeskeytoken *) token;
86
@@ -1004,6 +1004,53 @@ int pkey_skey2pkey(const struct pkey_seckey *seckey,
1004EXPORT_SYMBOL(pkey_skey2pkey);
1005
1006/*
1007 * Verify key and give back some info about the key.
1008 */
1009int pkey_verifykey(const struct pkey_seckey *seckey,
1010 u16 *pcardnr, u16 *pdomain,
1011 u16 *pkeysize, u32 *pattributes)
1012{
1013 struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
1014 u16 cardnr, domain;
1015 u64 mkvp[2];
1016 int rc;
1017
1018 /* check the secure key for valid AES secure key */
1019 rc = check_secaeskeytoken((u8 *) seckey, 0);
1020 if (rc)
1021 goto out;
1022 if (pattributes)
1023 *pattributes = PKEY_VERIFY_ATTR_AES;
1024 if (pkeysize)
1025 *pkeysize = t->bitsize;
1026
1027 /* try to find a card which can handle this key */
1028 rc = pkey_findcard(seckey, &cardnr, &domain, 1);
1029 if (rc)
1030 goto out;
1031
1032 /* check mkvp for old mkvp match */
1033 rc = mkvp_cache_fetch(cardnr, domain, mkvp);
1034 if (rc)
1035 goto out;
1036 if (t->mkvp == mkvp[1]) {
1037 DEBUG_DBG("pkey_verifykey secure key has old mkvp\n");
1038 if (pattributes)
1039 *pattributes |= PKEY_VERIFY_ATTR_OLD_MKVP;
1040 }
1041
1042 if (pcardnr)
1043 *pcardnr = cardnr;
1044 if (pdomain)
1045 *pdomain = domain;
1046
1047out:
1048 DEBUG_DBG("pkey_verifykey rc=%d\n", rc);
1049 return rc;
1050}
1051EXPORT_SYMBOL(pkey_verifykey);
1052
1053/*
1054 * File io functions
1055 */
1056
@@ -1104,6 +1151,21 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
1151			return -EFAULT;
1152		break;
1153	}
1154 case PKEY_VERIFYKEY: {
1155 struct pkey_verifykey __user *uvk = (void __user *) arg;
1156 struct pkey_verifykey kvk;
1157
1158 if (copy_from_user(&kvk, uvk, sizeof(kvk)))
1159 return -EFAULT;
1160 rc = pkey_verifykey(&kvk.seckey, &kvk.cardnr, &kvk.domain,
1161 &kvk.keysize, &kvk.attributes);
1162 DEBUG_DBG("pkey_ioctl pkey_verifykey()=%d\n", rc);
1163 if (rc)
1164 break;
1165 if (copy_to_user(uvk, &kvk, sizeof(kvk)))
1166 return -EFAULT;
1167 break;
1168 }
1169	default:
1170		/* unknown/unsupported ioctl cmd */
1171		return -ENOTTY;
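
From userspace the new ioctl is used roughly as follows; a hedged sketch, assuming the PKEY_VERIFYKEY ioctl number and the struct pkey_verifykey layout that this patch adds to the uapi pkey header (not shown in this diff):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/pkey.h>

static int verify_seckey(const struct pkey_seckey *seckey)
{
	struct pkey_verifykey vk;
	int fd, rc;

	memset(&vk, 0, sizeof(vk));
	memcpy(&vk.seckey, seckey, sizeof(vk.seckey));

	fd = open("/dev/pkey", O_RDWR);
	if (fd < 0)
		return -1;
	rc = ioctl(fd, PKEY_VERIFYKEY, &vk);
	close(fd);
	if (rc)
		return rc;

	printf("card %u domain %u keysize %u attributes 0x%x\n",
	       (unsigned)vk.cardnr, (unsigned)vk.domain,
	       (unsigned)vk.keysize, (unsigned)vk.attributes);
	return 0;
}
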
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b59ee077a596..176b6cb1008d 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -409,6 +409,8 @@ typedef struct elf64_shdr {
409#define NT_S390_TDB	0x308		/* s390 transaction diagnostic block */
410#define NT_S390_VXRS_LOW	0x309	/* s390 vector registers 0-15 upper half */
411#define NT_S390_VXRS_HIGH	0x30a	/* s390 vector registers 16-31 */
412#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
413#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
414#define NT_ARM_VFP	0x400		/* ARM VFP/NEON registers */
415#define NT_ARM_TLS	0x401		/* ARM TLS register */
416#define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 519eff362c1c..ae461050661a 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -198,6 +198,7 @@ struct vfio_device_info {
198#define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
199#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
200#define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
201#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */
202	__u32	num_regions;	/* Max region index + 1 */
203	__u32	num_irqs;	/* Max IRQ index + 1 */
204};
@@ -212,6 +213,7 @@ struct vfio_device_info {
213#define VFIO_DEVICE_API_PCI_STRING		"vfio-pci"
214#define VFIO_DEVICE_API_PLATFORM_STRING	"vfio-platform"
215#define VFIO_DEVICE_API_AMBA_STRING		"vfio-amba"
216#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw"
217
218/**
219 * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
@@ -446,6 +448,22 @@ enum {
448	VFIO_PCI_NUM_IRQS
449};
450
451/*
452 * The vfio-ccw bus driver makes use of the following fixed region and
453 * IRQ index mapping. Unimplemented regions return a size of zero.
454 * Unimplemented IRQ types return a count of zero.
455 */
456
457enum {
458 VFIO_CCW_CONFIG_REGION_INDEX,
459 VFIO_CCW_NUM_REGIONS
460};
461
462enum {
463 VFIO_CCW_IO_IRQ_INDEX,
464 VFIO_CCW_NUM_IRQS
465};
466
467/**
468 * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
469 *					struct vfio_pci_hot_reset_info)
diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
new file mode 100644
index 000000000000..34a7f6f9e065
--- /dev/null
+++ b/include/uapi/linux/vfio_ccw.h
@@ -0,0 +1,24 @@
1/*
2 * Interfaces for vfio-ccw
3 *
4 * Copyright IBM Corp. 2017
5 *
6 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
7 */
8
9#ifndef _VFIO_CCW_H_
10#define _VFIO_CCW_H_
11
12#include <linux/types.h>
13
14struct ccw_io_region {
15#define ORB_AREA_SIZE 12
16 __u8 orb_area[ORB_AREA_SIZE];
17#define SCSW_AREA_SIZE 12
18 __u8 scsw_area[SCSW_AREA_SIZE];
19#define IRB_AREA_SIZE 96
20 __u8 irb_area[IRB_AREA_SIZE];
21 __u32 ret_code;
22} __packed;
23
24#endif
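
Userspace drives one channel program through this region by writing the orb/scsw areas and, once the I/O eventfd fires, reading back the irb area and return code. A minimal sketch, assuming device_fd is an opened vfio-ccw mdev file descriptor and the orb/scsw buffers were prepared by the caller:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <linux/vfio_ccw.h>

static int do_io(int device_fd, const void *orb, const void *scsw)
{
	struct ccw_io_region region;

	memset(&region, 0, sizeof(region));
	memcpy(region.orb_area, orb, ORB_AREA_SIZE);
	memcpy(region.scsw_area, scsw, SCSW_AREA_SIZE);

	/* The write triggers the IO_REQ event in the driver's FSM. */
	if (pwrite(device_fd, &region, sizeof(region), 0) != sizeof(region))
		return -1;

	/* ... wait for the I/O eventfd to fire, then fetch the results. */
	if (pread(device_fd, &region, sizeof(region), 0) != sizeof(region))
		return -1;

	return (int)region.ret_code;
}
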