author	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-05 12:45:46 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-05 12:45:46 -0400
commit	9e85ae6af6e907975f68d82ff127073ec024cb05
tree	3d3349b03da858e53ef8f8dce467e4a691eabf88
parent	6caffe21ddeaae4a9d18d46eed2445a8d269a1fe
parent	fa41ba0d08de7c975c3e94d0067553f9b934221f
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
 "The first part of the s390 updates for 4.14:

   - Add machine type 0x3906 for IBM z14

   - Add IBM z14 TLB flushing improvements for KVM guests

   - Exploit the TOD clock epoch extension to provide a continuous TOD
     clock after 2042/09/17

   - Add NIAI spinlock hints for IBM z14

   - Rework the vmcp driver and use CMA for the response buffer of z/VM
     CP commands

   - Drop some s390 specific asm headers and use the generic version

   - Add block discard for DASD-FBA devices under z/VM

   - Add average request times to DASD statistics

   - A few of those constify patches which seem to be in vogue right now

   - Cleanup and bug fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (50 commits)
  s390/mm: avoid empty zero pages for KVM guests to avoid postcopy hangs
  s390/dasd: Add discard support for FBA devices
  s390/zcrypt: make CPRBX const
  s390/uaccess: avoid mvcos jump label
  s390/mm: use generic mm_hooks
  s390/facilities: fix typo
  s390/vmcp: simplify vmcp_response_free()
  s390/topology: Remove the unused parent_node() macro
  s390/dasd: Change unsigned long long to unsigned long
  s390/smp: convert cpuhp_setup_state() return code to zero on success
  s390: fix 'novx' early parameter handling
  s390/dasd: add average request times to dasd statistics
  s390/scm: use common completion path
  s390/pci: log changes to uid checking
  s390/vmcp: simplify vmcp_ioctl()
  s390/vmcp: return -ENOTTY for unknown ioctl commands
  s390/vmcp: split vmcp header file and move to uapi
  s390/vmcp: make use of contiguous memory allocator
  s390/cpcmd,vmcp: avoid GFP_DMA allocations
  s390/vmcp: fix uaccess check and avoid undefined behavior
  ...
-rw-r--r--	Documentation/admin-guide/kernel-parameters.txt	|   4
-rw-r--r--	arch/s390/Kconfig	|  18
-rw-r--r--	arch/s390/Makefile	|   6
-rw-r--r--	arch/s390/include/asm/Kbuild	|   1
-rw-r--r--	arch/s390/include/asm/cpcmd.h	|   7
-rw-r--r--	arch/s390/include/asm/ebcdic.h	|   4
-rw-r--r--	arch/s390/include/asm/elf.h	|   2
-rw-r--r--	arch/s390/include/asm/ipl.h	|   2
-rw-r--r--	arch/s390/include/asm/lowcore.h	|  48
-rw-r--r--	arch/s390/include/asm/mman.h	|  11
-rw-r--r--	arch/s390/include/asm/mmu_context.h	|  33
-rw-r--r--	arch/s390/include/asm/nmi.h	|   2
-rw-r--r--	arch/s390/include/asm/page-states.h	|   1
-rw-r--r--	arch/s390/include/asm/page.h	|  37
-rw-r--r--	arch/s390/include/asm/pgalloc.h	|  18
-rw-r--r--	arch/s390/include/asm/pgtable.h	| 197
-rw-r--r--	arch/s390/include/asm/qdio.h	|   2
-rw-r--r--	arch/s390/include/asm/setup.h	|  17
-rw-r--r--	arch/s390/include/asm/spinlock.h	|   9
-rw-r--r--	arch/s390/include/asm/timex.h	|  40
-rw-r--r--	arch/s390/include/asm/tlb.h	|   6
-rw-r--r--	arch/s390/include/asm/tlbflush.h	|   7
-rw-r--r--	arch/s390/include/asm/topology.h	|   6
-rw-r--r--	arch/s390/include/asm/types.h	|  11
-rw-r--r--	arch/s390/include/asm/unaligned.h	|  13
-rw-r--r--	arch/s390/include/uapi/asm/Kbuild	|   1
-rw-r--r--	arch/s390/include/uapi/asm/dasd.h	|   6
-rw-r--r--	arch/s390/include/uapi/asm/swab.h	|  89
-rw-r--r--	arch/s390/include/uapi/asm/vmcp.h (renamed from drivers/s390/char/vmcp.h)	|  20
-rw-r--r--	arch/s390/kernel/asm-offsets.c	|   1
-rw-r--r--	arch/s390/kernel/cpcmd.c	|  13
-rw-r--r--	arch/s390/kernel/debug.c	|   9
-rw-r--r--	arch/s390/kernel/dumpstack.c	|   2
-rw-r--r--	arch/s390/kernel/early.c	|  17
-rw-r--r--	arch/s390/kernel/head.S	|   3
-rw-r--r--	arch/s390/kernel/head64.S	|   4
-rw-r--r--	arch/s390/kernel/irq.c	|   3
-rw-r--r--	arch/s390/kernel/relocate_kernel.S	|   5
-rw-r--r--	arch/s390/kernel/setup.c	|  14
-rw-r--r--	arch/s390/kernel/smp.c	|   1
-rw-r--r--	arch/s390/kernel/suspend.c	|  24
-rw-r--r--	arch/s390/kernel/time.c	|  67
-rw-r--r--	arch/s390/kernel/vdso.c	|   2
-rw-r--r--	arch/s390/kernel/vdso32/vdso32.lds.S	|   4
-rw-r--r--	arch/s390/kernel/vdso64/vdso64.lds.S	|   4
-rw-r--r--	arch/s390/kvm/diag.c	|   8
-rw-r--r--	arch/s390/kvm/gaccess.c	|  35
-rw-r--r--	arch/s390/kvm/priv.c	|   8
-rw-r--r--	arch/s390/kvm/vsie.c	|   2
-rw-r--r--	arch/s390/lib/delay.c	|   2
-rw-r--r--	arch/s390/lib/spinlock.c	|  87
-rw-r--r--	arch/s390/lib/uaccess.c	|  38
-rw-r--r--	arch/s390/mm/fault.c	|  10
-rw-r--r--	arch/s390/mm/gmap.c	| 163
-rw-r--r--	arch/s390/mm/init.c	|  60
-rw-r--r--	arch/s390/mm/page-states.c	| 192
-rw-r--r--	arch/s390/mm/pageattr.c	|   5
-rw-r--r--	arch/s390/mm/pgalloc.c	|  12
-rw-r--r--	arch/s390/mm/pgtable.c	| 154
-rw-r--r--	arch/s390/mm/vmem.c	|  47
-rw-r--r--	arch/s390/pci/pci_clp.c	|  10
-rw-r--r--	arch/s390/tools/gen_facilities.c	|   5
-rw-r--r--	drivers/s390/block/dasd.c	|  55
-rw-r--r--	drivers/s390/block/dasd_3990_erp.c	|   2
-rw-r--r--	drivers/s390/block/dasd_devmap.c	|   3
-rw-r--r--	drivers/s390/block/dasd_diag.c	|   2
-rw-r--r--	drivers/s390/block/dasd_eckd.c	|   8
-rw-r--r--	drivers/s390/block/dasd_eckd.h	|   2
-rw-r--r--	drivers/s390/block/dasd_erp.c	|   2
-rw-r--r--	drivers/s390/block/dasd_fba.c	| 202
-rw-r--r--	drivers/s390/block/dasd_int.h	|  19
-rw-r--r--	drivers/s390/block/dasd_proc.c	|   2
-rw-r--r--	drivers/s390/block/scm_blk.c	|  13
-rw-r--r--	drivers/s390/char/Kconfig	|  11
-rw-r--r--	drivers/s390/char/raw3270.c	|   2
-rw-r--r--	drivers/s390/char/sclp_cmd.c	|   1
-rw-r--r--	drivers/s390/char/sclp_config.c	|   2
-rw-r--r--	drivers/s390/char/sclp_early.c	|   6
-rw-r--r--	drivers/s390/char/sclp_ocf.c	|   2
-rw-r--r--	drivers/s390/char/tape_core.c	|   2
-rw-r--r--	drivers/s390/char/vmcp.c	| 112
-rw-r--r--	drivers/s390/cio/chp.c	|   4
-rw-r--r--	drivers/s390/cio/device.c	|   4
-rw-r--r--	drivers/s390/crypto/zcrypt_card.c	|   2
-rw-r--r--	drivers/s390/crypto/zcrypt_msgtype6.c	|   2
-rw-r--r--	drivers/s390/crypto/zcrypt_queue.c	|   2
-rw-r--r--	drivers/s390/net/qeth_l3_sys.c	|   8
87 files changed, 1405 insertions(+), 694 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 591d48f3a7de..6996b7727b85 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4395,6 +4395,10 @@
 			decrease the size and leave more room for directly
 			mapped kernel RAM.
 
+	vmcp_cma=nn[MG]	[KNL,S390]
+			Sets the memory size reserved for contiguous memory
+			allocations for the vmcp device driver.
+
 	vmhalt=		[KNL,S390] Perform z/VM CP command after system halt.
 			Format: <command>
 
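For orientation, this is how such memory-size parameters are typically consumed: an early-param handler parses the "nn[MG]" suffix with the kernel's memparse() helper and rounds to page size. A minimal sketch only; the symbol names vmcp_cma_size and early_vmcp_cma are illustrative, not necessarily what the driver uses:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static unsigned long vmcp_cma_size __initdata;	/* illustrative name */

/* Parse "vmcp_cma=4M"-style values and round up to whole pages. */
static int __init early_vmcp_cma(char *p)
{
	vmcp_cma_size = ALIGN(memparse(p, NULL), PAGE_SIZE);
	return 0;
}
early_param("vmcp_cma", early_vmcp_cma);

Booting with e.g. vmcp_cma=8M would then reserve 8 MB of CMA for vmcp response buffers.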
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7eeb75d758c1..48af970320cb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -222,6 +222,10 @@ config HAVE_MARCH_Z13_FEATURES
 	def_bool n
 	select HAVE_MARCH_ZEC12_FEATURES
 
+config HAVE_MARCH_Z14_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z13_FEATURES
+
 choice
 	prompt "Processor type"
 	default MARCH_Z196
@@ -282,6 +286,14 @@ config MARCH_Z13
 	  2964 series). The kernel will be slightly faster but will not work on
 	  older machines.
 
+config MARCH_Z14
+	bool "IBM z14"
+	select HAVE_MARCH_Z14_FEATURES
+	help
+	  Select this to enable optimizations for IBM z14 (3906 series).
+	  The kernel will be slightly faster but will not work on older
+	  machines.
+
 endchoice
 
 config MARCH_Z900_TUNE
@@ -305,6 +317,9 @@ config MARCH_ZEC12_TUNE
 config MARCH_Z13_TUNE
 	def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT
 
+config MARCH_Z14_TUNE
+	def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT
+
 choice
 	prompt "Tune code generation"
 	default TUNE_DEFAULT
@@ -343,6 +358,9 @@ config TUNE_ZEC12
 config TUNE_Z13
 	bool "IBM z13"
 
+config TUNE_Z14
+	bool "IBM z14"
+
 endchoice
 
 config 64BIT
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 54e00526b8df..dac821cfcd43 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -31,7 +31,8 @@ mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
 mflags-$(CONFIG_MARCH_Z10)    := -march=z10
 mflags-$(CONFIG_MARCH_Z196)   := -march=z196
 mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
 mflags-$(CONFIG_MARCH_Z13)    := -march=z13
+mflags-$(CONFIG_MARCH_Z14)    := -march=z14
 
 export CC_FLAGS_MARCH := $(mflags-y)
 
@@ -44,7 +45,8 @@ cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109
 cflags-$(CONFIG_MARCH_Z10_TUNE)    += -mtune=z10
 cflags-$(CONFIG_MARCH_Z196_TUNE)   += -mtune=z196
 cflags-$(CONFIG_MARCH_ZEC12_TUNE)  += -mtune=zEC12
 cflags-$(CONFIG_MARCH_Z13_TUNE)    += -mtune=z13
+cflags-$(CONFIG_MARCH_Z14_TUNE)    += -mtune=z14
 
 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
 
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index b3c88479feba..6e2c9f7e47fa 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -16,4 +16,5 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
+generic-y += unaligned.h
 generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
index 3dfadb5d648f..ca2b0624ad46 100644
--- a/arch/s390/include/asm/cpcmd.h
+++ b/arch/s390/include/asm/cpcmd.h
@@ -10,9 +10,8 @@
 
 /*
  * the lowlevel function for cpcmd
- * the caller of __cpcmd has to ensure that the response buffer is below 2 GB
  */
-extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
 
 /*
  * cpcmd is the in-kernel interface for issuing CP commands
@@ -25,8 +24,8 @@
  * response_code: return pointer for VM's error code
  * return value: the size of the response. The caller can check if the buffer
  *		was large enough by comparing the return value and rlen
- * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep
+ * NOTE: If the response buffer is not in real storage, cpcmd can sleep
  */
-extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
 
 #endif /* _ASM_S390_CPCMD_H */
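A hedged sketch of how a caller would use the interface declared above; the command string is an example and error handling is elided:

#include <linux/printk.h>
#include <asm/cpcmd.h>

static void example_query_userid(void)
{
	char response[128];
	int cprc, len;

	/* Issue a CP command; the return value is the full response
	 * size, so a value larger than sizeof(response) means the
	 * buffer was too small (see the comment on the return value
	 * above). */
	len = cpcmd("QUERY USERID", response, sizeof(response), &cprc);
	if (len > (int) sizeof(response))
		pr_warn("response truncated, need %d bytes\n", len);
}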
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index c5befc5a3bf5..b71735eab23f 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -9,9 +9,7 @@
 #ifndef _EBCDIC_H
 #define _EBCDIC_H
 
-#ifndef _S390_TYPES_H
-#include <types.h>
-#endif
+#include <linux/types.h>
 
 extern __u8 _ascebc_500[256];   /* ASCII -> EBCDIC 500 conversion table */
 extern __u8 _ebcasc_500[256];   /* EBCDIC 500 -> ASCII conversion table */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index c92ed0170be2..65998a1f5d43 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -191,7 +191,7 @@ struct arch_elf_state {
 } while (0)
 
 #define CORE_DUMP_USE_REGSET
-#define ELF_EXEC_PAGESIZE	4096
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index edb5161df7e2..6810bd757312 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -81,7 +81,7 @@ struct ipl_parameter_block {
 		struct ipl_block_fcp fcp;
 		struct ipl_block_ccw ccw;
 	} ipl_info;
-} __attribute__((packed,aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
 
 /*
  * IPL validity flags
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 8a5b082797f8..a6870ea6ea8b 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -95,46 +95,46 @@ struct lowcore {
 	__u64	int_clock;			/* 0x0310 */
 	__u64	mcck_clock;			/* 0x0318 */
 	__u64	clock_comparator;		/* 0x0320 */
+	__u64	boot_clock[2];			/* 0x0328 */
 
 	/* Current process. */
-	__u64	current_task;			/* 0x0328 */
-	__u8	pad_0x318[0x320-0x318];		/* 0x0330 */
-	__u64	kernel_stack;			/* 0x0338 */
+	__u64	current_task;			/* 0x0338 */
+	__u64	kernel_stack;			/* 0x0340 */
 
 	/* Interrupt, panic and restart stack. */
-	__u64	async_stack;			/* 0x0340 */
-	__u64	panic_stack;			/* 0x0348 */
-	__u64	restart_stack;			/* 0x0350 */
+	__u64	async_stack;			/* 0x0348 */
+	__u64	panic_stack;			/* 0x0350 */
+	__u64	restart_stack;			/* 0x0358 */
 
 	/* Restart function and parameter. */
-	__u64	restart_fn;			/* 0x0358 */
-	__u64	restart_data;			/* 0x0360 */
-	__u64	restart_source;			/* 0x0368 */
+	__u64	restart_fn;			/* 0x0360 */
+	__u64	restart_data;			/* 0x0368 */
+	__u64	restart_source;			/* 0x0370 */
 
 	/* Address space pointer. */
-	__u64	kernel_asce;			/* 0x0370 */
-	__u64	user_asce;			/* 0x0378 */
+	__u64	kernel_asce;			/* 0x0378 */
+	__u64	user_asce;			/* 0x0380 */
 
 	/*
 	 * The lpp and current_pid fields form a
	 * 64-bit value that is set as program
 	 * parameter with the LPP instruction.
 	 */
-	__u32	lpp;				/* 0x0380 */
-	__u32	current_pid;			/* 0x0384 */
+	__u32	lpp;				/* 0x0388 */
+	__u32	current_pid;			/* 0x038c */
 
 	/* SMP info area */
-	__u32	cpu_nr;				/* 0x0388 */
-	__u32	softirq_pending;		/* 0x038c */
-	__u64	percpu_offset;			/* 0x0390 */
-	__u64	vdso_per_cpu_data;		/* 0x0398 */
-	__u64	machine_flags;			/* 0x03a0 */
-	__u32	preempt_count;			/* 0x03a8 */
-	__u8	pad_0x03ac[0x03b0-0x03ac];	/* 0x03ac */
-	__u64	gmap;				/* 0x03b0 */
-	__u32	spinlock_lockval;		/* 0x03b8 */
-	__u32	fpu_flags;			/* 0x03bc */
-	__u8	pad_0x03c0[0x0400-0x03c0];	/* 0x03c0 */
+	__u32	cpu_nr;				/* 0x0390 */
+	__u32	softirq_pending;		/* 0x0394 */
+	__u64	percpu_offset;			/* 0x0398 */
+	__u64	vdso_per_cpu_data;		/* 0x03a0 */
+	__u64	machine_flags;			/* 0x03a8 */
+	__u32	preempt_count;			/* 0x03b0 */
+	__u8	pad_0x03b4[0x03b8-0x03b4];	/* 0x03b4 */
+	__u64	gmap;				/* 0x03b8 */
+	__u32	spinlock_lockval;		/* 0x03c0 */
+	__u32	fpu_flags;			/* 0x03c4 */
+	__u8	pad_0x03c8[0x0400-0x03c8];	/* 0x03c8 */
 
 	/* Per cpu primary space access list */
 	__u32	paste[16];			/* 0x0400 */
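The inserted boot_clock[2] field is 16 bytes, which is why the comment offsets above move; a quick standalone check of the arithmetic (plain userspace C, not kernel code):

#include <assert.h>

int main(void)
{
	/* 16 bytes (__u64 boot_clock[2]) inserted at 0x0328 */
	assert(2 * sizeof(unsigned long long) == 0x10);
	assert(0x0328 + 0x10 == 0x0338);	/* current_task */
	/* the old 8-byte pad after current_task is dropped, so the
	 * fields after it only move by 8 (kernel_stack 0x0338 -> 0x0340) */
	assert(0x0338 + 0x08 == 0x0340);
	/* the final pad absorbs the remaining 8 bytes (0x40 -> 0x38),
	 * keeping paste[] at 0x0400 */
	assert((0x0400 - 0x03c0) - (0x0400 - 0x03c8) == 0x08);
	return 0;
}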
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
deleted file mode 100644
index b79813d9cf68..000000000000
--- a/arch/s390/include/asm/mman.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/mman.h"
- */
-#ifndef __S390_MMAN_H__
-#define __S390_MMAN_H__
-
-#include <uapi/asm/mman.h>
-
-#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 24bc41622a98..72e9ca83a668 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -12,6 +12,7 @@
 #include <linux/mm_types.h>
 #include <asm/tlbflush.h>
 #include <asm/ctl_reg.h>
+#include <asm-generic/mm_hooks.h>
 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
@@ -33,7 +34,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.use_cmma = 0;
 #endif
 	switch (mm->context.asce_limit) {
-	case 1UL << 42:
+	case _REGION2_SIZE:
 		/*
 		 * forked 3-level task, fall through to set new asce with new
 		 * mm->pgd
@@ -49,12 +50,12 @@ static inline int init_new_context(struct task_struct *tsk,
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
 		break;
-	case 1UL << 53:
+	case _REGION1_SIZE:
 		/* forked 4-level task, set new asce with new mm->pgd */
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
 		break;
-	case 1UL << 31:
+	case _REGION3_SIZE:
 		/* forked 2-level compat task, set new asce with new mm->pgd */
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
@@ -138,30 +139,4 @@ static inline void activate_mm(struct mm_struct *prev,
 	set_user_asce(next);
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-				 struct mm_struct *mm)
-{
-}
-
-static inline void arch_exit_mmap(struct mm_struct *mm)
-{
-}
-
-static inline void arch_unmap(struct mm_struct *mm,
-			      struct vm_area_struct *vma,
-			      unsigned long start, unsigned long end)
-{
-}
-
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
-				     struct vm_area_struct *vma)
-{
-}
-
-static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-					     bool write, bool execute, bool foreign)
-{
-	/* by default, allow everything */
-	return true;
-}
 #endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 9d91cf3e427f..c8e211b9a002 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -72,7 +72,7 @@ union mci {
 	u64 ar  :  1; /* 33 access register validity */
 	u64 da  :  1; /* 34 delayed access exception */
 	u64	:  1; /* 35 */
-	u64 gs	:  1; /* 36 guarded storage registers */
+	u64 gs	:  1; /* 36 guarded storage registers validity */
 	u64	:  5; /* 37-41 */
 	u64 pr  :  1; /* 42 tod programmable register validity */
 	u64 fc  :  1; /* 43 fp control register validity */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index 42267a2fe29e..ca21b28a7b17 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -13,6 +13,7 @@
 #define ESSA_SET_POT_VOLATILE		4
 #define ESSA_SET_STABLE_RESIDENT	5
 #define ESSA_SET_STABLE_IF_RESIDENT	6
+#define ESSA_SET_STABLE_NODAT		7
 
 #define ESSA_MAX	ESSA_SET_STABLE_IF_RESIDENT
 
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 624deaa44230..5d5c2b3500a4 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -10,10 +10,14 @@
 #include <linux/const.h>
 #include <asm/types.h>
 
+#define _PAGE_SHIFT	12
+#define _PAGE_SIZE	(_AC(1, UL) << _PAGE_SHIFT)
+#define _PAGE_MASK	(~(_PAGE_SIZE - 1))
+
 /* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT	12
-#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK	(~(PAGE_SIZE-1))
+#define PAGE_SHIFT	_PAGE_SHIFT
+#define PAGE_SIZE	_PAGE_SIZE
+#define PAGE_MASK	_PAGE_MASK
 #define PAGE_DEFAULT_ACC	0
 #define PAGE_DEFAULT_KEY	(PAGE_DEFAULT_ACC << 4)
 
@@ -133,6 +137,9 @@ static inline int page_reset_referenced(unsigned long addr)
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
+void arch_set_page_dat(struct page *page, int order);
+void arch_set_page_nodat(struct page *page, int order);
+int arch_test_page_nodat(struct page *page);
 void arch_set_page_states(int make_stable);
 
 static inline int devmem_is_allowed(unsigned long pfn)
@@ -145,16 +152,26 @@ static inline int devmem_is_allowed(unsigned long pfn)
 
 #endif /* !__ASSEMBLY__ */
 
 #define __PAGE_OFFSET		0x0UL
 #define PAGE_OFFSET		0x0UL
-#define __pa(x)			(unsigned long)(x)
-#define __va(x)			(void *)(unsigned long)(x)
-#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define __pa(x)			((unsigned long)(x))
+#define __va(x)			((void *)(unsigned long)(x))
+
+#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
 
+#define phys_to_pfn(kaddr)	((kaddr) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(kaddr)	pfn_to_page(phys_to_pfn(kaddr))
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index bb0ff1bb0c4a..a0d9167519b1 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -15,6 +15,8 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 
+#define CRST_ALLOC_ORDER 2
+
 unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 
@@ -42,16 +44,16 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 
 static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 {
-	clear_table(crst, entry, sizeof(unsigned long)*2048);
+	clear_table(crst, entry, _CRST_TABLE_SIZE);
 }
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-	if (mm->context.asce_limit <= (1UL << 31))
+	if (mm->context.asce_limit <= _REGION3_SIZE)
 		return _SEGMENT_ENTRY_EMPTY;
-	if (mm->context.asce_limit <= (1UL << 42))
+	if (mm->context.asce_limit <= _REGION2_SIZE)
 		return _REGION3_ENTRY_EMPTY;
-	if (mm->context.asce_limit <= (1UL << 53))
+	if (mm->context.asce_limit <= _REGION1_SIZE)
 		return _REGION2_ENTRY_EMPTY;
 	return _REGION1_ENTRY_EMPTY;
 }
@@ -119,7 +121,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	if (!table)
 		return NULL;
-	if (mm->context.asce_limit == (1UL << 31)) {
+	if (mm->context.asce_limit == _REGION3_SIZE) {
 		/* Forking a compat process with 2 page table levels */
 		if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
 			crst_table_free(mm, table);
@@ -131,7 +133,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	if (mm->context.asce_limit == (1UL << 31))
+	if (mm->context.asce_limit == _REGION3_SIZE)
 		pgtable_pmd_page_dtor(virt_to_page(pgd));
 	crst_table_free(mm, (unsigned long *) pgd);
 }
@@ -158,4 +160,8 @@ static inline void pmd_populate(struct mm_struct *mm,
 
 extern void rcu_table_freelist_finish(void);
 
+void vmem_map_init(void);
+void *vmem_crst_alloc(unsigned long val);
+pte_t *vmem_pte_alloc(void);
+
 #endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57057fb1cc07..dce708e061ea 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -11,19 +11,6 @@
 #ifndef _ASM_S390_PGTABLE_H
 #define _ASM_S390_PGTABLE_H
 
-/*
- * The Linux memory management assumes a three-level page table setup.
- * For s390 64 bit we use up to four of the five levels the hardware
- * provides (region first tables are not used).
- *
- * The "pgd_xxx()" functions are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- *
- * This file contains the functions and defines necessary to modify and use
- * the S390 page table tree.
- */
-#ifndef __ASSEMBLY__
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
@@ -34,9 +21,6 @@
 
 extern pgd_t swapper_pg_dir[];
 extern void paging_init(void);
-extern void vmem_map_init(void);
-pmd_t *vmem_pmd_alloc(void);
-pte_t *vmem_pte_alloc(void);
 
 enum {
 	PG_DIRECT_MAP_4K = 0,
@@ -77,38 +61,6 @@ extern unsigned long zero_page_mask;
 #define __HAVE_COLOR_ZERO_PAGE
 
 /* TODO: s390 cannot support io_remap_pfn_range... */
-#endif /* !__ASSEMBLY__ */
-
-/*
- * PMD_SHIFT determines the size of the area a second-level page
- * table can map
- * PGDIR_SHIFT determines what a third-level page table entry can map
- */
-#define PMD_SHIFT	20
-#define PUD_SHIFT	31
-#define P4D_SHIFT	42
-#define PGDIR_SHIFT	53
-
-#define PMD_SIZE        (1UL << PMD_SHIFT)
-#define PMD_MASK        (~(PMD_SIZE-1))
-#define PUD_SIZE	(1UL << PUD_SHIFT)
-#define PUD_MASK	(~(PUD_SIZE-1))
-#define P4D_SIZE	(1UL << P4D_SHIFT)
-#define P4D_MASK	(~(P4D_SIZE-1))
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/*
- * entries per page directory level: the S390 is two-level, so
- * we don't really have any PMD directory physically.
- * for S390 segment-table entries are combined to one PGD
- * that leads to 1024 pte per pgd
- */
-#define PTRS_PER_PTE	256
-#define PTRS_PER_PMD	2048
-#define PTRS_PER_PUD	2048
-#define PTRS_PER_P4D	2048
-#define PTRS_PER_PGD	2048
 
 #define FIRST_USER_ADDRESS  0UL
 
@@ -123,7 +75,6 @@ extern unsigned long zero_page_mask;
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
 
-#ifndef __ASSEMBLY__
 /*
  * The vmalloc and module area will always be on the topmost area of the
  * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
@@ -269,7 +220,7 @@ static inline int is_module_addr(void *addr)
  */
 
 /* Bits in the segment/region table address-space-control-element */
-#define _ASCE_ORIGIN		~0xfffUL/* segment table origin		    */
+#define _ASCE_ORIGIN		~0xfffUL/* region/segment table origin	    */
 #define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
 #define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
 #define _ASCE_SPACE_SWITCH	0x40	/* space switch event		    */
@@ -320,9 +271,9 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
 #define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
-#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin		    */
-#define _SEGMENT_ENTRY_PROTECT	0x200	/* page protection bit		    */
-#define _SEGMENT_ENTRY_NOEXEC	0x100	/* region no-execute bit	    */
+#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* page table origin		    */
+#define _SEGMENT_ENTRY_PROTECT	0x200	/* segment protection bit	    */
+#define _SEGMENT_ENTRY_NOEXEC	0x100	/* segment no-execute bit	    */
 #define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
 
 #define _SEGMENT_ENTRY		(0)
@@ -340,6 +291,54 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
 #endif
 
+#define _CRST_ENTRIES	2048	/* number of region/segment table entries */
+#define _PAGE_ENTRIES	256	/* number of page table entries */
+
+#define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8)
+#define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8)
+
+#define _REGION1_SHIFT	53
+#define _REGION2_SHIFT	42
+#define _REGION3_SHIFT	31
+#define _SEGMENT_SHIFT	20
+
+#define _REGION1_INDEX	(0x7ffUL << _REGION1_SHIFT)
+#define _REGION2_INDEX	(0x7ffUL << _REGION2_SHIFT)
+#define _REGION3_INDEX	(0x7ffUL << _REGION3_SHIFT)
+#define _SEGMENT_INDEX	(0x7ffUL << _SEGMENT_SHIFT)
+#define _PAGE_INDEX	(0xffUL << _PAGE_SHIFT)
+
+#define _REGION1_SIZE	(1UL << _REGION1_SHIFT)
+#define _REGION2_SIZE	(1UL << _REGION2_SHIFT)
+#define _REGION3_SIZE	(1UL << _REGION3_SHIFT)
+#define _SEGMENT_SIZE	(1UL << _SEGMENT_SHIFT)
+
+#define _REGION1_MASK	(~(_REGION1_SIZE - 1))
+#define _REGION2_MASK	(~(_REGION2_SIZE - 1))
+#define _REGION3_MASK	(~(_REGION3_SIZE - 1))
+#define _SEGMENT_MASK	(~(_SEGMENT_SIZE - 1))
+
+#define PMD_SHIFT	_SEGMENT_SHIFT
+#define PUD_SHIFT	_REGION3_SHIFT
+#define P4D_SHIFT	_REGION2_SHIFT
+#define PGDIR_SHIFT	_REGION1_SHIFT
+
+#define PMD_SIZE	_SEGMENT_SIZE
+#define PUD_SIZE	_REGION3_SIZE
+#define P4D_SIZE	_REGION2_SIZE
+#define PGDIR_SIZE	_REGION1_SIZE
+
+#define PMD_MASK	_SEGMENT_MASK
+#define PUD_MASK	_REGION3_MASK
+#define P4D_MASK	_REGION2_MASK
+#define PGDIR_MASK	_REGION1_MASK
+
+#define PTRS_PER_PTE	_PAGE_ENTRIES
+#define PTRS_PER_PMD	_CRST_ENTRIES
+#define PTRS_PER_PUD	_CRST_ENTRIES
+#define PTRS_PER_P4D	_CRST_ENTRIES
+#define PTRS_PER_PGD	_CRST_ENTRIES
+
 /*
  * Segment table and region3 table entry encoding
  * (R = read-only, I = invalid, y = young bit):
@@ -376,6 +375,7 @@ static inline int is_module_addr(void *addr)
 
 /* Guest Page State used for virtualization */
 #define _PGSTE_GPS_ZERO			0x0000000080000000UL
+#define _PGSTE_GPS_NODAT		0x0000000040000000UL
 #define _PGSTE_GPS_USAGE_MASK		0x0000000003000000UL
 #define _PGSTE_GPS_USAGE_STABLE		0x0000000000000000UL
 #define _PGSTE_GPS_USAGE_UNUSED		0x0000000001000000UL
@@ -505,7 +505,7 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
  * In the case that a guest uses storage keys
  * faults should no longer be backed by zero pages
  */
-#define mm_forbids_zeropage mm_use_skey
+#define mm_forbids_zeropage mm_has_pgste
 static inline int mm_use_skey(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
@@ -952,15 +952,30 @@ static inline pte_t pte_mkhuge(pte_t pte)
 #define IPTE_GLOBAL	0
 #define	IPTE_LOCAL	1
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
+#define IPTE_NODAT	0x400
+#define IPTE_GUEST_ASCE	0x800
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+			       unsigned long opt, unsigned long asce,
+			       int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + TLB flush for the pte */
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* Invalidation + TLB flush for the pte */
+		asm volatile(
+			"	.insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+			: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+			  [m4] "i" (local));
+		return;
+	}
+
+	/* Invalidate ptes with options + TLB flush of the ptes */
+	opt = opt | (asce & _ASCE_ORIGIN);
 	asm volatile(
-		"	.insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
-		: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
-		  [m4] "i" (local));
+		"	.insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+		: [r2] "+a" (address), [r3] "+a" (opt)
		: [r1] "a" (pto), [m4] "i" (local) : "memory");
 }
 
 static inline void __ptep_ipte_range(unsigned long address, int nr,
@@ -1341,31 +1356,61 @@ static inline void __pmdp_csp(pmd_t *pmdp)
 #define IDTE_GLOBAL	0
 #define IDTE_LOCAL	1
 
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
+#define IDTE_PTOA	0x0800
+#define IDTE_NODAT	0x1000
+#define IDTE_GUEST_ASCE	0x2000
+
+static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+			       unsigned long opt, unsigned long asce,
+			       int local)
 {
 	unsigned long sto;
 
-	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
-		: "+m" (*pmdp)
-		: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
-		  [m4] "i" (local)
-		: "cc" );
+	sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* flush without guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+			: "+m" (*pmdp)
+			: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)),
+			  [m4] "i" (local)
+			: "cc" );
+	} else {
+		/* flush with guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+			: "+m" (*pmdp)
+			: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
+			  [r3] "a" (asce), [m4] "i" (local)
+			: "cc" );
+	}
 }
 
-static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
+static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+			       unsigned long opt, unsigned long asce,
+			       int local)
 {
 	unsigned long r3o;
 
-	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
+	r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
 	r3o |= _ASCE_TYPE_REGION3;
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
-		: "+m" (*pudp)
-		: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
-		  [m4] "i" (local)
-		: "cc");
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* flush without guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+			: "+m" (*pudp)
+			: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)),
+			  [m4] "i" (local)
+			: "cc");
+	} else {
+		/* flush with guest asce */
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+			: "+m" (*pudp)
+			: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
+			  [r3] "a" (asce), [m4] "i" (local)
+			: "cc" );
+	}
 }
 
 pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
@@ -1548,8 +1593,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#endif /* !__ASSEMBLY__ */
-
 #define kern_addr_valid(addr)   (1)
 
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
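The new symbolic constants encode the 4K-page s390 translation geometry: each region/segment level indexes 11 bits (2048 entries), the page table level 8 bits (256 entries). A standalone check of that arithmetic:

#include <assert.h>

int main(void)
{
	/* 12 page bits + 8 pte bits + 4 x 11 crst bits cover the space */
	assert(20 == 12 + 8);		/* _SEGMENT_SHIFT */
	assert(31 == 20 + 11);		/* _REGION3_SHIFT */
	assert(42 == 31 + 11);		/* _REGION2_SHIFT */
	assert(53 == 42 + 11);		/* _REGION1_SHIFT */
	/* 2048 eight-byte entries: a crst table is 16K, i.e. 4 pages,
	 * which is why pgalloc.h defines CRST_ALLOC_ORDER as 2 */
	assert(2048 * 8 == 4 * 4096);
	assert(256 * 8 == 2048);	/* _PAGE_TABLE_SIZE */
	return 0;
}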
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 998b61cd0e56..eaee69e7c42a 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -80,7 +80,7 @@ struct qdr {
 	u32 qkey  : 4;
 	u32	  : 28;
 	struct qdesfmt0 qdf0[126];
-} __attribute__ ((packed, aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
 
 #define QIB_AC_OUTBOUND_PCI_SUPPORTED	0x40
 #define QIB_RFLAGS_ENABLE_QEBSM		0x80
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index cd78155b1829..490e035b3716 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,10 @@
 #define MACHINE_FLAG_TE		_BITUL(11)
 #define MACHINE_FLAG_TLB_LC	_BITUL(12)
 #define MACHINE_FLAG_VX		_BITUL(13)
-#define MACHINE_FLAG_NX		_BITUL(14)
-#define MACHINE_FLAG_GS		_BITUL(15)
+#define MACHINE_FLAG_TLB_GUEST	_BITUL(14)
+#define MACHINE_FLAG_NX		_BITUL(15)
+#define MACHINE_FLAG_GS		_BITUL(16)
+#define MACHINE_FLAG_SCC	_BITUL(17)
 
 #define LPP_MAGIC		_BITUL(31)
 #define LPP_PFAULT_PID_MASK	_AC(0xffffffff, UL)
@@ -68,8 +70,10 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
 #define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
 #define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_TLB_GUEST	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
 #define MACHINE_HAS_NX		(S390_lowcore.machine_flags & MACHINE_FLAG_NX)
 #define MACHINE_HAS_GS		(S390_lowcore.machine_flags & MACHINE_FLAG_GS)
+#define MACHINE_HAS_SCC		(S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
 
 /*
  * Console mode. Override with conmode=
@@ -104,9 +108,16 @@ extern void pfault_fini(void);
 #define pfault_fini()		do { } while (0)
 #endif /* CONFIG_PFAULT */
 
+#ifdef CONFIG_VMCP
+void vmcp_cma_reserve(void);
+#else
+static inline void vmcp_cma_reserve(void) { }
+#endif
+
 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
 
-extern void cmma_init(void);
+void cmma_init(void);
+void cmma_init_nodat(void);
 
 extern void (*_machine_restart)(char *command);
 extern void (*_machine_halt)(void);
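Note that _BITUL(n) is simply (1UL << n), so the renumbering above changes the flag values (NX moves from bit 14 to 15, GS from 15 to 16) while every call site keeps testing the MACHINE_HAS_* wrappers. A trivial standalone restatement:

#include <assert.h>

#define _BITUL(x) (1UL << (x))	/* as in <linux/const.h> */

int main(void)
{
	/* the value changes, the name (and all users of it) does not */
	assert(_BITUL(14) == 0x4000UL);	/* old MACHINE_FLAG_NX */
	assert(_BITUL(15) == 0x8000UL);	/* new MACHINE_FLAG_NX */
	return 0;
}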
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 217ee5210c32..8182b521c42f 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -92,10 +92,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
 	typecheck(int, lp->lock);
 	asm volatile(
-		"st	%1,%0\n"
-		: "+Q" (lp->lock)
-		: "d" (0)
-		: "cc", "memory");
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+		"	.long	0xb2fa0070\n"	/* NIAI 7 */
+#endif
+		"	st	%1,%0\n"
+		: "=Q" (lp->lock) : "d" (0) : "cc", "memory");
 }
 
 /*
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 118535123f34..93f2eb3f277c 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -15,6 +15,8 @@
 /* The value of the TOD clock for 1.1.1970. */
 #define TOD_UNIX_EPOCH		0x7d91048bca000000ULL
 
+extern u64 clock_comparator_max;
+
 /* Inline functions for clock register access. */
 static inline int set_tod_clock(__u64 time)
 {
@@ -126,7 +128,7 @@ static inline unsigned long long local_tick_disable(void)
 	unsigned long long old;
 
 	old = S390_lowcore.clock_comparator;
-	S390_lowcore.clock_comparator = -1ULL;
+	S390_lowcore.clock_comparator = clock_comparator_max;
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	return old;
 }
@@ -174,24 +176,24 @@ static inline cycles_t get_cycles(void)
 	return (cycles_t) get_tod_clock() >> 2;
 }
 
-int get_phys_clock(unsigned long long *clock);
+int get_phys_clock(unsigned long *clock);
 void init_cpu_timer(void);
 unsigned long long monotonic_clock(void);
 
-extern u64 sched_clock_base_cc;
+extern unsigned char tod_clock_base[16] __aligned(8);
 
 /**
  * get_clock_monotonic - returns current time in clock rate units
  *
  * The caller must ensure that preemption is disabled.
- * The clock and sched_clock_base get changed via stop_machine.
+ * The clock and tod_clock_base get changed via stop_machine.
 * Therefore preemption must be disabled when calling this
 * function, otherwise the returned value is not guaranteed to
 * be monotonic.
 */
 static inline unsigned long long get_tod_clock_monotonic(void)
 {
-	return get_tod_clock() - sched_clock_base_cc;
+	return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
 }
 
 /**
@@ -218,4 +220,32 @@ static inline unsigned long long tod_to_ns(unsigned long long todval)
 	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
 }
 
+/**
+ * tod_after - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after(unsigned long long a, unsigned long long b)
+{
+	if (MACHINE_HAS_SCC)
+		return (long long) a > (long long) b;
+	return a > b;
+}
+
+/**
+ * tod_after_eq - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after_eq(unsigned long long a, unsigned long long b)
+{
+	if (MACHINE_HAS_SCC)
+		return (long long) a >= (long long) b;
+	return a >= b;
+}
+
 #endif
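The signed comparison is what keeps ordering correct once the TOD clock wraps past the 2042 epoch mentioned in the merge message; a standalone illustration:

#include <assert.h>

int main(void)
{
	/* a timestamp just before the wrap and one just after it */
	unsigned long long before_wrap = 0xfffffffffffffff0ULL;
	unsigned long long after_wrap  = 0x0000000000000010ULL;

	/* a plain unsigned compare orders them the wrong way round... */
	assert(!(after_wrap > before_wrap));
	/* ...while the signed compare used when MACHINE_HAS_SCC is set
	 * treats the post-wrap value as later, as intended */
	assert((long long) after_wrap > (long long) before_wrap);
	return 0;
}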
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 2eb8ff0d6fca..3a14b864b2e3 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -135,7 +135,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= (1UL << 31))
+	if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
 		return;
 	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	tlb_remove_table(tlb, pmd);
@@ -151,7 +151,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= (1UL << 53))
+	if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
 		return;
 	tlb_remove_table(tlb, p4d);
 }
@@ -166,7 +166,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= (1UL << 42))
+	if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
 		return;
 	tlb_remove_table(tlb, pud);
 }
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 39846100682a..4d759f8f4bc7 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -20,10 +20,15 @@ static inline void __tlb_flush_local(void)
  */
 static inline void __tlb_flush_idte(unsigned long asce)
 {
+	unsigned long opt;
+
+	opt = IDTE_PTOA;
+	if (MACHINE_HAS_TLB_GUEST)
+		opt |= IDTE_GUEST_ASCE;
 	/* Global TLB flush for the mm */
 	asm volatile(
 		"	.insn	rrf,0xb98e0000,0,%0,%1,0"
-		: : "a" (2048), "a" (asce) : "cc");
+		: : "a" (opt), "a" (asce) : "cc");
 }
 
 #ifdef CONFIG_SMP
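The replaced magic operand and the new IDTE_PTOA option from pgtable.h are the same value, so the flush is unchanged unless the guest-TLB facility adds IDTE_GUEST_ASCE; a one-line arithmetic check:

#include <assert.h>

int main(void)
{
	assert(0x0800 == 2048);			/* IDTE_PTOA == old constant */
	assert((0x0800 | 0x2000) == 0x2800);	/* with IDTE_GUEST_ASCE */
	return 0;
}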
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fa1bfce10370..5222da162b69 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -77,12 +77,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
 	return &node_to_cpumask_map[node];
 }
 
-/*
- * Returns the number of the node containing node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 
 #define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
deleted file mode 100644
index 6740f4f9781f..000000000000
--- a/arch/s390/include/asm/types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/types.h"
- */
-#ifndef _S390_TYPES_H
-#define _S390_TYPES_H
-
-#include <uapi/asm/types.h>
-
-#endif /* _S390_TYPES_H */
diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h
deleted file mode 100644
index da9627afe5d8..000000000000
--- a/arch/s390/include/asm/unaligned.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_S390_UNALIGNED_H
-#define _ASM_S390_UNALIGNED_H
-
-/*
- * The S390 can do unaligned accesses itself.
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned	__get_unaligned_be
-#define put_unaligned	__put_unaligned_be
-
-#endif /* _ASM_S390_UNALIGNED_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index ca62066895e0..098f28778a13 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -9,4 +9,5 @@ generic-y += param.h
 generic-y += poll.h
 generic-y += resource.h
 generic-y += sockios.h
+generic-y += swab.h
 generic-y += termbits.h
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 1340311dab77..ab5797cdc1b7 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -72,7 +72,10 @@ typedef struct dasd_information2_t {
  * 0x02: use diag discipline (diag)
  * 0x04: set the device initially online (internal use only)
  * 0x08: enable ERP related logging
- * 0x20: give access to raw eckd data
+ * 0x10: allow I/O to fail on lost paths
+ * 0x20: allow I/O to fail when a lock was stolen
+ * 0x40: give access to raw eckd data
+ * 0x80: enable discard support
  */
 #define DASD_FEATURE_DEFAULT	0x00
 #define DASD_FEATURE_READONLY	0x01
@@ -82,6 +85,7 @@ typedef struct dasd_information2_t {
 #define DASD_FEATURE_FAILFAST	0x10
 #define DASD_FEATURE_FAILONSLCK 0x20
 #define DASD_FEATURE_USERAW	0x40
+#define DASD_FEATURE_DISCARD	0x80
 
 #define DASD_PARTN_BITS		2
 
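The feature values form a bit mask, so the new flag composes with the existing ones; for example, a read-only device with discard enabled would carry 0x81:

#include <assert.h>

#define DASD_FEATURE_READONLY	0x01
#define DASD_FEATURE_DISCARD	0x80

int main(void)
{
	int features = DASD_FEATURE_READONLY | DASD_FEATURE_DISCARD;

	assert(features == 0x81);
	assert(features & DASD_FEATURE_DISCARD);	/* test a single bit */
	return 0;
}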
diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h
deleted file mode 100644
index da3bfe5cc161..000000000000
--- a/arch/s390/include/uapi/asm/swab.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _S390_SWAB_H
-#define _S390_SWAB_H
-
-/*
- *  S390 version
- *    Copyright IBM Corp. 1999
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-
-#include <linux/types.h>
-
-#ifndef __s390x__
-# define __SWAB_64_THRU_32__
-#endif
-
-#ifdef __s390x__
-static inline __u64 __arch_swab64p(const __u64 *x)
-{
-	__u64 result;
-
-	asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x));
-	return result;
-}
-#define __arch_swab64p __arch_swab64p
-
-static inline __u64 __arch_swab64(__u64 x)
-{
-	__u64 result;
-
-	asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x));
-	return result;
-}
-#define __arch_swab64 __arch_swab64
-
-static inline void __arch_swab64s(__u64 *x)
-{
-	*x = __arch_swab64p(x);
-}
-#define __arch_swab64s __arch_swab64s
-#endif /* __s390x__ */
-
-static inline __u32 __arch_swab32p(const __u32 *x)
-{
-	__u32 result;
-
-	asm volatile(
-#ifndef __s390x__
-		"	icm	%0,8,%O1+3(%R1)\n"
-		"	icm	%0,4,%O1+2(%R1)\n"
-		"	icm	%0,2,%O1+1(%R1)\n"
-		"	ic	%0,%1"
-		: "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
-		"	lrv	%0,%1"
-		: "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
-	return result;
-}
-#define __arch_swab32p __arch_swab32p
-
-#ifdef __s390x__
-static inline __u32 __arch_swab32(__u32 x)
-{
-	__u32 result;
-
-	asm volatile("lrvr  %0,%1" : "=d" (result) : "d" (x));
-	return result;
-}
-#define __arch_swab32 __arch_swab32
-#endif /* __s390x__ */
-
-static inline __u16 __arch_swab16p(const __u16 *x)
-{
-	__u16 result;
-
-	asm volatile(
-#ifndef __s390x__
-		"	icm	%0,2,%O1+1(%R1)\n"
-		"	ic	%0,%1\n"
-		: "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
-		"	lrvh	%0,%1"
-		: "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
-	return result;
-}
-#define __arch_swab16p __arch_swab16p
-
-#endif /* _S390_SWAB_H */
diff --git a/drivers/s390/char/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
index 1e29b0418382..4caf71714a55 100644
--- a/drivers/s390/char/vmcp.h
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -12,19 +12,13 @@
12 * The idea of this driver is based on cpint from Neale Ferguson 12 * The idea of this driver is based on cpint from Neale Ferguson
13 */ 13 */
14 14
15#ifndef _UAPI_ASM_VMCP_H
16#define _UAPI_ASM_VMCP_H
17
15#include <linux/ioctl.h> 18#include <linux/ioctl.h>
16#include <linux/mutex.h>
17 19
18#define VMCP_GETCODE _IOR(0x10, 1, int) 20#define VMCP_GETCODE _IOR(0x10, 1, int)
19#define VMCP_SETBUF _IOW(0x10, 2, int) 21#define VMCP_SETBUF _IOW(0x10, 2, int)
20#define VMCP_GETSIZE _IOR(0x10, 3, int) 22#define VMCP_GETSIZE _IOR(0x10, 3, int)
21 23
22struct vmcp_session { 24#endif /* _UAPI_ASM_VMCP_H */
23 unsigned int bufsize;
24 char *response;
25 int resp_size;
26 int resp_code;
27 /* As we use copy_from/to_user, which might *
28 * sleep and cannot use a spinlock */
29 struct mutex mutex;
30};
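With the ioctl definitions moved to uapi (and the kernel-only vmcp_session state kept private in the driver), userspace can include <asm/vmcp.h> directly. A hedged usage sketch, assuming the long-standing /dev/vmcp semantics: write the CP command, read the response, fetch the CP return code via VMCP_GETCODE:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/vmcp.h>

int main(void)
{
        const char *cmd = "QUERY USERID";
        char buf[4096];
        ssize_t n;
        int fd, code;

        fd = open("/dev/vmcp", O_RDWR);
        if (fd < 0) { perror("open"); return 1; }
        /* write the CP command, then read back the response buffer */
        if (write(fd, cmd, strlen(cmd)) < 0) { perror("write"); return 1; }
        n = read(fd, buf, sizeof(buf) - 1);
        if (n < 0) { perror("read"); return 1; }
        buf[n] = '\0';
        if (ioctl(fd, VMCP_GETCODE, &code) == 0)
                printf("CP rc=%d\n%s\n", code, buf);
        close(fd);
        return 0;
}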
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index b65c414b6c0e..3d42f91c95fd 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -158,6 +158,7 @@ int main(void)
158 OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); 158 OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
159 OFFSET(__LC_INT_CLOCK, lowcore, int_clock); 159 OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
160 OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); 160 OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
161 OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
161 OFFSET(__LC_CURRENT, lowcore, current_task); 162 OFFSET(__LC_CURRENT, lowcore, current_task);
162 OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); 163 OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
163 OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); 164 OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 9f0e4a2785f7..63bc6603e0ed 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -14,6 +14,7 @@
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/stddef.h> 15#include <linux/stddef.h>
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/mm.h>
17#include <asm/diag.h> 18#include <asm/diag.h>
18#include <asm/ebcdic.h> 19#include <asm/ebcdic.h>
19#include <asm/cpcmd.h> 20#include <asm/cpcmd.h>
@@ -28,9 +29,7 @@ static int diag8_noresponse(int cmdlen)
28 register unsigned long reg3 asm ("3") = cmdlen; 29 register unsigned long reg3 asm ("3") = cmdlen;
29 30
30 asm volatile( 31 asm volatile(
31 " sam31\n"
32 " diag %1,%0,0x8\n" 32 " diag %1,%0,0x8\n"
33 " sam64\n"
34 : "+d" (reg3) : "d" (reg2) : "cc"); 33 : "+d" (reg3) : "d" (reg2) : "cc");
35 return reg3; 34 return reg3;
36} 35}
@@ -43,9 +42,7 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
43 register unsigned long reg5 asm ("5") = *rlen; 42 register unsigned long reg5 asm ("5") = *rlen;
44 43
45 asm volatile( 44 asm volatile(
46 " sam31\n"
47 " diag %2,%0,0x8\n" 45 " diag %2,%0,0x8\n"
48 " sam64\n"
49 " brc 8,1f\n" 46 " brc 8,1f\n"
50 " agr %1,%4\n" 47 " agr %1,%4\n"
51 "1:\n" 48 "1:\n"
@@ -57,7 +54,6 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
57 54
58/* 55/*
59 * __cpcmd has some restrictions over cpcmd 56 * __cpcmd has some restrictions over cpcmd
60 * - the response buffer must reside below 2GB (if any)
61 * - __cpcmd is unlocked and therefore not SMP-safe 57 * - __cpcmd is unlocked and therefore not SMP-safe
62 */ 58 */
63int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) 59int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
@@ -88,13 +84,12 @@ EXPORT_SYMBOL(__cpcmd);
88 84
89int cpcmd(const char *cmd, char *response, int rlen, int *response_code) 85int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
90{ 86{
87 unsigned long flags;
91 char *lowbuf; 88 char *lowbuf;
92 int len; 89 int len;
93 unsigned long flags;
94 90
95 if ((virt_to_phys(response) != (unsigned long) response) || 91 if (is_vmalloc_or_module_addr(response)) {
96 (((unsigned long)response + rlen) >> 31)) { 92 lowbuf = kmalloc(rlen, GFP_KERNEL);
97 lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
98 if (!lowbuf) { 93 if (!lowbuf) {
99 pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); 94 pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
100 return -ENOMEM; 95 return -ENOMEM;
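The cpcmd rework follows from dropping the sam31/sam64 mode switches around diag 8: the response buffer no longer has to live below 2 GB, so the GFP_DMA bounce buffer is only needed for vmalloc/module addresses, which are not contiguous in real memory. A standalone sketch of the bounce-buffer pattern itself (the predicate and the backend are stand-ins, not the real calls):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* stand-in for is_vmalloc_or_module_addr(): any predicate saying
 * "this buffer does not meet the backend's addressing rules" */
static int needs_bounce(const void *buf)
{
        return ((unsigned long)buf & 1) != 0;
}

/* stand-in for the diag 8 call filling the response buffer */
static void backend_fill(char *buf, size_t len)
{
        memset(buf, 'x', len);
}

static int run_with_bounce(char *response, size_t rlen)
{
        char *lowbuf = response;

        if (needs_bounce(response)) {
                lowbuf = malloc(rlen);
                if (!lowbuf)
                        return -1;
        }
        backend_fill(lowbuf, rlen);
        if (lowbuf != response) {
                memcpy(response, lowbuf, rlen);
                free(lowbuf);
        }
        return 0;
}

int main(void)
{
        char resp[32];

        if (run_with_bounce(resp, sizeof(resp)) == 0)
                printf("first byte: %c\n", resp[0]);
        return 0;
}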
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 86b3e74f569e..1d9e83c401fc 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -866,7 +866,8 @@ static inline void
866debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, 866debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
867 int exception) 867 int exception)
868{ 868{
869 active->id.stck = get_tod_clock_fast() - sched_clock_base_cc; 869 active->id.stck = get_tod_clock_fast() -
870 *(unsigned long long *) &tod_clock_base[1];
870 active->id.fields.cpuid = smp_processor_id(); 871 active->id.fields.cpuid = smp_processor_id();
871 active->caller = __builtin_return_address(0); 872 active->caller = __builtin_return_address(0);
872 active->id.fields.exception = exception; 873 active->id.fields.exception = exception;
@@ -1455,15 +1456,15 @@ int
1455debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, 1456debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
1456 int area, debug_entry_t * entry, char *out_buf) 1457 int area, debug_entry_t * entry, char *out_buf)
1457{ 1458{
1458 unsigned long sec, usec; 1459 unsigned long base, sec, usec;
1459 char *except_str; 1460 char *except_str;
1460 unsigned long caller; 1461 unsigned long caller;
1461 int rc = 0; 1462 int rc = 0;
1462 unsigned int level; 1463 unsigned int level;
1463 1464
1464 level = entry->id.fields.level; 1465 level = entry->id.fields.level;
1465 sec = (entry->id.stck >> 12) + (sched_clock_base_cc >> 12); 1466 base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
1466 sec = sec - (TOD_UNIX_EPOCH >> 12); 1467 sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
1467 usec = do_div(sec, USEC_PER_SEC); 1468 usec = do_div(sec, USEC_PER_SEC);
1468 1469
1469 if (entry->id.fields.exception) 1470 if (entry->id.fields.exception)
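The header function now derives its epoch-relative base from tod_clock_base instead of sched_clock_base_cc, but the microsecond math is unchanged: TOD bit 51 ticks once per microsecond, so a basic 64-bit TOD value shifted right by 12 is microseconds. A standalone worked example (TOD_UNIX_EPOCH as defined in asm/timex.h):

#include <stdint.h>
#include <stdio.h>

#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL /* TOD value of 1970-01-01 */

int main(void)
{
        /* a TOD value 42.123456 s past the Unix epoch */
        uint64_t tod = TOD_UNIX_EPOCH + ((42ULL * 1000000 + 123456) << 12);
        uint64_t sec, usec;

        sec  = (tod >> 12) - (TOD_UNIX_EPOCH >> 12); /* microseconds */
        usec = sec % 1000000;
        sec /= 1000000;
        printf("%llu.%06llu\n", (unsigned long long)sec,
               (unsigned long long)usec);
        return 0;
}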
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dab78babfab6..2aa545dca4d5 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -76,7 +76,7 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
76 frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); 76 frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
77#ifdef CONFIG_CHECK_STACK 77#ifdef CONFIG_CHECK_STACK
78 sp = __dump_trace(func, data, sp, 78 sp = __dump_trace(func, data, sp,
79 S390_lowcore.panic_stack + frame_size - 4096, 79 S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
80 S390_lowcore.panic_stack + frame_size); 80 S390_lowcore.panic_stack + frame_size);
81#endif 81#endif
82 sp = __dump_trace(func, data, sp, 82 sp = __dump_trace(func, data, sp,
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 5d20182ee8ae..ca8cd80e8feb 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -53,8 +53,9 @@ static void __init reset_tod_clock(void)
53 if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) 53 if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
54 disabled_wait(0); 54 disabled_wait(0);
55 55
56 sched_clock_base_cc = TOD_UNIX_EPOCH; 56 memset(tod_clock_base, 0, 16);
57 S390_lowcore.last_update_clock = sched_clock_base_cc; 57 *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
58 S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
58} 59}
59 60
60#ifdef CONFIG_SHARED_KERNEL 61#ifdef CONFIG_SHARED_KERNEL
@@ -165,8 +166,8 @@ static noinline __init void create_kernel_nss(void)
165 } 166 }
166 167
167 /* re-initialize cputime accounting. */ 168 /* re-initialize cputime accounting. */
168 sched_clock_base_cc = get_tod_clock(); 169 get_tod_clock_ext(tod_clock_base);
169 S390_lowcore.last_update_clock = sched_clock_base_cc; 170 S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1];
170 S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; 171 S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
171 S390_lowcore.user_timer = 0; 172 S390_lowcore.user_timer = 0;
172 S390_lowcore.system_timer = 0; 173 S390_lowcore.system_timer = 0;
@@ -387,6 +388,12 @@ static __init void detect_machine_facilities(void)
387 } 388 }
388 if (test_facility(133)) 389 if (test_facility(133))
389 S390_lowcore.machine_flags |= MACHINE_FLAG_GS; 390 S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
391 if (test_facility(139) && (tod_clock_base[1] & 0x80)) {
 392 /* Enable signed clock comparator comparisons */

393 S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
394 clock_comparator_max = -1ULL >> 1;
395 __ctl_set_bit(0, 53);
396 }
390} 397}
391 398
392static inline void save_vector_registers(void) 399static inline void save_vector_registers(void)
@@ -413,7 +420,7 @@ static int __init disable_vector_extension(char *str)
413{ 420{
414 S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; 421 S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
415 __ctl_clear_bit(0, 17); 422 __ctl_clear_bit(0, 17);
416 return 1; 423 return 0;
417} 424}
418early_param("novx", disable_vector_extension); 425early_param("novx", disable_vector_extension);
419 426
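The one-line 'novx' fix is purely a return-convention fix: early_param() handlers report success with 0, and a non-zero return makes the early parser treat the option as malformed (__setup() handlers, by contrast, return 1 for "handled"). A standalone sketch of the convention, with the handler body elided:

#include <stdio.h>

/* mirrors disable_vector_extension(); the real handler clears the
 * MACHINE_FLAG_VX bit and control register 0 bit 17 */
static int disable_vector_extension(char *str)
{
        (void)str;
        return 0; /* 0 == success for early_param() handlers */
}

int main(void)
{
        printf("rc=%d\n", disable_vector_extension("novx"));
        return 0;
}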
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index eff5b31671d4..8ed753c72d9b 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -302,7 +302,8 @@ ENTRY(startup_kdump)
302 xc 0xe00(256),0xe00 302 xc 0xe00(256),0xe00
303 xc 0xf00(256),0xf00 303 xc 0xf00(256),0xf00
304 lctlg %c0,%c15,0x200(%r0) # initialize control registers 304 lctlg %c0,%c15,0x200(%r0) # initialize control registers
305 stck __LC_LAST_UPDATE_CLOCK 305 stcke __LC_BOOT_CLOCK
306 mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
306 spt 6f-.LPG0(%r13) 307 spt 6f-.LPG0(%r13)
307 mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) 308 mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
308 l %r15,.Lstack-.LPG0(%r13) 309 l %r15,.Lstack-.LPG0(%r13)
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 31c91f24e562..0d8f2a858ced 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -21,8 +21,8 @@ ENTRY(startup_continue)
21 xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid 21 xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
22 mvi __LC_LPP,0x80 # and set LPP_MAGIC 22 mvi __LC_LPP,0x80 # and set LPP_MAGIC
23 .insn s,0xb2800000,__LC_LPP # load program parameter 23 .insn s,0xb2800000,__LC_LPP # load program parameter
240: larl %r1,sched_clock_base_cc 240: larl %r1,tod_clock_base
25 mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK 25 mvc 0(16,%r1),__LC_BOOT_CLOCK
26 larl %r13,.LPG1 # get base 26 larl %r13,.LPG1 # get base
27 lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers 27 lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
28 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area 28 lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 6dca93b29bed..a2fdff0e730b 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -105,7 +105,8 @@ void do_IRQ(struct pt_regs *regs, int irq)
105 105
106 old_regs = set_irq_regs(regs); 106 old_regs = set_irq_regs(regs);
107 irq_enter(); 107 irq_enter();
108 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) 108 if (tod_after_eq(S390_lowcore.int_clock,
109 S390_lowcore.clock_comparator))
109 /* Serve timer interrupts first. */ 110 /* Serve timer interrupts first. */
110 clock_comparator_work(); 111 clock_comparator_work();
111 generic_handle_irq(irq); 112 generic_handle_irq(irq);
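tod_after_eq() replaces the plain unsigned compare so the ordering of two TOD values stays correct once the epoch extension is in use. A standalone sketch of the idea; the signed-compare variant is assumed from the asm/timex.h changes in this series, selected by the new MACHINE_FLAG_SCC:

#include <stdint.h>
#include <stdio.h>

static int has_scc = 1; /* stand-in for MACHINE_HAS_SCC */

/* with signed clock comparator comparisons enabled, TOD values are
 * ordered as signed 64-bit numbers, which survives the wrap of the
 * unsigned value in 2042 */
static int tod_after(uint64_t a, uint64_t b)
{
        if (has_scc)
                return (int64_t)a > (int64_t)b;
        return a > b;
}

int main(void)
{
        uint64_t before = 0xfffffffffffff000ULL; /* just before the wrap */
        uint64_t after  = 0x0000000000001000ULL; /* just after the wrap  */

        printf("signed: %d, unsigned: %d\n",
               tod_after(after, before), after > before);
        return 0;
}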
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index cfac28330b03..4bdc65636603 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h>
10#include <asm/sigp.h> 11#include <asm/sigp.h>
11 12
12/* 13/*
@@ -55,8 +56,8 @@ ENTRY(relocate_kernel)
55 .back_pgm: 56 .back_pgm:
56 lmg %r0,%r15,gprregs-.base(%r13) 57 lmg %r0,%r15,gprregs-.base(%r13)
57 .top: 58 .top:
58 lghi %r7,4096 # load PAGE_SIZE in r7 59 lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
59 lghi %r9,4096 # load PAGE_SIZE in r9 60 lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
60 lg %r5,0(%r2) # read another word for indirection page 61 lg %r5,0(%r2) # read another word for indirection page
61 aghi %r2,8 # increment pointer 62 aghi %r2,8 # increment pointer
62 tml %r5,0x1 # is it a destination page? 63 tml %r5,0x1 # is it a destination page?
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3d1d808ea8a9..164a1e16b53e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,7 +305,7 @@ static void __init setup_lowcore(void)
305 /* 305 /*
306 * Setup lowcore for boot cpu 306 * Setup lowcore for boot cpu
307 */ 307 */
308 BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096); 308 BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
309 lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc)); 309 lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
310 lc->restart_psw.mask = PSW_KERNEL_BITS; 310 lc->restart_psw.mask = PSW_KERNEL_BITS;
311 lc->restart_psw.addr = (unsigned long) restart_int_handler; 311 lc->restart_psw.addr = (unsigned long) restart_int_handler;
@@ -323,7 +323,7 @@ static void __init setup_lowcore(void)
323 lc->io_new_psw.mask = PSW_KERNEL_BITS | 323 lc->io_new_psw.mask = PSW_KERNEL_BITS |
324 PSW_MASK_DAT | PSW_MASK_MCHECK; 324 PSW_MASK_DAT | PSW_MASK_MCHECK;
325 lc->io_new_psw.addr = (unsigned long) io_int_handler; 325 lc->io_new_psw.addr = (unsigned long) io_int_handler;
326 lc->clock_comparator = -1ULL; 326 lc->clock_comparator = clock_comparator_max;
327 lc->kernel_stack = ((unsigned long) &init_thread_union) 327 lc->kernel_stack = ((unsigned long) &init_thread_union)
328 + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); 328 + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
329 lc->async_stack = (unsigned long) 329 lc->async_stack = (unsigned long)
@@ -469,10 +469,10 @@ static void __init setup_memory_end(void)
469 vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; 469 vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
470 tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; 470 tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
471 tmp = tmp * (sizeof(struct page) + PAGE_SIZE); 471 tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
472 if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42)) 472 if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
473 vmax = 1UL << 42; /* 3-level kernel page table */ 473 vmax = _REGION2_SIZE; /* 3-level kernel page table */
474 else 474 else
475 vmax = 1UL << 53; /* 4-level kernel page table */ 475 vmax = _REGION1_SIZE; /* 4-level kernel page table */
476 /* module area is at the end of the kernel address space. */ 476 /* module area is at the end of the kernel address space. */
477 MODULES_END = vmax; 477 MODULES_END = vmax;
478 MODULES_VADDR = MODULES_END - MODULES_LEN; 478 MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -818,6 +818,9 @@ static int __init setup_hwcaps(void)
818 case 0x2965: 818 case 0x2965:
819 strcpy(elf_platform, "z13"); 819 strcpy(elf_platform, "z13");
820 break; 820 break;
821 case 0x3906:
822 strcpy(elf_platform, "z14");
823 break;
821 } 824 }
822 825
823 /* 826 /*
@@ -922,6 +925,7 @@ void __init setup_arch(char **cmdline_p)
922 setup_memory_end(); 925 setup_memory_end();
923 setup_memory(); 926 setup_memory();
924 dma_contiguous_reserve(memory_end); 927 dma_contiguous_reserve(memory_end);
928 vmcp_cma_reserve();
925 929
926 check_initrd(); 930 check_initrd();
927 reserve_crashkernel(); 931 reserve_crashkernel();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1020a11a24e5..1cee6753d47a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1181,6 +1181,7 @@ static int __init s390_smp_init(void)
1181 1181
1182 rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online", 1182 rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
1183 smp_cpu_online, smp_cpu_pre_down); 1183 smp_cpu_online, smp_cpu_pre_down);
1184 rc = rc <= 0 ? rc : 0;
1184out: 1185out:
1185 return rc; 1186 return rc;
1186} 1187}
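Context for the smp.c one-liner: cpuhp_setup_state() called with CPUHP_AP_ONLINE_DYN returns the dynamically allocated state number (a positive value) on success, but an initcall must return 0 for success, hence the squash. Standalone illustration:

#include <stdio.h>

/* stand-in: a positive return is the allocated hotplug state id */
static int fake_cpuhp_setup_state(void)
{
        return 137;
}

int main(void)
{
        int rc = fake_cpuhp_setup_state();

        rc = rc <= 0 ? rc : 0; /* success ids collapse to 0 */
        printf("initcall rc=%d\n", rc);
        return 0;
}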
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index 39e2f41b6cf0..c8ea715bfe10 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -98,10 +98,16 @@ int page_key_alloc(unsigned long pages)
98 */ 98 */
99void page_key_read(unsigned long *pfn) 99void page_key_read(unsigned long *pfn)
100{ 100{
101 struct page *page;
101 unsigned long addr; 102 unsigned long addr;
102 103 unsigned char key;
103 addr = (unsigned long) page_address(pfn_to_page(*pfn)); 104
104 *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); 105 page = pfn_to_page(*pfn);
106 addr = (unsigned long) page_address(page);
107 key = (unsigned char) page_get_storage_key(addr) & 0x7f;
108 if (arch_test_page_nodat(page))
109 key |= 0x80;
110 *(unsigned char *) pfn = key;
105} 111}
106 112
107/* 113/*
@@ -126,8 +132,16 @@ void page_key_memorize(unsigned long *pfn)
126 */ 132 */
127void page_key_write(void *address) 133void page_key_write(void *address)
128{ 134{
129 page_set_storage_key((unsigned long) address, 135 struct page *page;
130 page_key_rp->data[page_key_rx], 0); 136 unsigned char key;
137
138 key = page_key_rp->data[page_key_rx];
139 page_set_storage_key((unsigned long) address, key & 0x7f, 0);
140 page = virt_to_page(address);
141 if (key & 0x80)
142 arch_set_page_nodat(page, 0);
143 else
144 arch_set_page_dat(page, 0);
131 if (++page_key_rx >= PAGE_KEY_DATA_SIZE) 145 if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
132 return; 146 return;
133 page_key_rp = page_key_rp->next; 147 page_key_rp = page_key_rp->next;
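The hibernation path now has to preserve two per-page facts in the single byte it already saves: the 7-bit storage key and the new software "nodat" page state, packed into bit 0x80. A standalone sketch of the encoding used by page_key_read()/page_key_write():

#include <stdio.h>

int main(void)
{
        unsigned char key = 0x16;   /* hardware storage key bits */
        int nodat = 1;              /* software page state       */
        unsigned char saved, restored_key;

        saved = (key & 0x7f) | (nodat ? 0x80 : 0x00);  /* page_key_read  */
        restored_key = saved & 0x7f;                   /* page_key_write */
        printf("saved=0x%02x key=0x%02x nodat=%d\n",
               saved, restored_key, (saved & 0x80) != 0);
        return 0;
}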
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 192efdfac918..5cbd52169348 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -51,8 +51,15 @@
51#include <asm/cio.h> 51#include <asm/cio.h>
52#include "entry.h" 52#include "entry.h"
53 53
54u64 sched_clock_base_cc = -1; /* Force to data section. */ 54unsigned char tod_clock_base[16] __aligned(8) = {
55EXPORT_SYMBOL_GPL(sched_clock_base_cc); 55 /* Force to data section. */
56 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
57 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
58};
59EXPORT_SYMBOL_GPL(tod_clock_base);
60
61u64 clock_comparator_max = -1ULL;
62EXPORT_SYMBOL_GPL(clock_comparator_max);
56 63
57static DEFINE_PER_CPU(struct clock_event_device, comparators); 64static DEFINE_PER_CPU(struct clock_event_device, comparators);
58 65
@@ -75,7 +82,7 @@ void __init time_early_init(void)
75 struct ptff_qui qui; 82 struct ptff_qui qui;
76 83
77 /* Initialize TOD steering parameters */ 84 /* Initialize TOD steering parameters */
78 tod_steering_end = sched_clock_base_cc; 85 tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
79 vdso_data->ts_end = tod_steering_end; 86 vdso_data->ts_end = tod_steering_end;
80 87
81 if (!test_facility(28)) 88 if (!test_facility(28))
@@ -111,22 +118,27 @@ unsigned long long monotonic_clock(void)
111} 118}
112EXPORT_SYMBOL(monotonic_clock); 119EXPORT_SYMBOL(monotonic_clock);
113 120
114static void tod_to_timeval(__u64 todval, struct timespec64 *xt) 121static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
115{ 122{
116 unsigned long long sec; 123 unsigned long long high, low, rem, sec, nsec;
124
 125 /* Split extended TOD clock to micro-seconds and sub-micro-seconds */
126 high = (*(unsigned long long *) clk) >> 4;
127 low = (*(unsigned long long *)&clk[7]) << 4;
128 /* Calculate seconds and nano-seconds */
129 sec = high;
130 rem = do_div(sec, 1000000);
131 nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32;
117 132
118 sec = todval >> 12;
119 do_div(sec, 1000000);
120 xt->tv_sec = sec; 133 xt->tv_sec = sec;
121 todval -= (sec * 1000000) << 12; 134 xt->tv_nsec = nsec;
122 xt->tv_nsec = ((todval * 1000) >> 12);
123} 135}
124 136
125void clock_comparator_work(void) 137void clock_comparator_work(void)
126{ 138{
127 struct clock_event_device *cd; 139 struct clock_event_device *cd;
128 140
129 S390_lowcore.clock_comparator = -1ULL; 141 S390_lowcore.clock_comparator = clock_comparator_max;
130 cd = this_cpu_ptr(&comparators); 142 cd = this_cpu_ptr(&comparators);
131 cd->event_handler(cd); 143 cd->event_handler(cd);
132} 144}
@@ -148,7 +160,7 @@ void init_cpu_timer(void)
148 struct clock_event_device *cd; 160 struct clock_event_device *cd;
149 int cpu; 161 int cpu;
150 162
151 S390_lowcore.clock_comparator = -1ULL; 163 S390_lowcore.clock_comparator = clock_comparator_max;
152 set_clock_comparator(S390_lowcore.clock_comparator); 164 set_clock_comparator(S390_lowcore.clock_comparator);
153 165
154 cpu = smp_processor_id(); 166 cpu = smp_processor_id();
@@ -179,7 +191,7 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
179 unsigned long param64) 191 unsigned long param64)
180{ 192{
181 inc_irq_stat(IRQEXT_CLK); 193 inc_irq_stat(IRQEXT_CLK);
182 if (S390_lowcore.clock_comparator == -1ULL) 194 if (S390_lowcore.clock_comparator == clock_comparator_max)
183 set_clock_comparator(S390_lowcore.clock_comparator); 195 set_clock_comparator(S390_lowcore.clock_comparator);
184} 196}
185 197
@@ -197,18 +209,28 @@ static void stp_reset(void);
197 209
198void read_persistent_clock64(struct timespec64 *ts) 210void read_persistent_clock64(struct timespec64 *ts)
199{ 211{
200 __u64 clock; 212 unsigned char clk[STORE_CLOCK_EXT_SIZE];
213 __u64 delta;
201 214
202 clock = get_tod_clock() - initial_leap_seconds; 215 delta = initial_leap_seconds + TOD_UNIX_EPOCH;
203 tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); 216 get_tod_clock_ext(clk);
217 *(__u64 *) &clk[1] -= delta;
218 if (*(__u64 *) &clk[1] > delta)
219 clk[0]--;
220 ext_to_timespec64(clk, ts);
204} 221}
205 222
206void read_boot_clock64(struct timespec64 *ts) 223void read_boot_clock64(struct timespec64 *ts)
207{ 224{
208 __u64 clock; 225 unsigned char clk[STORE_CLOCK_EXT_SIZE];
226 __u64 delta;
209 227
210 clock = sched_clock_base_cc - initial_leap_seconds; 228 delta = initial_leap_seconds + TOD_UNIX_EPOCH;
211 tod_to_timeval(clock - TOD_UNIX_EPOCH, ts); 229 memcpy(clk, tod_clock_base, 16);
230 *(__u64 *) &clk[1] -= delta;
231 if (*(__u64 *) &clk[1] > delta)
232 clk[0]--;
233 ext_to_timespec64(clk, ts);
212} 234}
213 235
214static u64 read_tod_clock(struct clocksource *cs) 236static u64 read_tod_clock(struct clocksource *cs)
@@ -335,7 +357,7 @@ static unsigned long clock_sync_flags;
335 * source. If the clock mode is local it will return -EOPNOTSUPP and 357 * source. If the clock mode is local it will return -EOPNOTSUPP and
336 * -EAGAIN if the clock is not in sync with the external reference. 358 * -EAGAIN if the clock is not in sync with the external reference.
337 */ 359 */
338int get_phys_clock(unsigned long long *clock) 360int get_phys_clock(unsigned long *clock)
339{ 361{
340 atomic_t *sw_ptr; 362 atomic_t *sw_ptr;
341 unsigned int sw0, sw1; 363 unsigned int sw0, sw1;
@@ -406,7 +428,10 @@ static void clock_sync_global(unsigned long long delta)
406 struct ptff_qto qto; 428 struct ptff_qto qto;
407 429
408 /* Fixup the monotonic sched clock. */ 430 /* Fixup the monotonic sched clock. */
409 sched_clock_base_cc += delta; 431 *(unsigned long long *) &tod_clock_base[1] += delta;
432 if (*(unsigned long long *) &tod_clock_base[1] < delta)
433 /* Epoch overflow */
434 tod_clock_base[0]++;
410 /* Adjust TOD steering parameters. */ 435 /* Adjust TOD steering parameters. */
411 vdso_data->tb_update_count++; 436 vdso_data->tb_update_count++;
412 now = get_tod_clock(); 437 now = get_tod_clock();
@@ -437,7 +462,7 @@ static void clock_sync_global(unsigned long long delta)
437static void clock_sync_local(unsigned long long delta) 462static void clock_sync_local(unsigned long long delta)
438{ 463{
439 /* Add the delta to the clock comparator. */ 464 /* Add the delta to the clock comparator. */
440 if (S390_lowcore.clock_comparator != -1ULL) { 465 if (S390_lowcore.clock_comparator != clock_comparator_max) {
441 S390_lowcore.clock_comparator += delta; 466 S390_lowcore.clock_comparator += delta;
442 set_clock_comparator(S390_lowcore.clock_comparator); 467 set_clock_comparator(S390_lowcore.clock_comparator);
443 } 468 }
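Most of this time.c hunk is mechanical (the 128-bit tod_clock_base replaces the 64-bit sched_clock_base_cc), but the genuinely new case is the carry into the epoch byte when the 64-bit clock word overflows, as in clock_sync_global(). A standalone sketch of that carry check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t  epoch = 0;                     /* tod_clock_base[0] */
        uint64_t base  = 0xfffffffffffffff0ULL; /* 64-bit clock word */
        uint64_t delta = 0x20;

        base += delta;
        if (base < delta)   /* unsigned wrap: carry into the epoch */
                epoch++;
        printf("epoch=%u base=0x%016llx\n", epoch,
               (unsigned long long)base);
        return 0;
}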
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index b89d19f6f2ab..eacda05b45d7 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -157,6 +157,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
157 page_frame = get_zeroed_page(GFP_KERNEL); 157 page_frame = get_zeroed_page(GFP_KERNEL);
158 if (!segment_table || !page_table || !page_frame) 158 if (!segment_table || !page_table || !page_frame)
159 goto out; 159 goto out;
160 arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER);
161 arch_set_page_dat(virt_to_page(page_table), 0);
160 162
161 /* Initialize per-cpu vdso data page */ 163 /* Initialize per-cpu vdso data page */
162 vd = (struct vdso_per_cpu_data *) page_frame; 164 vd = (struct vdso_per_cpu_data *) page_frame;
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index 8f048c2d6d13..263a7f9eee1e 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -2,6 +2,8 @@
2 * This is the infamous ld script for the 32 bits vdso 2 * This is the infamous ld script for the 32 bits vdso
3 * library 3 * library
4 */ 4 */
5
6#include <asm/page.h>
5#include <asm/vdso.h> 7#include <asm/vdso.h>
6 8
7OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") 9OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
@@ -91,7 +93,7 @@ SECTIONS
91 .debug_ranges 0 : { *(.debug_ranges) } 93 .debug_ranges 0 : { *(.debug_ranges) }
92 .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } 94 .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
93 95
94 . = ALIGN(4096); 96 . = ALIGN(PAGE_SIZE);
95 PROVIDE(_vdso_data = .); 97 PROVIDE(_vdso_data = .);
96 98
97 /DISCARD/ : { 99 /DISCARD/ : {
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index f35455d497fe..9e3dbbcc1cfc 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -2,6 +2,8 @@
2 * This is the infamous ld script for the 64 bits vdso 2 * This is the infamous ld script for the 64 bits vdso
3 * library 3 * library
4 */ 4 */
5
6#include <asm/page.h>
5#include <asm/vdso.h> 7#include <asm/vdso.h>
6 8
7OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") 9OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
@@ -91,7 +93,7 @@ SECTIONS
91 .debug_ranges 0 : { *(.debug_ranges) } 93 .debug_ranges 0 : { *(.debug_ranges) }
92 .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } 94 .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
93 95
94 . = ALIGN(4096); 96 . = ALIGN(PAGE_SIZE);
95 PROVIDE(_vdso_data = .); 97 PROVIDE(_vdso_data = .);
96 98
97 /DISCARD/ : { 99 /DISCARD/ : {
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index ce865bd4f81d..e4d36094aceb 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,7 +27,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
27 unsigned long prefix = kvm_s390_get_prefix(vcpu); 27 unsigned long prefix = kvm_s390_get_prefix(vcpu);
28 28
29 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; 29 start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
30 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; 30 end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
31 vcpu->stat.diagnose_10++; 31 vcpu->stat.diagnose_10++;
32 32
33 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end 33 if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
@@ -51,9 +51,9 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
51 */ 51 */
52 gmap_discard(vcpu->arch.gmap, start, prefix); 52 gmap_discard(vcpu->arch.gmap, start, prefix);
53 if (start <= prefix) 53 if (start <= prefix)
54 gmap_discard(vcpu->arch.gmap, 0, 4096); 54 gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
55 if (end > prefix + 4096) 55 if (end > prefix + PAGE_SIZE)
56 gmap_discard(vcpu->arch.gmap, 4096, 8192); 56 gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
57 gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); 57 gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
58 } 58 }
59 return 0; 59 return 0;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 653cae5e1ee1..3cc77391a102 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -629,7 +629,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
629 iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130); 629 iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
630 if (asce.r) 630 if (asce.r)
631 goto real_address; 631 goto real_address;
632 ptr = asce.origin * 4096; 632 ptr = asce.origin * PAGE_SIZE;
633 switch (asce.dt) { 633 switch (asce.dt) {
634 case ASCE_TYPE_REGION1: 634 case ASCE_TYPE_REGION1:
635 if (vaddr.rfx01 > asce.tl) 635 if (vaddr.rfx01 > asce.tl)
@@ -674,7 +674,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
674 return PGM_REGION_SECOND_TRANS; 674 return PGM_REGION_SECOND_TRANS;
675 if (edat1) 675 if (edat1)
676 dat_protection |= rfte.p; 676 dat_protection |= rfte.p;
677 ptr = rfte.rto * 4096 + vaddr.rsx * 8; 677 ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
678 } 678 }
679 /* fallthrough */ 679 /* fallthrough */
680 case ASCE_TYPE_REGION2: { 680 case ASCE_TYPE_REGION2: {
@@ -692,7 +692,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
692 return PGM_REGION_THIRD_TRANS; 692 return PGM_REGION_THIRD_TRANS;
693 if (edat1) 693 if (edat1)
694 dat_protection |= rste.p; 694 dat_protection |= rste.p;
695 ptr = rste.rto * 4096 + vaddr.rtx * 8; 695 ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
696 } 696 }
697 /* fallthrough */ 697 /* fallthrough */
698 case ASCE_TYPE_REGION3: { 698 case ASCE_TYPE_REGION3: {
@@ -720,7 +720,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
720 return PGM_SEGMENT_TRANSLATION; 720 return PGM_SEGMENT_TRANSLATION;
721 if (edat1) 721 if (edat1)
722 dat_protection |= rtte.fc0.p; 722 dat_protection |= rtte.fc0.p;
723 ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8; 723 ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
724 } 724 }
725 /* fallthrough */ 725 /* fallthrough */
726 case ASCE_TYPE_SEGMENT: { 726 case ASCE_TYPE_SEGMENT: {
@@ -743,7 +743,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
743 goto absolute_address; 743 goto absolute_address;
744 } 744 }
745 dat_protection |= ste.fc0.p; 745 dat_protection |= ste.fc0.p;
746 ptr = ste.fc0.pto * 2048 + vaddr.px * 8; 746 ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
747 } 747 }
748 } 748 }
749 if (kvm_is_error_gpa(vcpu->kvm, ptr)) 749 if (kvm_is_error_gpa(vcpu->kvm, ptr))
@@ -993,7 +993,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
993 parent = sg->parent; 993 parent = sg->parent;
994 vaddr.addr = saddr; 994 vaddr.addr = saddr;
995 asce.val = sg->orig_asce; 995 asce.val = sg->orig_asce;
996 ptr = asce.origin * 4096; 996 ptr = asce.origin * PAGE_SIZE;
997 if (asce.r) { 997 if (asce.r) {
998 *fake = 1; 998 *fake = 1;
999 ptr = 0; 999 ptr = 0;
@@ -1029,7 +1029,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
1029 union region1_table_entry rfte; 1029 union region1_table_entry rfte;
1030 1030
1031 if (*fake) { 1031 if (*fake) {
1032 ptr += (unsigned long) vaddr.rfx << 53; 1032 ptr += vaddr.rfx * _REGION1_SIZE;
1033 rfte.val = ptr; 1033 rfte.val = ptr;
1034 goto shadow_r2t; 1034 goto shadow_r2t;
1035 } 1035 }
@@ -1044,7 +1044,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
1044 return PGM_REGION_SECOND_TRANS; 1044 return PGM_REGION_SECOND_TRANS;
1045 if (sg->edat_level >= 1) 1045 if (sg->edat_level >= 1)
1046 *dat_protection |= rfte.p; 1046 *dat_protection |= rfte.p;
1047 ptr = rfte.rto << 12UL; 1047 ptr = rfte.rto * PAGE_SIZE;
1048shadow_r2t: 1048shadow_r2t:
1049 rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); 1049 rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
1050 if (rc) 1050 if (rc)
@@ -1055,7 +1055,7 @@ shadow_r2t:
1055 union region2_table_entry rste; 1055 union region2_table_entry rste;
1056 1056
1057 if (*fake) { 1057 if (*fake) {
1058 ptr += (unsigned long) vaddr.rsx << 42; 1058 ptr += vaddr.rsx * _REGION2_SIZE;
1059 rste.val = ptr; 1059 rste.val = ptr;
1060 goto shadow_r3t; 1060 goto shadow_r3t;
1061 } 1061 }
@@ -1070,7 +1070,7 @@ shadow_r2t:
1070 return PGM_REGION_THIRD_TRANS; 1070 return PGM_REGION_THIRD_TRANS;
1071 if (sg->edat_level >= 1) 1071 if (sg->edat_level >= 1)
1072 *dat_protection |= rste.p; 1072 *dat_protection |= rste.p;
1073 ptr = rste.rto << 12UL; 1073 ptr = rste.rto * PAGE_SIZE;
1074shadow_r3t: 1074shadow_r3t:
1075 rste.p |= *dat_protection; 1075 rste.p |= *dat_protection;
1076 rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); 1076 rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
@@ -1082,7 +1082,7 @@ shadow_r3t:
1082 union region3_table_entry rtte; 1082 union region3_table_entry rtte;
1083 1083
1084 if (*fake) { 1084 if (*fake) {
1085 ptr += (unsigned long) vaddr.rtx << 31; 1085 ptr += vaddr.rtx * _REGION3_SIZE;
1086 rtte.val = ptr; 1086 rtte.val = ptr;
1087 goto shadow_sgt; 1087 goto shadow_sgt;
1088 } 1088 }
@@ -1098,7 +1098,7 @@ shadow_r3t:
1098 if (rtte.fc && sg->edat_level >= 2) { 1098 if (rtte.fc && sg->edat_level >= 2) {
1099 *dat_protection |= rtte.fc0.p; 1099 *dat_protection |= rtte.fc0.p;
1100 *fake = 1; 1100 *fake = 1;
1101 ptr = rtte.fc1.rfaa << 31UL; 1101 ptr = rtte.fc1.rfaa * _REGION3_SIZE;
1102 rtte.val = ptr; 1102 rtte.val = ptr;
1103 goto shadow_sgt; 1103 goto shadow_sgt;
1104 } 1104 }
@@ -1106,7 +1106,7 @@ shadow_r3t:
1106 return PGM_SEGMENT_TRANSLATION; 1106 return PGM_SEGMENT_TRANSLATION;
1107 if (sg->edat_level >= 1) 1107 if (sg->edat_level >= 1)
1108 *dat_protection |= rtte.fc0.p; 1108 *dat_protection |= rtte.fc0.p;
1109 ptr = rtte.fc0.sto << 12UL; 1109 ptr = rtte.fc0.sto * PAGE_SIZE;
1110shadow_sgt: 1110shadow_sgt:
1111 rtte.fc0.p |= *dat_protection; 1111 rtte.fc0.p |= *dat_protection;
1112 rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); 1112 rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
@@ -1118,7 +1118,7 @@ shadow_sgt:
1118 union segment_table_entry ste; 1118 union segment_table_entry ste;
1119 1119
1120 if (*fake) { 1120 if (*fake) {
1121 ptr += (unsigned long) vaddr.sx << 20; 1121 ptr += vaddr.sx * _SEGMENT_SIZE;
1122 ste.val = ptr; 1122 ste.val = ptr;
1123 goto shadow_pgt; 1123 goto shadow_pgt;
1124 } 1124 }
@@ -1134,11 +1134,11 @@ shadow_sgt:
1134 *dat_protection |= ste.fc0.p; 1134 *dat_protection |= ste.fc0.p;
1135 if (ste.fc && sg->edat_level >= 1) { 1135 if (ste.fc && sg->edat_level >= 1) {
1136 *fake = 1; 1136 *fake = 1;
1137 ptr = ste.fc1.sfaa << 20UL; 1137 ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
1138 ste.val = ptr; 1138 ste.val = ptr;
1139 goto shadow_pgt; 1139 goto shadow_pgt;
1140 } 1140 }
1141 ptr = ste.fc0.pto << 11UL; 1141 ptr = ste.fc0.pto * (PAGE_SIZE / 2);
1142shadow_pgt: 1142shadow_pgt:
1143 ste.fc0.p |= *dat_protection; 1143 ste.fc0.p |= *dat_protection;
1144 rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); 1144 rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
@@ -1187,8 +1187,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
1187 1187
1188 vaddr.addr = saddr; 1188 vaddr.addr = saddr;
1189 if (fake) { 1189 if (fake) {
1190 /* offset in 1MB guest memory block */ 1190 pte.val = pgt + vaddr.px * PAGE_SIZE;
1191 pte.val = pgt + ((unsigned long) vaddr.px << 12UL);
1192 goto shadow_page; 1191 goto shadow_page;
1193 } 1192 }
1194 if (!rc) 1193 if (!rc)
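The literal shifts and magic masks throughout gaccess.c (and in fault.c and gmap.c below) are replaced by symbolic sizes; the values, as introduced for s390 in asm/pgtable.h by this series, are plain powers of two. A standalone check that the new expressions match the old ones:

#include <stdio.h>

#define _PAGE_SIZE     (1UL << 12)  /* 4 KiB */
#define _SEGMENT_SIZE  (1UL << 20)  /* 1 MiB */
#define _REGION3_SIZE  (1UL << 31)  /* 2 GiB */
#define _REGION2_SIZE  (1UL << 42)  /* 4 TiB */
#define _REGION1_SIZE  (1UL << 53)  /* 8 PiB */

int main(void)
{
        /* e.g. "rfte.rto * PAGE_SIZE" is the old "rfte.rto << 12UL" */
        unsigned long rto = 0x1234;

        printf("%lx == %lx\n", rto * _PAGE_SIZE, rto << 12UL);
        return 0;
}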
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8a1dac793d6b..785ad028bde6 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -329,7 +329,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
329 start = kvm_s390_logical_to_effective(vcpu, start); 329 start = kvm_s390_logical_to_effective(vcpu, start);
330 if (m3 & SSKE_MB) { 330 if (m3 & SSKE_MB) {
331 /* start already designates an absolute address */ 331 /* start already designates an absolute address */
332 end = (start + (1UL << 20)) & ~((1UL << 20) - 1); 332 end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
333 } else { 333 } else {
334 start = kvm_s390_real_to_abs(vcpu, start); 334 start = kvm_s390_real_to_abs(vcpu, start);
335 end = start + PAGE_SIZE; 335 end = start + PAGE_SIZE;
@@ -893,10 +893,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
893 case 0x00000000: 893 case 0x00000000:
894 /* only 4k frames specify a real address */ 894 /* only 4k frames specify a real address */
895 start = kvm_s390_real_to_abs(vcpu, start); 895 start = kvm_s390_real_to_abs(vcpu, start);
896 end = (start + (1UL << 12)) & ~((1UL << 12) - 1); 896 end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1);
897 break; 897 break;
898 case 0x00001000: 898 case 0x00001000:
899 end = (start + (1UL << 20)) & ~((1UL << 20) - 1); 899 end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
900 break; 900 break;
901 case 0x00002000: 901 case 0x00002000:
902 /* only support 2G frame size if EDAT2 is available and we are 902 /* only support 2G frame size if EDAT2 is available and we are
@@ -904,7 +904,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
904 if (!test_kvm_facility(vcpu->kvm, 78) || 904 if (!test_kvm_facility(vcpu->kvm, 78) ||
905 psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT) 905 psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
906 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 906 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
907 end = (start + (1UL << 31)) & ~((1UL << 31) - 1); 907 end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1);
908 break; 908 break;
909 default: 909 default:
910 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 910 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 715c19c45d9a..ba8203e4d516 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1069,7 +1069,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
1069 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 1069 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
1070 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 1070 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
1071 1071
1072 BUILD_BUG_ON(sizeof(struct vsie_page) != 4096); 1072 BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
1073 scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL); 1073 scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
1074 1074
1075 /* 512 byte alignment */ 1075 /* 512 byte alignment */
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 92e90e40b6fb..7f17555ad4d5 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -57,7 +57,7 @@ static void __udelay_enabled(unsigned long long usecs)
57 end = get_tod_clock_fast() + (usecs << 12); 57 end = get_tod_clock_fast() + (usecs << 12);
58 do { 58 do {
59 clock_saved = 0; 59 clock_saved = 0;
60 if (end < S390_lowcore.clock_comparator) { 60 if (tod_after(S390_lowcore.clock_comparator, end)) {
61 clock_saved = local_tick_disable(); 61 clock_saved = local_tick_disable();
62 set_clock_comparator(end); 62 set_clock_comparator(end);
63 } 63 }
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ffb15bd4c593..b12663d653d8 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -32,42 +32,63 @@ static int __init spin_retry_setup(char *str)
32} 32}
33__setup("spin_retry=", spin_retry_setup); 33__setup("spin_retry=", spin_retry_setup);
34 34
35static inline int arch_load_niai4(int *lock)
36{
37 int owner;
38
39 asm volatile(
40#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
41 " .long 0xb2fa0040\n" /* NIAI 4 */
42#endif
43 " l %0,%1\n"
44 : "=d" (owner) : "Q" (*lock) : "memory");
45 return owner;
46}
47
48static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
49{
50 int expected = old;
51
52 asm volatile(
53#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
54 " .long 0xb2fa0080\n" /* NIAI 8 */
55#endif
56 " cs %0,%3,%1\n"
57 : "=d" (old), "=Q" (*lock)
58 : "0" (old), "d" (new), "Q" (*lock)
59 : "cc", "memory");
60 return expected == old;
61}
62
35void arch_spin_lock_wait(arch_spinlock_t *lp) 63void arch_spin_lock_wait(arch_spinlock_t *lp)
36{ 64{
37 int cpu = SPINLOCK_LOCKVAL; 65 int cpu = SPINLOCK_LOCKVAL;
38 int owner, count, first_diag; 66 int owner, count;
67
68 /* Pass the virtual CPU to the lock holder if it is not running */
69 owner = arch_load_niai4(&lp->lock);
70 if (owner && arch_vcpu_is_preempted(~owner))
71 smp_yield_cpu(~owner);
39 72
40 first_diag = 1; 73 count = spin_retry;
41 while (1) { 74 while (1) {
42 owner = ACCESS_ONCE(lp->lock); 75 owner = arch_load_niai4(&lp->lock);
43 /* Try to get the lock if it is free. */ 76 /* Try to get the lock if it is free. */
44 if (!owner) { 77 if (!owner) {
45 if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) 78 if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
46 return; 79 return;
47 continue; 80 continue;
48 } 81 }
49 /* First iteration: check if the lock owner is running. */ 82 if (count-- >= 0)
50 if (first_diag && arch_vcpu_is_preempted(~owner)) {
51 smp_yield_cpu(~owner);
52 first_diag = 0;
53 continue; 83 continue;
54 }
55 /* Loop for a while on the lock value. */
56 count = spin_retry; 84 count = spin_retry;
57 do {
58 owner = ACCESS_ONCE(lp->lock);
59 } while (owner && count-- > 0);
60 if (!owner)
61 continue;
62 /* 85 /*
63 * For multiple layers of hypervisors, e.g. z/VM + LPAR 86 * For multiple layers of hypervisors, e.g. z/VM + LPAR
64 * yield the CPU unconditionally. For LPAR rely on the 87 * yield the CPU unconditionally. For LPAR rely on the
65 * sense running status. 88 * sense running status.
66 */ 89 */
67 if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { 90 if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
68 smp_yield_cpu(~owner); 91 smp_yield_cpu(~owner);
69 first_diag = 0;
70 }
71 } 92 }
72} 93}
73EXPORT_SYMBOL(arch_spin_lock_wait); 94EXPORT_SYMBOL(arch_spin_lock_wait);
@@ -75,42 +96,36 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
75void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) 96void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
76{ 97{
77 int cpu = SPINLOCK_LOCKVAL; 98 int cpu = SPINLOCK_LOCKVAL;
78 int owner, count, first_diag; 99 int owner, count;
79 100
80 local_irq_restore(flags); 101 local_irq_restore(flags);
81 first_diag = 1; 102
103 /* Pass the virtual CPU to the lock holder if it is not running */
104 owner = arch_load_niai4(&lp->lock);
105 if (owner && arch_vcpu_is_preempted(~owner))
106 smp_yield_cpu(~owner);
107
108 count = spin_retry;
82 while (1) { 109 while (1) {
83 owner = ACCESS_ONCE(lp->lock); 110 owner = arch_load_niai4(&lp->lock);
84 /* Try to get the lock if it is free. */ 111 /* Try to get the lock if it is free. */
85 if (!owner) { 112 if (!owner) {
86 local_irq_disable(); 113 local_irq_disable();
87 if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu)) 114 if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
88 return; 115 return;
89 local_irq_restore(flags); 116 local_irq_restore(flags);
90 continue; 117 continue;
91 } 118 }
92 /* Check if the lock owner is running. */ 119 if (count-- >= 0)
93 if (first_diag && arch_vcpu_is_preempted(~owner)) {
94 smp_yield_cpu(~owner);
95 first_diag = 0;
96 continue; 120 continue;
97 }
98 /* Loop for a while on the lock value. */
99 count = spin_retry; 121 count = spin_retry;
100 do {
101 owner = ACCESS_ONCE(lp->lock);
102 } while (owner && count-- > 0);
103 if (!owner)
104 continue;
105 /* 122 /*
106 * For multiple layers of hypervisors, e.g. z/VM + LPAR 123 * For multiple layers of hypervisors, e.g. z/VM + LPAR
107 * yield the CPU unconditionally. For LPAR rely on the 124 * yield the CPU unconditionally. For LPAR rely on the
108 * sense running status. 125 * sense running status.
109 */ 126 */
110 if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { 127 if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
111 smp_yield_cpu(~owner); 128 smp_yield_cpu(~owner);
112 first_diag = 0;
113 }
114 } 129 }
115} 130}
116EXPORT_SYMBOL(arch_spin_lock_wait_flags); 131EXPORT_SYMBOL(arch_spin_lock_wait_flags);
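The rewritten wait loops do two things: they prepend NIAI (next-instruction-access-intent) hints to the load and the compare-and-swap, emitted as raw 0xb2fa opcode words under the zEC12 march option, and they replace the one-shot 'first_diag' yield with a bounded retry count. A standalone sketch of the loop shape, minus the hints and the hypervisor yield:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int lock;

/* read the lock word cheaply in a bounded retry loop and only attempt
 * the expensive compare-and-swap when the lock looks free */
static void spin_lock(int cpu, int spin_retry)
{
        int owner, count = spin_retry;

        for (;;) {
                owner = atomic_load_explicit(&lock, memory_order_relaxed);
                if (!owner) {
                        int expected = 0;

                        if (atomic_compare_exchange_strong(&lock, &expected, cpu))
                                return;
                        continue;
                }
                if (count-- >= 0)
                        continue;
                count = spin_retry;
                /* here the kernel would consider yielding the virtual CPU */
        }
}

int main(void)
{
        spin_lock(1, 1000);
        printf("lock owner=%d\n", atomic_load(&lock));
        return 0;
}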
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index b3bd3f23b8e8..4ea9106417ee 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -15,8 +15,30 @@
15#include <asm/mmu_context.h> 15#include <asm/mmu_context.h>
16#include <asm/facility.h> 16#include <asm/facility.h>
17 17
18#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
18static DEFINE_STATIC_KEY_FALSE(have_mvcos); 19static DEFINE_STATIC_KEY_FALSE(have_mvcos);
19 20
21static int __init uaccess_init(void)
22{
23 if (test_facility(27))
24 static_branch_enable(&have_mvcos);
25 return 0;
26}
27early_initcall(uaccess_init);
28
29static inline int copy_with_mvcos(void)
30{
31 if (static_branch_likely(&have_mvcos))
32 return 1;
33 return 0;
34}
35#else
36static inline int copy_with_mvcos(void)
37{
38 return 1;
39}
40#endif
41
20static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, 42static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
21 unsigned long size) 43 unsigned long size)
22{ 44{
@@ -84,7 +106,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
84 106
85unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) 107unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
86{ 108{
87 if (static_branch_likely(&have_mvcos)) 109 if (copy_with_mvcos())
88 return copy_from_user_mvcos(to, from, n); 110 return copy_from_user_mvcos(to, from, n);
89 return copy_from_user_mvcp(to, from, n); 111 return copy_from_user_mvcp(to, from, n);
90} 112}
@@ -157,7 +179,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
157 179
158unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) 180unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
159{ 181{
160 if (static_branch_likely(&have_mvcos)) 182 if (copy_with_mvcos())
161 return copy_to_user_mvcos(to, from, n); 183 return copy_to_user_mvcos(to, from, n);
162 return copy_to_user_mvcs(to, from, n); 184 return copy_to_user_mvcs(to, from, n);
163} 185}
@@ -220,7 +242,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
220 242
221unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) 243unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
222{ 244{
223 if (static_branch_likely(&have_mvcos)) 245 if (copy_with_mvcos())
224 return copy_in_user_mvcos(to, from, n); 246 return copy_in_user_mvcos(to, from, n);
225 return copy_in_user_mvc(to, from, n); 247 return copy_in_user_mvc(to, from, n);
226} 248}
@@ -292,7 +314,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
292 314
293unsigned long __clear_user(void __user *to, unsigned long size) 315unsigned long __clear_user(void __user *to, unsigned long size)
294{ 316{
295 if (static_branch_likely(&have_mvcos)) 317 if (copy_with_mvcos())
296 return clear_user_mvcos(to, size); 318 return clear_user_mvcos(to, size);
297 return clear_user_xc(to, size); 319 return clear_user_xc(to, size);
298} 320}
@@ -349,11 +371,3 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
349 return done; 371 return done;
350} 372}
351EXPORT_SYMBOL(__strncpy_from_user); 373EXPORT_SYMBOL(__strncpy_from_user);
352
353static int __init uaccess_init(void)
354{
355 if (test_facility(27))
356 static_branch_enable(&have_mvcos);
357 return 0;
358}
359early_initcall(uaccess_init);
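The uaccess change is a build-time specialization: kernels compiled for z10 or newer can assume facility 27 (MVCOS), so the static key and its early initcall are compiled out and copy_with_mvcos() becomes a constant the compiler folds away. Standalone sketch of the pattern, with a plain int standing in for the static key; compile with -DASSUME_MVCOS to mimic CONFIG_HAVE_MARCH_Z10_FEATURES:

#include <stdio.h>

#ifndef ASSUME_MVCOS
static int have_mvcos; /* would be a static key set by an initcall */
static inline int copy_with_mvcos(void) { return have_mvcos; }
#else
static inline int copy_with_mvcos(void) { return 1; } /* folds to constant */
#endif

int main(void)
{
        printf("mvcos path: %d\n", copy_with_mvcos());
        return 0;
}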
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14f25798b001..bdabb013537b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -135,7 +135,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
135 pr_alert("AS:%016lx ", asce); 135 pr_alert("AS:%016lx ", asce);
136 switch (asce & _ASCE_TYPE_MASK) { 136 switch (asce & _ASCE_TYPE_MASK) {
137 case _ASCE_TYPE_REGION1: 137 case _ASCE_TYPE_REGION1:
138 table = table + ((address >> 53) & 0x7ff); 138 table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
139 if (bad_address(table)) 139 if (bad_address(table))
140 goto bad; 140 goto bad;
141 pr_cont("R1:%016lx ", *table); 141 pr_cont("R1:%016lx ", *table);
@@ -144,7 +144,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
144 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 144 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
145 /* fallthrough */ 145 /* fallthrough */
146 case _ASCE_TYPE_REGION2: 146 case _ASCE_TYPE_REGION2:
147 table = table + ((address >> 42) & 0x7ff); 147 table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
148 if (bad_address(table)) 148 if (bad_address(table))
149 goto bad; 149 goto bad;
150 pr_cont("R2:%016lx ", *table); 150 pr_cont("R2:%016lx ", *table);
@@ -153,7 +153,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
153 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 153 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
154 /* fallthrough */ 154 /* fallthrough */
155 case _ASCE_TYPE_REGION3: 155 case _ASCE_TYPE_REGION3:
156 table = table + ((address >> 31) & 0x7ff); 156 table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
157 if (bad_address(table)) 157 if (bad_address(table))
158 goto bad; 158 goto bad;
159 pr_cont("R3:%016lx ", *table); 159 pr_cont("R3:%016lx ", *table);
@@ -162,7 +162,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
162 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 162 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
163 /* fallthrough */ 163 /* fallthrough */
164 case _ASCE_TYPE_SEGMENT: 164 case _ASCE_TYPE_SEGMENT:
165 table = table + ((address >> 20) & 0x7ff); 165 table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
166 if (bad_address(table)) 166 if (bad_address(table))
167 goto bad; 167 goto bad;
168 pr_cont("S:%016lx ", *table); 168 pr_cont("S:%016lx ", *table);
@@ -170,7 +170,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
170 goto out; 170 goto out;
171 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); 171 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
172 } 172 }
173 table = table + ((address >> 12) & 0xff); 173 table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
174 if (bad_address(table)) 174 if (bad_address(table))
175 goto bad; 175 goto bad;
176 pr_cont("P:%016lx ", *table); 176 pr_cont("P:%016lx ", *table);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4fb3d3cdb370..9e1494e3d849 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -36,16 +36,16 @@ static struct gmap *gmap_alloc(unsigned long limit)
36 unsigned long *table; 36 unsigned long *table;
37 unsigned long etype, atype; 37 unsigned long etype, atype;
38 38
39 if (limit < (1UL << 31)) { 39 if (limit < _REGION3_SIZE) {
40 limit = (1UL << 31) - 1; 40 limit = _REGION3_SIZE - 1;
41 atype = _ASCE_TYPE_SEGMENT; 41 atype = _ASCE_TYPE_SEGMENT;
42 etype = _SEGMENT_ENTRY_EMPTY; 42 etype = _SEGMENT_ENTRY_EMPTY;
43 } else if (limit < (1UL << 42)) { 43 } else if (limit < _REGION2_SIZE) {
44 limit = (1UL << 42) - 1; 44 limit = _REGION2_SIZE - 1;
45 atype = _ASCE_TYPE_REGION3; 45 atype = _ASCE_TYPE_REGION3;
46 etype = _REGION3_ENTRY_EMPTY; 46 etype = _REGION3_ENTRY_EMPTY;
47 } else if (limit < (1UL << 53)) { 47 } else if (limit < _REGION1_SIZE) {
48 limit = (1UL << 53) - 1; 48 limit = _REGION1_SIZE - 1;
49 atype = _ASCE_TYPE_REGION2; 49 atype = _ASCE_TYPE_REGION2;
50 etype = _REGION2_ENTRY_EMPTY; 50 etype = _REGION2_ENTRY_EMPTY;
51 } else { 51 } else {
@@ -65,7 +65,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
65 spin_lock_init(&gmap->guest_table_lock); 65 spin_lock_init(&gmap->guest_table_lock);
66 spin_lock_init(&gmap->shadow_lock); 66 spin_lock_init(&gmap->shadow_lock);
67 atomic_set(&gmap->ref_count, 1); 67 atomic_set(&gmap->ref_count, 1);
68 page = alloc_pages(GFP_KERNEL, 2); 68 page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
69 if (!page) 69 if (!page)
70 goto out_free; 70 goto out_free;
71 page->index = 0; 71 page->index = 0;
@@ -186,7 +186,7 @@ static void gmap_free(struct gmap *gmap)
186 gmap_flush_tlb(gmap); 186 gmap_flush_tlb(gmap);
187 /* Free all segment & region tables. */ 187 /* Free all segment & region tables. */
188 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) 188 list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
189 __free_pages(page, 2); 189 __free_pages(page, CRST_ALLOC_ORDER);
190 gmap_radix_tree_free(&gmap->guest_to_host); 190 gmap_radix_tree_free(&gmap->guest_to_host);
191 gmap_radix_tree_free(&gmap->host_to_guest); 191 gmap_radix_tree_free(&gmap->host_to_guest);
192 192
@@ -306,7 +306,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
306 unsigned long *new; 306 unsigned long *new;
307 307
 308 /* since we don't free the gmap table until gmap_free we can unlock */ 308 /* since we don't free the gmap table until gmap_free we can unlock */
309 page = alloc_pages(GFP_KERNEL, 2); 309 page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
310 if (!page) 310 if (!page)
311 return -ENOMEM; 311 return -ENOMEM;
312 new = (unsigned long *) page_to_phys(page); 312 new = (unsigned long *) page_to_phys(page);
@@ -321,7 +321,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
321 } 321 }
322 spin_unlock(&gmap->guest_table_lock); 322 spin_unlock(&gmap->guest_table_lock);
323 if (page) 323 if (page)
324 __free_pages(page, 2); 324 __free_pages(page, CRST_ALLOC_ORDER);
325 return 0; 325 return 0;
326} 326}
327 327
@@ -546,30 +546,30 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
546 /* Create higher level tables in the gmap page table */ 546 /* Create higher level tables in the gmap page table */
547 table = gmap->table; 547 table = gmap->table;
548 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { 548 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
549 table += (gaddr >> 53) & 0x7ff; 549 table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
550 if ((*table & _REGION_ENTRY_INVALID) && 550 if ((*table & _REGION_ENTRY_INVALID) &&
551 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, 551 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
552 gaddr & 0xffe0000000000000UL)) 552 gaddr & _REGION1_MASK))
553 return -ENOMEM; 553 return -ENOMEM;
554 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 554 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
555 } 555 }
556 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { 556 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
557 table += (gaddr >> 42) & 0x7ff; 557 table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
558 if ((*table & _REGION_ENTRY_INVALID) && 558 if ((*table & _REGION_ENTRY_INVALID) &&
559 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, 559 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
560 gaddr & 0xfffffc0000000000UL)) 560 gaddr & _REGION2_MASK))
561 return -ENOMEM; 561 return -ENOMEM;
562 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 562 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
563 } 563 }
564 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { 564 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
565 table += (gaddr >> 31) & 0x7ff; 565 table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
566 if ((*table & _REGION_ENTRY_INVALID) && 566 if ((*table & _REGION_ENTRY_INVALID) &&
567 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, 567 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
568 gaddr & 0xffffffff80000000UL)) 568 gaddr & _REGION3_MASK))
569 return -ENOMEM; 569 return -ENOMEM;
570 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 570 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
571 } 571 }
572 table += (gaddr >> 20) & 0x7ff; 572 table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
573 /* Walk the parent mm page table */ 573 /* Walk the parent mm page table */
574 mm = gmap->mm; 574 mm = gmap->mm;
575 pgd = pgd_offset(mm, vmaddr); 575 pgd = pgd_offset(mm, vmaddr);
@@ -771,7 +771,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
771 table = gmap->table; 771 table = gmap->table;
772 switch (gmap->asce & _ASCE_TYPE_MASK) { 772 switch (gmap->asce & _ASCE_TYPE_MASK) {
773 case _ASCE_TYPE_REGION1: 773 case _ASCE_TYPE_REGION1:
774 table += (gaddr >> 53) & 0x7ff; 774 table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
775 if (level == 4) 775 if (level == 4)
776 break; 776 break;
777 if (*table & _REGION_ENTRY_INVALID) 777 if (*table & _REGION_ENTRY_INVALID)
@@ -779,7 +779,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
779 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 779 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
780 /* Fallthrough */ 780 /* Fallthrough */
781 case _ASCE_TYPE_REGION2: 781 case _ASCE_TYPE_REGION2:
782 table += (gaddr >> 42) & 0x7ff; 782 table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
783 if (level == 3) 783 if (level == 3)
784 break; 784 break;
785 if (*table & _REGION_ENTRY_INVALID) 785 if (*table & _REGION_ENTRY_INVALID)
@@ -787,7 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
787 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 787 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
788 /* Fallthrough */ 788 /* Fallthrough */
789 case _ASCE_TYPE_REGION3: 789 case _ASCE_TYPE_REGION3:
790 table += (gaddr >> 31) & 0x7ff; 790 table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
791 if (level == 2) 791 if (level == 2)
792 break; 792 break;
793 if (*table & _REGION_ENTRY_INVALID) 793 if (*table & _REGION_ENTRY_INVALID)
@@ -795,13 +795,13 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
795 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 795 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
796 /* Fallthrough */ 796 /* Fallthrough */
797 case _ASCE_TYPE_SEGMENT: 797 case _ASCE_TYPE_SEGMENT:
798 table += (gaddr >> 20) & 0x7ff; 798 table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
799 if (level == 1) 799 if (level == 1)
800 break; 800 break;
801 if (*table & _REGION_ENTRY_INVALID) 801 if (*table & _REGION_ENTRY_INVALID)
802 return NULL; 802 return NULL;
803 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); 803 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
804 table += (gaddr >> 12) & 0xff; 804 table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
805 } 805 }
806 return table; 806 return table;
807} 807}
@@ -1126,7 +1126,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
1126 table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ 1126 table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
1127 if (!table || *table & _PAGE_INVALID) 1127 if (!table || *table & _PAGE_INVALID)
1128 return; 1128 return;
1129 gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1); 1129 gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
1130 ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); 1130 ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
1131} 1131}
1132 1132
@@ -1144,7 +1144,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
1144 int i; 1144 int i;
1145 1145
1146 BUG_ON(!gmap_is_shadow(sg)); 1146 BUG_ON(!gmap_is_shadow(sg));
1147 for (i = 0; i < 256; i++, raddr += 1UL << 12) 1147 for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
1148 pgt[i] = _PAGE_INVALID; 1148 pgt[i] = _PAGE_INVALID;
1149} 1149}
1150 1150
@@ -1164,8 +1164,8 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
1164 ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ 1164 ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
1165 if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) 1165 if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
1166 return; 1166 return;
1167 gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1); 1167 gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
1168 sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff)); 1168 sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
1169 gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); 1169 gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
1170 pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); 1170 pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
1171 *ste = _SEGMENT_ENTRY_EMPTY; 1171 *ste = _SEGMENT_ENTRY_EMPTY;
@@ -1193,7 +1193,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
1193 1193
1194 BUG_ON(!gmap_is_shadow(sg)); 1194 BUG_ON(!gmap_is_shadow(sg));
1195 asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; 1195 asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
1196 for (i = 0; i < 2048; i++, raddr += 1UL << 20) { 1196 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
1197 if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) 1197 if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
1198 continue; 1198 continue;
1199 pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); 1199 pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
@@ -1222,8 +1222,8 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
1222 r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ 1222 r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
1223 if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) 1223 if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
1224 return; 1224 return;
1225 gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1); 1225 gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
1226 r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff)); 1226 r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
1227 gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); 1227 gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
1228 sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); 1228 sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
1229 *r3e = _REGION3_ENTRY_EMPTY; 1229 *r3e = _REGION3_ENTRY_EMPTY;
@@ -1231,7 +1231,7 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
1231 /* Free segment table */ 1231 /* Free segment table */
1232 page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); 1232 page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
1233 list_del(&page->lru); 1233 list_del(&page->lru);
1234 __free_pages(page, 2); 1234 __free_pages(page, CRST_ALLOC_ORDER);
1235} 1235}
1236 1236
1237/** 1237/**
@@ -1251,7 +1251,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
1251 1251
1252 BUG_ON(!gmap_is_shadow(sg)); 1252 BUG_ON(!gmap_is_shadow(sg));
1253 asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; 1253 asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
1254 for (i = 0; i < 2048; i++, raddr += 1UL << 31) { 1254 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
1255 if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) 1255 if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
1256 continue; 1256 continue;
1257 sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); 1257 sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
@@ -1260,7 +1260,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
1260 /* Free segment table */ 1260 /* Free segment table */
1261 page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); 1261 page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
1262 list_del(&page->lru); 1262 list_del(&page->lru);
1263 __free_pages(page, 2); 1263 __free_pages(page, CRST_ALLOC_ORDER);
1264 } 1264 }
1265} 1265}
1266 1266
@@ -1280,8 +1280,8 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
1280 r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ 1280 r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
1281 if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) 1281 if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
1282 return; 1282 return;
1283 gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1); 1283 gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
1284 r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff)); 1284 r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
1285 gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); 1285 gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
1286 r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); 1286 r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
1287 *r2e = _REGION2_ENTRY_EMPTY; 1287 *r2e = _REGION2_ENTRY_EMPTY;
@@ -1289,7 +1289,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
1289 /* Free region 3 table */ 1289 /* Free region 3 table */
1290 page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); 1290 page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
1291 list_del(&page->lru); 1291 list_del(&page->lru);
1292 __free_pages(page, 2); 1292 __free_pages(page, CRST_ALLOC_ORDER);
1293} 1293}
1294 1294
1295/** 1295/**
@@ -1309,7 +1309,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
1309 1309
1310 BUG_ON(!gmap_is_shadow(sg)); 1310 BUG_ON(!gmap_is_shadow(sg));
1311 asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; 1311 asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
1312 for (i = 0; i < 2048; i++, raddr += 1UL << 42) { 1312 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
1313 if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) 1313 if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
1314 continue; 1314 continue;
1315 r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); 1315 r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
@@ -1318,7 +1318,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
1318 /* Free region 3 table */ 1318 /* Free region 3 table */
1319 page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); 1319 page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
1320 list_del(&page->lru); 1320 list_del(&page->lru);
1321 __free_pages(page, 2); 1321 __free_pages(page, CRST_ALLOC_ORDER);
1322 } 1322 }
1323} 1323}
1324 1324
@@ -1338,8 +1338,8 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
1338 r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ 1338 r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
1339 if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) 1339 if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
1340 return; 1340 return;
1341 gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1); 1341 gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
1342 r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff)); 1342 r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
1343 gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); 1343 gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
1344 r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); 1344 r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
1345 *r1e = _REGION1_ENTRY_EMPTY; 1345 *r1e = _REGION1_ENTRY_EMPTY;
@@ -1347,7 +1347,7 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
1347 /* Free region 2 table */ 1347 /* Free region 2 table */
1348 page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); 1348 page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
1349 list_del(&page->lru); 1349 list_del(&page->lru);
1350 __free_pages(page, 2); 1350 __free_pages(page, CRST_ALLOC_ORDER);
1351} 1351}
1352 1352
1353/** 1353/**
@@ -1367,7 +1367,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
1367 1367
1368 BUG_ON(!gmap_is_shadow(sg)); 1368 BUG_ON(!gmap_is_shadow(sg));
1369 asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; 1369 asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
1370 for (i = 0; i < 2048; i++, raddr += 1UL << 53) { 1370 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
1371 if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) 1371 if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
1372 continue; 1372 continue;
1373 r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); 1373 r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
@@ -1378,7 +1378,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
1378 /* Free region 2 table */ 1378 /* Free region 2 table */
1379 page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); 1379 page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
1380 list_del(&page->lru); 1380 list_del(&page->lru);
1381 __free_pages(page, 2); 1381 __free_pages(page, CRST_ALLOC_ORDER);
1382 } 1382 }
1383} 1383}
1384 1384
@@ -1535,7 +1535,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
1535 /* protect after insertion, so it will get properly invalidated */ 1535 /* protect after insertion, so it will get properly invalidated */
1536 down_read(&parent->mm->mmap_sem); 1536 down_read(&parent->mm->mmap_sem);
1537 rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, 1537 rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
1538 ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096, 1538 ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
1539 PROT_READ, PGSTE_VSIE_BIT); 1539 PROT_READ, PGSTE_VSIE_BIT);
1540 up_read(&parent->mm->mmap_sem); 1540 up_read(&parent->mm->mmap_sem);
1541 spin_lock(&parent->shadow_lock); 1541 spin_lock(&parent->shadow_lock);
@@ -1578,7 +1578,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
1578 1578
1579 BUG_ON(!gmap_is_shadow(sg)); 1579 BUG_ON(!gmap_is_shadow(sg));
1580 /* Allocate a shadow region second table */ 1580 /* Allocate a shadow region second table */
1581 page = alloc_pages(GFP_KERNEL, 2); 1581 page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
1582 if (!page) 1582 if (!page)
1583 return -ENOMEM; 1583 return -ENOMEM;
1584 page->index = r2t & _REGION_ENTRY_ORIGIN; 1584 page->index = r2t & _REGION_ENTRY_ORIGIN;
@@ -1614,10 +1614,10 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
1614 } 1614 }
1615 spin_unlock(&sg->guest_table_lock); 1615 spin_unlock(&sg->guest_table_lock);
1616 /* Make r2t read-only in parent gmap page table */ 1616 /* Make r2t read-only in parent gmap page table */
1617 raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1; 1617 raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
1618 origin = r2t & _REGION_ENTRY_ORIGIN; 1618 origin = r2t & _REGION_ENTRY_ORIGIN;
1619 offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096; 1619 offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1620 len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; 1620 len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
1621 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); 1621 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
1622 spin_lock(&sg->guest_table_lock); 1622 spin_lock(&sg->guest_table_lock);
1623 if (!rc) { 1623 if (!rc) {
@@ -1634,7 +1634,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
1634 return rc; 1634 return rc;
1635out_free: 1635out_free:
1636 spin_unlock(&sg->guest_table_lock); 1636 spin_unlock(&sg->guest_table_lock);
1637 __free_pages(page, 2); 1637 __free_pages(page, CRST_ALLOC_ORDER);
1638 return rc; 1638 return rc;
1639} 1639}
1640EXPORT_SYMBOL_GPL(gmap_shadow_r2t); 1640EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
@@ -1662,7 +1662,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
1662 1662
1663 BUG_ON(!gmap_is_shadow(sg)); 1663 BUG_ON(!gmap_is_shadow(sg));
1664 /* Allocate a shadow region third table */ 1664 /* Allocate a shadow region third table */
1665 page = alloc_pages(GFP_KERNEL, 2); 1665 page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
1666 if (!page) 1666 if (!page)
1667 return -ENOMEM; 1667 return -ENOMEM;
1668 page->index = r3t & _REGION_ENTRY_ORIGIN; 1668 page->index = r3t & _REGION_ENTRY_ORIGIN;
@@ -1697,10 +1697,10 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
1697 } 1697 }
1698 spin_unlock(&sg->guest_table_lock); 1698 spin_unlock(&sg->guest_table_lock);
1699 /* Make r3t read-only in parent gmap page table */ 1699 /* Make r3t read-only in parent gmap page table */
1700 raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2; 1700 raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
1701 origin = r3t & _REGION_ENTRY_ORIGIN; 1701 origin = r3t & _REGION_ENTRY_ORIGIN;
1702 offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096; 1702 offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1703 len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; 1703 len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
1704 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); 1704 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
1705 spin_lock(&sg->guest_table_lock); 1705 spin_lock(&sg->guest_table_lock);
1706 if (!rc) { 1706 if (!rc) {
@@ -1717,7 +1717,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
1717 return rc; 1717 return rc;
1718out_free: 1718out_free:
1719 spin_unlock(&sg->guest_table_lock); 1719 spin_unlock(&sg->guest_table_lock);
1720 __free_pages(page, 2); 1720 __free_pages(page, CRST_ALLOC_ORDER);
1721 return rc; 1721 return rc;
1722} 1722}
1723EXPORT_SYMBOL_GPL(gmap_shadow_r3t); 1723EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
@@ -1745,7 +1745,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
1745 1745
1746 BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); 1746 BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
1747 /* Allocate a shadow segment table */ 1747 /* Allocate a shadow segment table */
1748 page = alloc_pages(GFP_KERNEL, 2); 1748 page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
1749 if (!page) 1749 if (!page)
1750 return -ENOMEM; 1750 return -ENOMEM;
1751 page->index = sgt & _REGION_ENTRY_ORIGIN; 1751 page->index = sgt & _REGION_ENTRY_ORIGIN;
@@ -1781,10 +1781,10 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
1781 } 1781 }
1782 spin_unlock(&sg->guest_table_lock); 1782 spin_unlock(&sg->guest_table_lock);
1783 /* Make sgt read-only in parent gmap page table */ 1783 /* Make sgt read-only in parent gmap page table */
1784 raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3; 1784 raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
1785 origin = sgt & _REGION_ENTRY_ORIGIN; 1785 origin = sgt & _REGION_ENTRY_ORIGIN;
1786 offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096; 1786 offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1787 len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset; 1787 len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
1788 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ); 1788 rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
1789 spin_lock(&sg->guest_table_lock); 1789 spin_lock(&sg->guest_table_lock);
1790 if (!rc) { 1790 if (!rc) {
@@ -1801,7 +1801,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
1801 return rc; 1801 return rc;
1802out_free: 1802out_free:
1803 spin_unlock(&sg->guest_table_lock); 1803 spin_unlock(&sg->guest_table_lock);
1804 __free_pages(page, 2); 1804 __free_pages(page, CRST_ALLOC_ORDER);
1805 return rc; 1805 return rc;
1806} 1806}
1807EXPORT_SYMBOL_GPL(gmap_shadow_sgt); 1807EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
@@ -1902,7 +1902,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
1902 } 1902 }
1903 spin_unlock(&sg->guest_table_lock); 1903 spin_unlock(&sg->guest_table_lock);
1904 /* Make pgt read-only in parent gmap page table (not the pgste) */ 1904 /* Make pgt read-only in parent gmap page table (not the pgste) */
1905 raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT; 1905 raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
1906 origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; 1906 origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
1907 rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ); 1907 rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
1908 spin_lock(&sg->guest_table_lock); 1908 spin_lock(&sg->guest_table_lock);
@@ -2021,7 +2021,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
2021 } 2021 }
2022 /* Check for top level table */ 2022 /* Check for top level table */
2023 start = sg->orig_asce & _ASCE_ORIGIN; 2023 start = sg->orig_asce & _ASCE_ORIGIN;
2024 end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096; 2024 end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
2025 if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start && 2025 if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
2026 gaddr < end) { 2026 gaddr < end) {
2027 /* The complete shadow table has to go */ 2027 /* The complete shadow table has to go */
@@ -2032,7 +2032,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
2032 return; 2032 return;
2033 } 2033 }
2034 /* Remove the page table tree from one specific entry */ 2034 /* Remove the page table tree from one specific entry */
2035 head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12); 2035 head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
2036 gmap_for_each_rmap_safe(rmap, rnext, head) { 2036 gmap_for_each_rmap_safe(rmap, rnext, head) {
2037 bits = rmap->raddr & _SHADOW_RMAP_MASK; 2037 bits = rmap->raddr & _SHADOW_RMAP_MASK;
2038 raddr = rmap->raddr ^ bits; 2038 raddr = rmap->raddr ^ bits;
@@ -2076,7 +2076,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
2076 struct gmap *gmap, *sg, *next; 2076 struct gmap *gmap, *sg, *next;
2077 2077
2078 offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); 2078 offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
2079 offset = offset * (4096 / sizeof(pte_t)); 2079 offset = offset * (PAGE_SIZE / sizeof(pte_t));
2080 rcu_read_lock(); 2080 rcu_read_lock();
2081 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { 2081 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
2082 spin_lock(&gmap->guest_table_lock); 2082 spin_lock(&gmap->guest_table_lock);
@@ -2121,6 +2121,37 @@ static inline void thp_split_mm(struct mm_struct *mm)
2121} 2121}
2122 2122
2123/* 2123/*
2124 * Remove all empty zero pages from the mapping for lazy refaulting
2125 * - This must be called after mm->context.has_pgste is set, to avoid
2126 * future creation of zero pages
2127 * - This must be called after THP was disabled, i.e. after thp_split_mm()
2128 */
2129static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
2130 unsigned long end, struct mm_walk *walk)
2131{
2132 unsigned long addr;
2133
2134 for (addr = start; addr != end; addr += PAGE_SIZE) {
2135 pte_t *ptep;
2136 spinlock_t *ptl;
2137
2138 ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
2139 if (is_zero_pfn(pte_pfn(*ptep)))
2140 ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
2141 pte_unmap_unlock(ptep, ptl);
2142 }
2143 return 0;
2144}
2145
2146static inline void zap_zero_pages(struct mm_struct *mm)
2147{
2148 struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
2149
2150 walk.mm = mm;
2151 walk_page_range(0, TASK_SIZE, &walk);
2152}
2153
2154/*
2124 * switch on pgstes for its userspace process (for kvm) 2155 * switch on pgstes for its userspace process (for kvm)
2125 */ 2156 */
2126int s390_enable_sie(void) 2157int s390_enable_sie(void)
@@ -2137,6 +2168,7 @@ int s390_enable_sie(void)
2137 mm->context.has_pgste = 1; 2168 mm->context.has_pgste = 1;
2138 /* split thp mappings and disable thp for future mappings */ 2169 /* split thp mappings and disable thp for future mappings */
2139 thp_split_mm(mm); 2170 thp_split_mm(mm);
2171 zap_zero_pages(mm);
2140 up_write(&mm->mmap_sem); 2172 up_write(&mm->mmap_sem);
2141 return 0; 2173 return 0;
2142} 2174}
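For context on why the new zap_zero_pages() matters: a read fault on never-written anonymous memory maps the shared empty zero page, and userfaultfd-based postcopy can hang on such shared mappings; invalidating those ptes forces the next guest access to refault into a fresh page. A hypothetical userspace illustration of the zero-page behaviour itself (assumes Linux and 4 KB pages):

	#include <sys/mman.h>

	int main(void)
	{
		char *p = mmap(0, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return 1;
		char c = p[0];	/* read fault: backed by the shared zero page */
		p[0] = 1;	/* write fault: replaced by a fresh anonymous page */
		return c;	/* always 0 */
	}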
@@ -2149,13 +2181,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
2149static int __s390_enable_skey(pte_t *pte, unsigned long addr, 2181static int __s390_enable_skey(pte_t *pte, unsigned long addr,
2150 unsigned long next, struct mm_walk *walk) 2182 unsigned long next, struct mm_walk *walk)
2151{ 2183{
2152 /*
2153 * Remove all zero page mappings,
2154 * after establishing a policy to forbid zero page mappings
2155 * following faults for that page will get fresh anonymous pages
2156 */
2157 if (is_zero_pfn(pte_pfn(*pte)))
2158 ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
2159 /* Clear storage key */ 2184 /* Clear storage key */
2160 ptep_zap_key(walk->mm, addr, pte); 2185 ptep_zap_key(walk->mm, addr, pte);
2161 return 0; 2186 return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8111694ce55a..3b567838b905 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -26,6 +26,7 @@
26#include <linux/poison.h> 26#include <linux/poison.h>
27#include <linux/initrd.h> 27#include <linux/initrd.h>
28#include <linux/export.h> 28#include <linux/export.h>
29#include <linux/cma.h>
29#include <linux/gfp.h> 30#include <linux/gfp.h>
30#include <linux/memblock.h> 31#include <linux/memblock.h>
31#include <asm/processor.h> 32#include <asm/processor.h>
@@ -84,7 +85,7 @@ void __init paging_init(void)
84 psw_t psw; 85 psw_t psw;
85 86
86 init_mm.pgd = swapper_pg_dir; 87 init_mm.pgd = swapper_pg_dir;
87 if (VMALLOC_END > (1UL << 42)) { 88 if (VMALLOC_END > _REGION2_SIZE) {
88 asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; 89 asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
89 pgd_type = _REGION2_ENTRY_EMPTY; 90 pgd_type = _REGION2_ENTRY_EMPTY;
90 } else { 91 } else {
@@ -93,8 +94,7 @@ void __init paging_init(void)
93 } 94 }
94 init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; 95 init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
95 S390_lowcore.kernel_asce = init_mm.context.asce; 96 S390_lowcore.kernel_asce = init_mm.context.asce;
96 clear_table((unsigned long *) init_mm.pgd, pgd_type, 97 crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
97 sizeof(unsigned long)*2048);
98 vmem_map_init(); 98 vmem_map_init();
99 99
100 /* enable virtual mapping in kernel mode */ 100 /* enable virtual mapping in kernel mode */
@@ -137,6 +137,8 @@ void __init mem_init(void)
137 free_all_bootmem(); 137 free_all_bootmem();
138 setup_zero_pages(); /* Setup zeroed pages. */ 138 setup_zero_pages(); /* Setup zeroed pages. */
139 139
140 cmma_init_nodat();
141
140 mem_init_print_info(NULL); 142 mem_init_print_info(NULL);
141} 143}
142 144
@@ -166,6 +168,58 @@ unsigned long memory_block_size_bytes(void)
166} 168}
167 169
168#ifdef CONFIG_MEMORY_HOTPLUG 170#ifdef CONFIG_MEMORY_HOTPLUG
171
172#ifdef CONFIG_CMA
173
174/* Prevent memory blocks which contain cma regions from going offline */
175
176struct s390_cma_mem_data {
177 unsigned long start;
178 unsigned long end;
179};
180
181static int s390_cma_check_range(struct cma *cma, void *data)
182{
183 struct s390_cma_mem_data *mem_data;
184 unsigned long start, end;
185
186 mem_data = data;
187 start = cma_get_base(cma);
188 end = start + cma_get_size(cma);
189 if (end < mem_data->start)
190 return 0;
191 if (start >= mem_data->end)
192 return 0;
193 return -EBUSY;
194}
195
196static int s390_cma_mem_notifier(struct notifier_block *nb,
197 unsigned long action, void *data)
198{
199 struct s390_cma_mem_data mem_data;
200 struct memory_notify *arg;
201 int rc = 0;
202
203 arg = data;
204 mem_data.start = arg->start_pfn << PAGE_SHIFT;
205 mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
206 if (action == MEM_GOING_OFFLINE)
207 rc = cma_for_each_area(s390_cma_check_range, &mem_data);
208 return notifier_from_errno(rc);
209}
210
211static struct notifier_block s390_cma_mem_nb = {
212 .notifier_call = s390_cma_mem_notifier,
213};
214
215static int __init s390_cma_mem_init(void)
216{
217 return register_memory_notifier(&s390_cma_mem_nb);
218}
219device_initcall(s390_cma_mem_init);
220
221#endif /* CONFIG_CMA */
222
169int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) 223int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
170{ 224{
171 unsigned long start_pfn = PFN_DOWN(start); 225 unsigned long start_pfn = PFN_DOWN(start);
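s390_cma_check_range() above is the usual half-open interval overlap test in early-return form; note that it also treats a CMA area that merely touches the block (end == mem_data->start) as a conflict, erring on the safe side. An equivalent single-expression predicate, as a sketch (names illustrative):

	/* [a_start, a_end) and [b_start, b_end) intersect iff neither range
	 * ends before the other begins; -EBUSY on overlap vetoes the
	 * MEM_GOING_OFFLINE transition via notifier_from_errno(). */
	static int ranges_overlap(unsigned long a_start, unsigned long a_end,
				  unsigned long b_start, unsigned long b_end)
	{
		return a_start < b_end && b_start < a_end;
	}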
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 69a7b01ae746..07fa7b8ae233 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -10,9 +10,10 @@
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/memblock.h>
13#include <linux/gfp.h> 14#include <linux/gfp.h>
14#include <linux/init.h> 15#include <linux/init.h>
15 16#include <asm/facility.h>
16#include <asm/page-states.h> 17#include <asm/page-states.h>
17 18
18static int cmma_flag = 1; 19static int cmma_flag = 1;
@@ -36,14 +37,16 @@ __setup("cmma=", cmma);
36static inline int cmma_test_essa(void) 37static inline int cmma_test_essa(void)
37{ 38{
38 register unsigned long tmp asm("0") = 0; 39 register unsigned long tmp asm("0") = 0;
39 register int rc asm("1") = -EOPNOTSUPP; 40 register int rc asm("1");
40 41
42 /* test ESSA_GET_STATE */
41 asm volatile( 43 asm volatile(
42 " .insn rrf,0xb9ab0000,%1,%1,0,0\n" 44 " .insn rrf,0xb9ab0000,%1,%1,%2,0\n"
43 "0: la %0,0\n" 45 "0: la %0,0\n"
44 "1:\n" 46 "1:\n"
45 EX_TABLE(0b,1b) 47 EX_TABLE(0b,1b)
46 : "+&d" (rc), "+&d" (tmp)); 48 : "=&d" (rc), "+&d" (tmp)
49 : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP));
47 return rc; 50 return rc;
48} 51}
49 52
@@ -51,11 +54,26 @@ void __init cmma_init(void)
51{ 54{
52 if (!cmma_flag) 55 if (!cmma_flag)
53 return; 56 return;
54 if (cmma_test_essa()) 57 if (cmma_test_essa()) {
55 cmma_flag = 0; 58 cmma_flag = 0;
59 return;
60 }
61 if (test_facility(147))
62 cmma_flag = 2;
56} 63}
57 64
58static inline void set_page_unstable(struct page *page, int order) 65static inline unsigned char get_page_state(struct page *page)
66{
67 unsigned char state;
68
69 asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0"
70 : "=&d" (state)
71 : "a" (page_to_phys(page)),
72 "i" (ESSA_GET_STATE));
73 return state & 0x3f;
74}
75
76static inline void set_page_unused(struct page *page, int order)
59{ 77{
60 int i, rc; 78 int i, rc;
61 79
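With the facility-147 check, cmma_flag becomes a three-state value rather than a boolean, and the helpers below dispatch on it. A sketch of the states as implied by the code (enum names invented for illustration; the kernel keeps a plain int):

	enum cmma_mode {
		CMMA_OFF	= 0,	/* "cmma=off" or the ESSA test failed */
		CMMA_DAT	= 1,	/* ESSA available: stable/unused states */
		CMMA_NODAT	= 2,	/* facility 147 (z14): no-DAT states too */
	};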
@@ -66,14 +84,18 @@ static inline void set_page_unstable(struct page *page, int order)
66 "i" (ESSA_SET_UNUSED)); 84 "i" (ESSA_SET_UNUSED));
67} 85}
68 86
69void arch_free_page(struct page *page, int order) 87static inline void set_page_stable_dat(struct page *page, int order)
70{ 88{
71 if (!cmma_flag) 89 int i, rc;
72 return; 90
73 set_page_unstable(page, order); 91 for (i = 0; i < (1 << order); i++)
92 asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
93 : "=&d" (rc)
94 : "a" (page_to_phys(page + i)),
95 "i" (ESSA_SET_STABLE));
74} 96}
75 97
76static inline void set_page_stable(struct page *page, int order) 98static inline void set_page_stable_nodat(struct page *page, int order)
77{ 99{
78 int i, rc; 100 int i, rc;
79 101
@@ -81,14 +103,154 @@ static inline void set_page_stable(struct page *page, int order)
81 asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" 103 asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
82 : "=&d" (rc) 104 : "=&d" (rc)
83 : "a" (page_to_phys(page + i)), 105 : "a" (page_to_phys(page + i)),
84 "i" (ESSA_SET_STABLE)); 106 "i" (ESSA_SET_STABLE_NODAT));
107}
108
109static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
110{
111 unsigned long next;
112 struct page *page;
113 pmd_t *pmd;
114
115 pmd = pmd_offset(pud, addr);
116 do {
117 next = pmd_addr_end(addr, end);
118 if (pmd_none(*pmd) || pmd_large(*pmd))
119 continue;
120 page = virt_to_page(pmd_val(*pmd));
121 set_bit(PG_arch_1, &page->flags);
122 } while (pmd++, addr = next, addr != end);
123}
124
125static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
126{
127 unsigned long next;
128 struct page *page;
129 pud_t *pud;
130 int i;
131
132 pud = pud_offset(p4d, addr);
133 do {
134 next = pud_addr_end(addr, end);
135 if (pud_none(*pud) || pud_large(*pud))
136 continue;
137 if (!pud_folded(*pud)) {
138 page = virt_to_page(pud_val(*pud));
139 for (i = 0; i < 3; i++)
140 set_bit(PG_arch_1, &page[i].flags);
141 }
142 mark_kernel_pmd(pud, addr, next);
143 } while (pud++, addr = next, addr != end);
144}
145
146static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
147{
148 unsigned long next;
149 struct page *page;
150 p4d_t *p4d;
151 int i;
152
153 p4d = p4d_offset(pgd, addr);
154 do {
155 next = p4d_addr_end(addr, end);
156 if (p4d_none(*p4d))
157 continue;
158 if (!p4d_folded(*p4d)) {
159 page = virt_to_page(p4d_val(*p4d));
160 for (i = 0; i < 3; i++)
161 set_bit(PG_arch_1, &page[i].flags);
162 }
163 mark_kernel_pud(p4d, addr, next);
164 } while (p4d++, addr = next, addr != end);
165}
166
167static void mark_kernel_pgd(void)
168{
169 unsigned long addr, next;
170 struct page *page;
171 pgd_t *pgd;
172 int i;
173
174 addr = 0;
175 pgd = pgd_offset_k(addr);
176 do {
177 next = pgd_addr_end(addr, MODULES_END);
178 if (pgd_none(*pgd))
179 continue;
180 if (!pgd_folded(*pgd)) {
181 page = virt_to_page(pgd_val(*pgd));
182 for (i = 0; i < 3; i++)
183 set_bit(PG_arch_1, &page[i].flags);
184 }
185 mark_kernel_p4d(pgd, addr, next);
186 } while (pgd++, addr = next, addr != MODULES_END);
187}
188
189void __init cmma_init_nodat(void)
190{
191 struct memblock_region *reg;
192 struct page *page;
193 unsigned long start, end, ix;
194
195 if (cmma_flag < 2)
196 return;
197 /* Mark pages used in kernel page tables */
198 mark_kernel_pgd();
199
200 /* Set all kernel pages not used for page tables to stable/no-dat */
201 for_each_memblock(memory, reg) {
202 start = memblock_region_memory_base_pfn(reg);
203 end = memblock_region_memory_end_pfn(reg);
204 page = pfn_to_page(start);
205 for (ix = start; ix < end; ix++, page++) {
206 if (__test_and_clear_bit(PG_arch_1, &page->flags))
207 continue; /* skip page table pages */
208 if (!list_empty(&page->lru))
209 continue; /* skip free pages */
210 set_page_stable_nodat(page, 0);
211 }
212 }
213}
214
215void arch_free_page(struct page *page, int order)
216{
217 if (!cmma_flag)
218 return;
219 set_page_unused(page, order);
85} 220}
86 221
87void arch_alloc_page(struct page *page, int order) 222void arch_alloc_page(struct page *page, int order)
88{ 223{
89 if (!cmma_flag) 224 if (!cmma_flag)
90 return; 225 return;
91 set_page_stable(page, order); 226 if (cmma_flag < 2)
227 set_page_stable_dat(page, order);
228 else
229 set_page_stable_nodat(page, order);
230}
231
232void arch_set_page_dat(struct page *page, int order)
233{
234 if (!cmma_flag)
235 return;
236 set_page_stable_dat(page, order);
237}
238
239void arch_set_page_nodat(struct page *page, int order)
240{
241 if (cmma_flag < 2)
242 return;
243 set_page_stable_nodat(page, order);
244}
245
246int arch_test_page_nodat(struct page *page)
247{
248 unsigned char state;
249
250 if (cmma_flag < 2)
251 return 0;
252 state = get_page_state(page);
253 return !!(state & 0x20);
92} 254}
93 255
94void arch_set_page_states(int make_stable) 256void arch_set_page_states(int make_stable)
@@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable)
108 list_for_each(l, &zone->free_area[order].free_list[t]) { 270 list_for_each(l, &zone->free_area[order].free_list[t]) {
109 page = list_entry(l, struct page, lru); 271 page = list_entry(l, struct page, lru);
110 if (make_stable) 272 if (make_stable)
111 set_page_stable(page, order); 273 set_page_stable_dat(page, 0);
112 else 274 else
113 set_page_unstable(page, order); 275 set_page_unused(page, order);
114 } 276 }
115 } 277 }
116 spin_unlock_irqrestore(&zone->lock, flags); 278 spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 180481589246..552f898dfa74 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,6 +7,7 @@
7#include <asm/cacheflush.h> 7#include <asm/cacheflush.h>
8#include <asm/facility.h> 8#include <asm/facility.h>
9#include <asm/pgtable.h> 9#include <asm/pgtable.h>
10#include <asm/pgalloc.h>
10#include <asm/page.h> 11#include <asm/page.h>
11#include <asm/set_memory.h> 12#include <asm/set_memory.h>
12 13
@@ -191,7 +192,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
191 pud_t new; 192 pud_t new;
192 int i, ro, nx; 193 int i, ro, nx;
193 194
194 pm_dir = vmem_pmd_alloc(); 195 pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
195 if (!pm_dir) 196 if (!pm_dir)
196 return -ENOMEM; 197 return -ENOMEM;
197 pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; 198 pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
@@ -328,7 +329,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
328 return; 329 return;
329 } 330 }
330 for (i = 0; i < nr; i++) { 331 for (i = 0; i < nr; i++) {
331 __ptep_ipte(address, pte, IPTE_GLOBAL); 332 __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
332 address += PAGE_SIZE; 333 address += PAGE_SIZE;
333 pte++; 334 pte++;
334 } 335 }
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 18918e394ce4..c5b74dd61197 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
57 57
58 if (!page) 58 if (!page)
59 return NULL; 59 return NULL;
60 arch_set_page_dat(page, 2);
60 return (unsigned long *) page_to_phys(page); 61 return (unsigned long *) page_to_phys(page);
61} 62}
62 63
@@ -82,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
82 int rc, notify; 83 int rc, notify;
83 84
84 /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ 85 /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
85 BUG_ON(mm->context.asce_limit < (1UL << 42)); 86 BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
86 if (end >= TASK_SIZE_MAX) 87 if (end >= TASK_SIZE_MAX)
87 return -ENOMEM; 88 return -ENOMEM;
88 rc = 0; 89 rc = 0;
@@ -95,11 +96,11 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
95 } 96 }
96 spin_lock_bh(&mm->page_table_lock); 97 spin_lock_bh(&mm->page_table_lock);
97 pgd = (unsigned long *) mm->pgd; 98 pgd = (unsigned long *) mm->pgd;
98 if (mm->context.asce_limit == (1UL << 42)) { 99 if (mm->context.asce_limit == _REGION2_SIZE) {
99 crst_table_init(table, _REGION2_ENTRY_EMPTY); 100 crst_table_init(table, _REGION2_ENTRY_EMPTY);
100 p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); 101 p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
101 mm->pgd = (pgd_t *) table; 102 mm->pgd = (pgd_t *) table;
102 mm->context.asce_limit = 1UL << 53; 103 mm->context.asce_limit = _REGION1_SIZE;
103 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 104 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
104 _ASCE_USER_BITS | _ASCE_TYPE_REGION2; 105 _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
105 } else { 106 } else {
@@ -123,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm)
123 pgd_t *pgd; 124 pgd_t *pgd;
124 125
125 /* downgrade should only happen from 3 to 2 levels (compat only) */ 126 /* downgrade should only happen from 3 to 2 levels (compat only) */
126 BUG_ON(mm->context.asce_limit != (1UL << 42)); 127 BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
127 128
128 if (current->active_mm == mm) { 129 if (current->active_mm == mm) {
129 clear_user_asce(); 130 clear_user_asce();
@@ -132,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm)
132 133
133 pgd = mm->pgd; 134 pgd = mm->pgd;
134 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); 135 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
135 mm->context.asce_limit = 1UL << 31; 136 mm->context.asce_limit = _REGION3_SIZE;
136 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | 137 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
137 _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; 138 _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
138 crst_table_free(mm, (unsigned long *) pgd); 139 crst_table_free(mm, (unsigned long *) pgd);
@@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
214 __free_page(page); 215 __free_page(page);
215 return NULL; 216 return NULL;
216 } 217 }
218 arch_set_page_dat(page, 0);
217 /* Initialize page table */ 219 /* Initialize page table */
218 table = (unsigned long *) page_to_phys(page); 220 table = (unsigned long *) page_to_phys(page);
219 if (mm_alloc_pgste(mm)) { 221 if (mm_alloc_pgste(mm)) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4a1f7366b17a..4198a71b8fdd 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -25,8 +25,49 @@
25#include <asm/mmu_context.h> 25#include <asm/mmu_context.h>
26#include <asm/page-states.h> 26#include <asm/page-states.h>
27 27
28static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
29 pte_t *ptep, int nodat)
30{
31 unsigned long opt, asce;
32
33 if (MACHINE_HAS_TLB_GUEST) {
34 opt = 0;
35 asce = READ_ONCE(mm->context.gmap_asce);
36 if (asce == 0UL || nodat)
37 opt |= IPTE_NODAT;
38 if (asce != -1UL) {
39 asce = asce ? : mm->context.asce;
40 opt |= IPTE_GUEST_ASCE;
41 }
42 __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
43 } else {
44 __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
45 }
46}
47
48static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
49 pte_t *ptep, int nodat)
50{
51 unsigned long opt, asce;
52
53 if (MACHINE_HAS_TLB_GUEST) {
54 opt = 0;
55 asce = READ_ONCE(mm->context.gmap_asce);
56 if (asce == 0UL || nodat)
57 opt |= IPTE_NODAT;
58 if (asce != -1UL) {
59 asce = asce ? : mm->context.asce;
60 opt |= IPTE_GUEST_ASCE;
61 }
62 __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
63 } else {
64 __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
65 }
66}
67
28static inline pte_t ptep_flush_direct(struct mm_struct *mm, 68static inline pte_t ptep_flush_direct(struct mm_struct *mm,
29 unsigned long addr, pte_t *ptep) 69 unsigned long addr, pte_t *ptep,
70 int nodat)
30{ 71{
31 pte_t old; 72 pte_t old;
32 73
@@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
36 atomic_inc(&mm->context.flush_count); 77 atomic_inc(&mm->context.flush_count);
37 if (MACHINE_HAS_TLB_LC && 78 if (MACHINE_HAS_TLB_LC &&
38 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 79 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
39 __ptep_ipte(addr, ptep, IPTE_LOCAL); 80 ptep_ipte_local(mm, addr, ptep, nodat);
40 else 81 else
41 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 82 ptep_ipte_global(mm, addr, ptep, nodat);
42 atomic_dec(&mm->context.flush_count); 83 atomic_dec(&mm->context.flush_count);
43 return old; 84 return old;
44} 85}
45 86
46static inline pte_t ptep_flush_lazy(struct mm_struct *mm, 87static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
47 unsigned long addr, pte_t *ptep) 88 unsigned long addr, pte_t *ptep,
89 int nodat)
48{ 90{
49 pte_t old; 91 pte_t old;
50 92
@@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
57 pte_val(*ptep) |= _PAGE_INVALID; 99 pte_val(*ptep) |= _PAGE_INVALID;
58 mm->context.flush_mm = 1; 100 mm->context.flush_mm = 1;
59 } else 101 } else
60 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 102 ptep_ipte_global(mm, addr, ptep, nodat);
61 atomic_dec(&mm->context.flush_count); 103 atomic_dec(&mm->context.flush_count);
62 return old; 104 return old;
63} 105}
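The new ptep_ipte_local()/ptep_ipte_global() helpers fold the IBM z14 guest-TLB hints into every pte flush. A condensed sketch of the option selection, reading mm->context.gmap_asce as 0 when no guest ASCE exists and -1UL when it cannot be determined (the flag values below are illustrative placeholders, not the real IPTE_* encodings):

	#define IPTE_NODAT_HINT		0x1UL	/* placeholder value */
	#define IPTE_GUEST_ASCE_HINT	0x2UL	/* placeholder value */

	static unsigned long ipte_options(unsigned long gmap_asce,
					  unsigned long mm_asce, int nodat,
					  unsigned long *asce)
	{
		unsigned long opt = 0;

		if (gmap_asce == 0UL || nodat)	/* pte never used for guest DAT */
			opt |= IPTE_NODAT_HINT;
		if (gmap_asce != -1UL) {	/* a single ASCE is known */
			*asce = gmap_asce ? gmap_asce : mm_asce;
			opt |= IPTE_GUEST_ASCE_HINT;
		}
		return opt;
	}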
@@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
229{ 271{
230 pgste_t pgste; 272 pgste_t pgste;
231 pte_t old; 273 pte_t old;
274 int nodat;
232 275
233 preempt_disable(); 276 preempt_disable();
234 pgste = ptep_xchg_start(mm, addr, ptep); 277 pgste = ptep_xchg_start(mm, addr, ptep);
235 old = ptep_flush_direct(mm, addr, ptep); 278 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
279 old = ptep_flush_direct(mm, addr, ptep, nodat);
236 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 280 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
237 preempt_enable(); 281 preempt_enable();
238 return old; 282 return old;
@@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
244{ 288{
245 pgste_t pgste; 289 pgste_t pgste;
246 pte_t old; 290 pte_t old;
291 int nodat;
247 292
248 preempt_disable(); 293 preempt_disable();
249 pgste = ptep_xchg_start(mm, addr, ptep); 294 pgste = ptep_xchg_start(mm, addr, ptep);
250 old = ptep_flush_lazy(mm, addr, ptep); 295 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
296 old = ptep_flush_lazy(mm, addr, ptep, nodat);
251 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); 297 old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
252 preempt_enable(); 298 preempt_enable();
253 return old; 299 return old;
@@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
259{ 305{
260 pgste_t pgste; 306 pgste_t pgste;
261 pte_t old; 307 pte_t old;
308 int nodat;
262 309
263 preempt_disable(); 310 preempt_disable();
264 pgste = ptep_xchg_start(mm, addr, ptep); 311 pgste = ptep_xchg_start(mm, addr, ptep);
265 old = ptep_flush_lazy(mm, addr, ptep); 312 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
313 old = ptep_flush_lazy(mm, addr, ptep, nodat);
266 if (mm_has_pgste(mm)) { 314 if (mm_has_pgste(mm)) {
267 pgste = pgste_update_all(old, pgste, mm); 315 pgste = pgste_update_all(old, pgste, mm);
268 pgste_set(ptep, pgste); 316 pgste_set(ptep, pgste);
@@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
290} 338}
291EXPORT_SYMBOL(ptep_modify_prot_commit); 339EXPORT_SYMBOL(ptep_modify_prot_commit);
292 340
341static inline void pmdp_idte_local(struct mm_struct *mm,
342 unsigned long addr, pmd_t *pmdp)
343{
344 if (MACHINE_HAS_TLB_GUEST)
345 __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
346 mm->context.asce, IDTE_LOCAL);
347 else
348 __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
349}
350
351static inline void pmdp_idte_global(struct mm_struct *mm,
352 unsigned long addr, pmd_t *pmdp)
353{
354 if (MACHINE_HAS_TLB_GUEST)
355 __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
356 mm->context.asce, IDTE_GLOBAL);
357 else if (MACHINE_HAS_IDTE)
358 __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
359 else
360 __pmdp_csp(pmdp);
361}
362
293static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, 363static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
294 unsigned long addr, pmd_t *pmdp) 364 unsigned long addr, pmd_t *pmdp)
295{ 365{
@@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
298 old = *pmdp; 368 old = *pmdp;
299 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) 369 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
300 return old; 370 return old;
301 if (!MACHINE_HAS_IDTE) {
302 __pmdp_csp(pmdp);
303 return old;
304 }
305 atomic_inc(&mm->context.flush_count); 371 atomic_inc(&mm->context.flush_count);
306 if (MACHINE_HAS_TLB_LC && 372 if (MACHINE_HAS_TLB_LC &&
307 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 373 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
308 __pmdp_idte(addr, pmdp, IDTE_LOCAL); 374 pmdp_idte_local(mm, addr, pmdp);
309 else 375 else
310 __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 376 pmdp_idte_global(mm, addr, pmdp);
311 atomic_dec(&mm->context.flush_count); 377 atomic_dec(&mm->context.flush_count);
312 return old; 378 return old;
313} 379}
@@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
325 cpumask_of(smp_processor_id()))) { 391 cpumask_of(smp_processor_id()))) {
326 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; 392 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
327 mm->context.flush_mm = 1; 393 mm->context.flush_mm = 1;
328 } else if (MACHINE_HAS_IDTE) 394 } else {
329 __pmdp_idte(addr, pmdp, IDTE_GLOBAL); 395 pmdp_idte_global(mm, addr, pmdp);
330 else 396 }
331 __pmdp_csp(pmdp);
332 atomic_dec(&mm->context.flush_count); 397 atomic_dec(&mm->context.flush_count);
333 return old; 398 return old;
334} 399}
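The same local-versus-global decision now appears for ptes, pmds and puds alike: flush only the local TLB when the machine has the TLB-local-clearing facility and the mm is attached to no CPU but the current one. As a standalone predicate (a sketch; the kernel open-codes this at each site):

	static inline int can_flush_local_only(struct mm_struct *mm)
	{
		return MACHINE_HAS_TLB_LC &&
		       cpumask_equal(mm_cpumask(mm),
				     cpumask_of(smp_processor_id()));
	}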
@@ -359,28 +424,46 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
359} 424}
360EXPORT_SYMBOL(pmdp_xchg_lazy); 425EXPORT_SYMBOL(pmdp_xchg_lazy);
361 426
362static inline pud_t pudp_flush_direct(struct mm_struct *mm, 427static inline void pudp_idte_local(struct mm_struct *mm,
363 unsigned long addr, pud_t *pudp) 428 unsigned long addr, pud_t *pudp)
364{ 429{
365 pud_t old; 430 if (MACHINE_HAS_TLB_GUEST)
431 __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
432 mm->context.asce, IDTE_LOCAL);
433 else
434 __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
435}
366 436
367 old = *pudp; 437static inline void pudp_idte_global(struct mm_struct *mm,
368 if (pud_val(old) & _REGION_ENTRY_INVALID) 438 unsigned long addr, pud_t *pudp)
369 return old; 439{
370 if (!MACHINE_HAS_IDTE) { 440 if (MACHINE_HAS_TLB_GUEST)
441 __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
442 mm->context.asce, IDTE_GLOBAL);
443 else if (MACHINE_HAS_IDTE)
444 __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
445 else
371 /* 446 /*
372 * Invalid bit position is the same for pmd and pud, so we can 447 * Invalid bit position is the same for pmd and pud, so we can
373 * re-use __pmdp_csp() here 448 * re-use __pmdp_csp() here
374 */ 449 */
375 __pmdp_csp((pmd_t *) pudp); 450 __pmdp_csp((pmd_t *) pudp);
451}
452
453static inline pud_t pudp_flush_direct(struct mm_struct *mm,
454 unsigned long addr, pud_t *pudp)
455{
456 pud_t old;
457
458 old = *pudp;
459 if (pud_val(old) & _REGION_ENTRY_INVALID)
376 return old; 460 return old;
377 }
378 atomic_inc(&mm->context.flush_count); 461 atomic_inc(&mm->context.flush_count);
379 if (MACHINE_HAS_TLB_LC && 462 if (MACHINE_HAS_TLB_LC &&
380 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) 463 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
381 __pudp_idte(addr, pudp, IDTE_LOCAL); 464 pudp_idte_local(mm, addr, pudp);
382 else 465 else
383 __pudp_idte(addr, pudp, IDTE_GLOBAL); 466 pudp_idte_global(mm, addr, pudp);
384 atomic_dec(&mm->context.flush_count); 467 atomic_dec(&mm->context.flush_count);
385 return old; 468 return old;
386} 469}
@@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
482{ 565{
483 pte_t entry; 566 pte_t entry;
484 pgste_t pgste; 567 pgste_t pgste;
485 int pte_i, pte_p; 568 int pte_i, pte_p, nodat;
486 569
487 pgste = pgste_get_lock(ptep); 570 pgste = pgste_get_lock(ptep);
488 entry = *ptep; 571 entry = *ptep;
@@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
495 return -EAGAIN; 578 return -EAGAIN;
496 } 579 }
497 /* Change access rights and set pgste bit */ 580 /* Change access rights and set pgste bit */
581 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
498 if (prot == PROT_NONE && !pte_i) { 582 if (prot == PROT_NONE && !pte_i) {
499 ptep_flush_direct(mm, addr, ptep); 583 ptep_flush_direct(mm, addr, ptep, nodat);
500 pgste = pgste_update_all(entry, pgste, mm); 584 pgste = pgste_update_all(entry, pgste, mm);
501 pte_val(entry) |= _PAGE_INVALID; 585 pte_val(entry) |= _PAGE_INVALID;
502 } 586 }
503 if (prot == PROT_READ && !pte_p) { 587 if (prot == PROT_READ && !pte_p) {
504 ptep_flush_direct(mm, addr, ptep); 588 ptep_flush_direct(mm, addr, ptep, nodat);
505 pte_val(entry) &= ~_PAGE_INVALID; 589 pte_val(entry) &= ~_PAGE_INVALID;
506 pte_val(entry) |= _PAGE_PROTECT; 590 pte_val(entry) |= _PAGE_PROTECT;
507 } 591 }
@@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
541void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) 625void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
542{ 626{
543 pgste_t pgste; 627 pgste_t pgste;
628 int nodat;
544 629
545 pgste = pgste_get_lock(ptep); 630 pgste = pgste_get_lock(ptep);
546 /* notifier is called by the caller */ 631 /* notifier is called by the caller */
547 ptep_flush_direct(mm, saddr, ptep); 632 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
633 ptep_flush_direct(mm, saddr, ptep, nodat);
548 /* don't touch the storage key - it belongs to parent pgste */ 634 /* don't touch the storage key - it belongs to parent pgste */
549 pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); 635 pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
550 pgste_set_unlock(ptep, pgste); 636 pgste_set_unlock(ptep, pgste);
@@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
617 pte_t *ptep; 703 pte_t *ptep;
618 pte_t pte; 704 pte_t pte;
619 bool dirty; 705 bool dirty;
706 int nodat;
620 707
621 pgd = pgd_offset(mm, addr); 708 pgd = pgd_offset(mm, addr);
622 p4d = p4d_alloc(mm, pgd, addr); 709 p4d = p4d_alloc(mm, pgd, addr);
@@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
645 pte = *ptep; 732 pte = *ptep;
646 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { 733 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
647 pgste = pgste_pte_notify(mm, addr, ptep, pgste); 734 pgste = pgste_pte_notify(mm, addr, ptep, pgste);
648 __ptep_ipte(addr, ptep, IPTE_GLOBAL); 735 nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
736 ptep_ipte_global(mm, addr, ptep, nodat);
649 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) 737 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
650 pte_val(pte) |= _PAGE_PROTECT; 738 pte_val(pte) |= _PAGE_PROTECT;
651 else 739 else
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index d8398962a723..c0af0d7b6e5f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,37 +38,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
38 return (void *) memblock_alloc(size, size); 38 return (void *) memblock_alloc(size, size);
39} 39}
40 40
41static inline p4d_t *vmem_p4d_alloc(void) 41void *vmem_crst_alloc(unsigned long val)
42{ 42{
43 p4d_t *p4d = NULL; 43 unsigned long *table;
44 44
45 p4d = vmem_alloc_pages(2); 45 table = vmem_alloc_pages(CRST_ALLOC_ORDER);
46 if (!p4d) 46 if (table)
47 return NULL; 47 crst_table_init(table, val);
48 clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4); 48 return table;
49 return p4d;
50}
51
52static inline pud_t *vmem_pud_alloc(void)
53{
54 pud_t *pud = NULL;
55
56 pud = vmem_alloc_pages(2);
57 if (!pud)
58 return NULL;
59 clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
60 return pud;
61}
62
63pmd_t *vmem_pmd_alloc(void)
64{
65 pmd_t *pmd = NULL;
66
67 pmd = vmem_alloc_pages(2);
68 if (!pmd)
69 return NULL;
70 clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
71 return pmd;
72} 49}
73 50
74pte_t __ref *vmem_pte_alloc(void) 51pte_t __ref *vmem_pte_alloc(void)
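The three near-identical vmem allocators collapse into one helper that takes the empty-entry value for the level being built; the call sites in the hunks below become, in brief:

	p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);	/* was vmem_p4d_alloc() */
	pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);	/* was vmem_pud_alloc() */
	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);	/* was vmem_pmd_alloc() */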
@@ -114,14 +91,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
114 while (address < end) { 91 while (address < end) {
115 pg_dir = pgd_offset_k(address); 92 pg_dir = pgd_offset_k(address);
116 if (pgd_none(*pg_dir)) { 93 if (pgd_none(*pg_dir)) {
117 p4_dir = vmem_p4d_alloc(); 94 p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
118 if (!p4_dir) 95 if (!p4_dir)
119 goto out; 96 goto out;
120 pgd_populate(&init_mm, pg_dir, p4_dir); 97 pgd_populate(&init_mm, pg_dir, p4_dir);
121 } 98 }
122 p4_dir = p4d_offset(pg_dir, address); 99 p4_dir = p4d_offset(pg_dir, address);
123 if (p4d_none(*p4_dir)) { 100 if (p4d_none(*p4_dir)) {
124 pu_dir = vmem_pud_alloc(); 101 pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
125 if (!pu_dir) 102 if (!pu_dir)
126 goto out; 103 goto out;
127 p4d_populate(&init_mm, p4_dir, pu_dir); 104 p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -136,7 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
136 continue; 113 continue;
137 } 114 }
138 if (pud_none(*pu_dir)) { 115 if (pud_none(*pu_dir)) {
139 pm_dir = vmem_pmd_alloc(); 116 pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
140 if (!pm_dir) 117 if (!pm_dir)
141 goto out; 118 goto out;
142 pud_populate(&init_mm, pu_dir, pm_dir); 119 pud_populate(&init_mm, pu_dir, pm_dir);
@@ -253,7 +230,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
253 for (address = start; address < end;) { 230 for (address = start; address < end;) {
254 pg_dir = pgd_offset_k(address); 231 pg_dir = pgd_offset_k(address);
255 if (pgd_none(*pg_dir)) { 232 if (pgd_none(*pg_dir)) {
256 p4_dir = vmem_p4d_alloc(); 233 p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
257 if (!p4_dir) 234 if (!p4_dir)
258 goto out; 235 goto out;
259 pgd_populate(&init_mm, pg_dir, p4_dir); 236 pgd_populate(&init_mm, pg_dir, p4_dir);
@@ -261,7 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
261 238
262 p4_dir = p4d_offset(pg_dir, address); 239 p4_dir = p4d_offset(pg_dir, address);
263 if (p4d_none(*p4_dir)) { 240 if (p4d_none(*p4_dir)) {
264 pu_dir = vmem_pud_alloc(); 241 pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
265 if (!pu_dir) 242 if (!pu_dir)
266 goto out; 243 goto out;
267 p4d_populate(&init_mm, p4_dir, pu_dir); 244 p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -269,7 +246,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
269 246
270 pu_dir = pud_offset(p4_dir, address); 247 pu_dir = pud_offset(p4_dir, address);
271 if (pud_none(*pu_dir)) { 248 if (pud_none(*pu_dir)) {
272 pm_dir = vmem_pmd_alloc(); 249 pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
273 if (!pm_dir) 250 if (!pm_dir)
274 goto out; 251 goto out;
275 pud_populate(&init_mm, pu_dir, pm_dir); 252 pud_populate(&init_mm, pu_dir, pm_dir);
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index bd534b4d40e3..0ae3936e266f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,6 +24,14 @@
24 24
25bool zpci_unique_uid; 25bool zpci_unique_uid;
26 26
27static void update_uid_checking(bool new)
28{
29 if (zpci_unique_uid != new)
30 zpci_dbg(1, "uid checking:%d\n", new);
31
32 zpci_unique_uid = new;
33}
34
27static inline void zpci_err_clp(unsigned int rsp, int rc) 35static inline void zpci_err_clp(unsigned int rsp, int rc)
28{ 36{
29 struct { 37 struct {
@@ -319,7 +327,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
 		goto out;
 	}
 
-	zpci_unique_uid = rrb->response.uid_checking;
+	update_uid_checking(rrb->response.uid_checking);
 	WARN_ON_ONCE(rrb->response.entry_size !=
 		     sizeof(struct clp_fh_list_entry));
 
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 025ea20fc4b4..29d72bf8ed2b 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -41,7 +41,7 @@ static struct facility_def facility_defs[] = {
 		27, /* mvcos */
 		32, /* compare and swap and store */
 		33, /* compare and swap and store 2 */
-		34, /* general extension facility */
+		34, /* general instructions extension */
 		35, /* execute extensions */
 #endif
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = {
 #ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
 		53, /* load-and-zero-rightmost-byte, etc. */
 #endif
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+		58, /* miscellaneous-instruction-extension 2 */
+#endif
 		-1 /* END */
 	}
 },
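
The facility list above is a -1 terminated array that gen_facilities turns into STFLE-style bit masks: facility bit 0 is the leftmost bit of the first doubleword, so facility n lands in doubleword n/64 at bit position 63 - (n % 64). A small standalone sketch of that mapping (the facility numbers are taken from the hunk above, the rest is illustrative):

#include <stdio.h>

int main(void)
{
	static const int facilities[] = { 27, 32, 33, 34, 35, 58, -1 };
	unsigned long long words[4] = { 0 };
	int i, n;

	for (i = 0; (n = facilities[i]) != -1; i++)
		words[n / 64] |= 1ULL << (63 - (n % 64));

	for (i = 0; i < 4; i++)
		printf("doubleword %d: 0x%016llx\n", i, words[i]);
	return 0;
}
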
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 670ac0a4ef49..9c97ad1ee121 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -801,11 +801,12 @@ static void dasd_profile_end(struct dasd_block *block,
 				struct dasd_ccw_req *cqr,
 				struct request *req)
 {
-	long strtime, irqtime, endtime, tottime;	/* in microseconds */
-	long tottimeps, sectors;
+	unsigned long strtime, irqtime, endtime, tottime;
+	unsigned long tottimeps, sectors;
 	struct dasd_device *device;
 	int sectors_ind, tottime_ind, tottimeps_ind, strtime_ind;
 	int irqtime_ind, irqtimeps_ind, endtime_ind;
+	struct dasd_profile_info *data;
 
 	device = cqr->startdev;
 	if (!(dasd_global_profile_level ||
@@ -835,6 +836,11 @@ static void dasd_profile_end(struct dasd_block *block,
 
 	spin_lock(&dasd_global_profile.lock);
 	if (dasd_global_profile.data) {
+		data = dasd_global_profile.data;
+		data->dasd_sum_times += tottime;
+		data->dasd_sum_time_str += strtime;
+		data->dasd_sum_time_irq += irqtime;
+		data->dasd_sum_time_end += endtime;
 		dasd_profile_end_add_data(dasd_global_profile.data,
 					  cqr->startdev != block->base,
 					  cqr->cpmode == 1,
@@ -847,7 +853,12 @@ static void dasd_profile_end(struct dasd_block *block,
 	spin_unlock(&dasd_global_profile.lock);
 
 	spin_lock(&block->profile.lock);
-	if (block->profile.data)
+	if (block->profile.data) {
+		data = block->profile.data;
+		data->dasd_sum_times += tottime;
+		data->dasd_sum_time_str += strtime;
+		data->dasd_sum_time_irq += irqtime;
+		data->dasd_sum_time_end += endtime;
 		dasd_profile_end_add_data(block->profile.data,
 					  cqr->startdev != block->base,
 					  cqr->cpmode == 1,
@@ -856,10 +867,16 @@ static void dasd_profile_end(struct dasd_block *block,
 					  tottimeps_ind, strtime_ind,
 					  irqtime_ind, irqtimeps_ind,
 					  endtime_ind);
+	}
 	spin_unlock(&block->profile.lock);
 
 	spin_lock(&device->profile.lock);
-	if (device->profile.data)
+	if (device->profile.data) {
+		data = device->profile.data;
+		data->dasd_sum_times += tottime;
+		data->dasd_sum_time_str += strtime;
+		data->dasd_sum_time_irq += irqtime;
+		data->dasd_sum_time_end += endtime;
 		dasd_profile_end_add_data(device->profile.data,
 					  cqr->startdev != block->base,
 					  cqr->cpmode == 1,
@@ -868,6 +885,7 @@ static void dasd_profile_end(struct dasd_block *block,
 					  tottimeps_ind, strtime_ind,
 					  irqtime_ind, irqtimeps_ind,
 					  endtime_ind);
+	}
 	spin_unlock(&device->profile.lock);
 }
 
@@ -989,6 +1007,14 @@ static void dasd_stats_seq_print(struct seq_file *m,
 	seq_printf(m, "total_sectors %u\n", data->dasd_io_sects);
 	seq_printf(m, "total_pav %u\n", data->dasd_io_alias);
 	seq_printf(m, "total_hpf %u\n", data->dasd_io_tpm);
+	seq_printf(m, "avg_total %lu\n", data->dasd_io_reqs ?
+		   data->dasd_sum_times / data->dasd_io_reqs : 0UL);
+	seq_printf(m, "avg_build_to_ssch %lu\n", data->dasd_io_reqs ?
+		   data->dasd_sum_time_str / data->dasd_io_reqs : 0UL);
+	seq_printf(m, "avg_ssch_to_irq %lu\n", data->dasd_io_reqs ?
+		   data->dasd_sum_time_irq / data->dasd_io_reqs : 0UL);
+	seq_printf(m, "avg_irq_to_end %lu\n", data->dasd_io_reqs ?
+		   data->dasd_sum_time_end / data->dasd_io_reqs : 0UL);
 	seq_puts(m, "histogram_sectors ");
 	dasd_stats_array(m, data->dasd_io_secs);
 	seq_puts(m, "histogram_io_times ");
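
The new avg_* lines are plain integer divisions of the accumulated sums by the request count, with a guard so an empty profile prints 0 instead of dividing by zero. A minimal userspace model of that computation (sample numbers made up):

#include <stdio.h>

struct profile {
	unsigned int io_reqs;
	unsigned long sum_times;	/* total request times */
	unsigned long sum_time_str;	/* build -> start (ssch) */
	unsigned long sum_time_irq;	/* start -> interrupt */
	unsigned long sum_time_end;	/* interrupt -> end */
};

static unsigned long avg(unsigned long sum, unsigned int reqs)
{
	return reqs ? sum / reqs : 0UL;	/* same guard as the seq_printf above */
}

int main(void)
{
	struct profile p = { 128, 640000, 64000, 512000, 64000 };

	printf("avg_total %lu\n", avg(p.sum_times, p.io_reqs));
	printf("avg_build_to_ssch %lu\n", avg(p.sum_time_str, p.io_reqs));
	printf("avg_ssch_to_irq %lu\n", avg(p.sum_time_irq, p.io_reqs));
	printf("avg_irq_to_end %lu\n", avg(p.sum_time_end, p.io_reqs));
	return 0;
}
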
@@ -1639,7 +1665,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
 {
 	struct dasd_ccw_req *cqr, *next;
 	struct dasd_device *device;
-	unsigned long long now;
+	unsigned long now;
 	int nrf_suppressed = 0;
 	int fp_suppressed = 0;
 	u8 *sense = NULL;
@@ -3152,7 +3178,9 @@ static int dasd_alloc_queue(struct dasd_block *block)
  */
 static void dasd_setup_queue(struct dasd_block *block)
 {
+	unsigned int logical_block_size = block->bp_block;
 	struct request_queue *q = block->request_queue;
+	unsigned int max_bytes, max_discard_sectors;
 	int max;
 
 	if (block->base->features & DASD_FEATURE_USERAW) {
@@ -3169,7 +3197,7 @@ static void dasd_setup_queue(struct dasd_block *block)
 	}
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
 	q->limits.max_dev_sectors = max;
-	blk_queue_logical_block_size(q, block->bp_block);
+	blk_queue_logical_block_size(q, logical_block_size);
 	blk_queue_max_hw_sectors(q, max);
 	blk_queue_max_segments(q, USHRT_MAX);
 	/* with page sized segments we can translate each segement into
@@ -3177,6 +3205,21 @@ static void dasd_setup_queue(struct dasd_block *block)
 	 */
 	blk_queue_max_segment_size(q, PAGE_SIZE);
 	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+
+	/* Only activate blocklayer discard support for devices that support it */
+	if (block->base->features & DASD_FEATURE_DISCARD) {
+		q->limits.discard_granularity = logical_block_size;
+		q->limits.discard_alignment = PAGE_SIZE;
+
+		/* Calculate max_discard_sectors and make it PAGE aligned */
+		max_bytes = USHRT_MAX * logical_block_size;
+		max_bytes = ALIGN(max_bytes, PAGE_SIZE) - PAGE_SIZE;
+		max_discard_sectors = max_bytes / logical_block_size;
+
+		blk_queue_max_discard_sectors(q, max_discard_sectors);
+		blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+	}
 }
 
 /*
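
The discard limit above caps a request at USHRT_MAX logical blocks, rounds the byte count down to a whole number of pages (ALIGN up, then subtract one page), and converts back to a block count. The arithmetic can be checked in isolation; ALIGN_ is redefined here the way include/linux/kernel.h defines ALIGN:

#include <stdio.h>

#define USHRT_MAX_ 65535U
#define PAGE_SIZE_ 4096U
#define ALIGN_(x, a) (((x) + (a) - 1) & ~((a) - 1))

static void show(unsigned int bs)
{
	unsigned int max_bytes = USHRT_MAX_ * bs;

	max_bytes = ALIGN_(max_bytes, PAGE_SIZE_) - PAGE_SIZE_;
	printf("blocksize %5u: max discard %u bytes (%u blocks)\n",
	       bs, max_bytes, max_bytes / bs);
}

int main(void)
{
	show(512);	/* 65535 * 512 = 33553920, page-aligned down to 33550336 */
	show(4096);	/* already page aligned, so exactly one page is shaved off */
	return 0;
}
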
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 107cd3361e29..e448a0fc0c09 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -2231,7 +2231,7 @@ static void dasd_3990_erp_account_error(struct dasd_ccw_req *erp)
 	struct dasd_device *device = erp->startdev;
 	__u8 lpum = erp->refers->irb.esw.esw1.lpum;
 	int pos = pathmask_to_pos(lpum);
-	unsigned long long clk;
+	unsigned long clk;
 
 	if (!device->path_thrhld)
 		return;
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 779dce069cc5..e38042ce94e6 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -1634,7 +1634,7 @@ static struct attribute * dasd_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group dasd_attr_group = {
+static const struct attribute_group dasd_attr_group = {
 	.attrs = dasd_attrs,
 };
 
@@ -1676,6 +1676,7 @@ dasd_set_feature(struct ccw_device *cdev, int feature, int flag)
 	spin_unlock(&dasd_devmap_lock);
 	return 0;
 }
+EXPORT_SYMBOL(dasd_set_feature);
 
 
 int
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 5667146c6a0a..98fb28e49d2c 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -235,7 +235,7 @@ static void dasd_ext_handler(struct ext_code ext_code,
 {
 	struct dasd_ccw_req *cqr, *next;
 	struct dasd_device *device;
-	unsigned long long expires;
+	unsigned long expires;
 	unsigned long flags;
 	addr_t ip;
 	int rc;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index c3e5ad641b0b..8eafcd5fa004 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -3254,11 +3254,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
 	/* 1x prefix + one read/write ccw per track */
 	cplength = 1 + trkcount;
 
-	/* on 31-bit we need space for two 32 bit addresses per page
-	 * on 64-bit one 64 bit address
-	 */
-	datasize = sizeof(struct PFX_eckd_data) +
-		cidaw * sizeof(unsigned long long);
+	datasize = sizeof(struct PFX_eckd_data) + cidaw * sizeof(unsigned long);
 
 	/* Allocate the ccw request. */
 	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
@@ -3856,7 +3852,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
 	}
 	size = ALIGN(size, 8);
 
-	datasize = size + cidaw * sizeof(unsigned long long);
+	datasize = size + cidaw * sizeof(unsigned long);
 
 	/* Allocate the ccw request. */
 	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength,
diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h
index fb1f537d986a..34e153a6b19c 100644
--- a/drivers/s390/block/dasd_eckd.h
+++ b/drivers/s390/block/dasd_eckd.h
@@ -165,7 +165,7 @@ struct DE_eckd_data {
 	__u8 ga_extended;	/* Global Attributes Extended */
 	struct ch_t beg_ext;
 	struct ch_t end_ext;
-	unsigned long long ep_sys_time; /* Ext Parameter - System Time Stamp */
+	unsigned long ep_sys_time;	/* Ext Parameter - System Time Stamp */
 	__u8 ep_format;		/* Extended Parameter format byte */
 	__u8 ep_prio;		/* Extended Parameter priority I/O byte */
 	__u8 ep_reserved1;	/* Extended Parameter Reserved */
diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index 9e3419124264..6389feb2fb7a 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -124,7 +124,7 @@ dasd_default_erp_action(struct dasd_ccw_req *cqr)
 struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr)
 {
 	int success;
-	unsigned long long startclk, stopclk;
+	unsigned long startclk, stopclk;
 	struct dasd_device *startdev;
 
 	BUG_ON(cqr->refers == NULL || cqr->function == NULL);
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 462cab5d4302..6168ccdb389c 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -174,6 +174,9 @@ dasd_fba_check_characteristics(struct dasd_device *device)
 	if (readonly)
 		set_bit(DASD_FLAG_DEVICE_RO, &device->flags);
 
+	/* FBA supports discard, set the according feature bit */
+	dasd_set_feature(cdev, DASD_FEATURE_DISCARD, 1);
+
 	dev_info(&device->cdev->dev,
 		 "New FBA DASD %04X/%02X (CU %04X/%02X) with %d MB "
 		 "and %d B/blk%s\n",
@@ -247,9 +250,192 @@ static void dasd_fba_check_for_device_change(struct dasd_device *device,
 		dasd_generic_handle_state_change(device);
 };
 
-static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
-					       struct dasd_block *block,
-					       struct request *req)
+
+/*
+ * Builds a CCW with no data payload
+ */
+static void ccw_write_no_data(struct ccw1 *ccw)
+{
+	ccw->cmd_code = DASD_FBA_CCW_WRITE;
+	ccw->flags |= CCW_FLAG_SLI;
+	ccw->count = 0;
+}
+
+/*
+ * Builds a CCW that writes only zeroes.
+ */
+static void ccw_write_zero(struct ccw1 *ccw, int count)
+{
+	ccw->cmd_code = DASD_FBA_CCW_WRITE;
+	ccw->flags |= CCW_FLAG_SLI;
+	ccw->count = count;
+	ccw->cda = (__u32) (addr_t) page_to_phys(ZERO_PAGE(0));
+}
+
+/*
+ * Helper function to count the amount of necessary CCWs within a given range
+ * with 4k alignment and command chaining in mind.
+ */
+static int count_ccws(sector_t first_rec, sector_t last_rec,
+		      unsigned int blocks_per_page)
+{
+	sector_t wz_stop = 0, d_stop = 0;
+	int cur_pos = 0;
+	int count = 0;
+
+	if (first_rec % blocks_per_page != 0) {
+		wz_stop = first_rec + blocks_per_page -
+			  (first_rec % blocks_per_page) - 1;
+		if (wz_stop > last_rec)
+			wz_stop = last_rec;
+		cur_pos = wz_stop - first_rec + 1;
+		count++;
+	}
+
+	if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) {
+		if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0)
+			d_stop = last_rec - ((last_rec - blocks_per_page + 1) %
+					     blocks_per_page);
+		else
+			d_stop = last_rec;
+
+		cur_pos += d_stop - (first_rec + cur_pos) + 1;
+		count++;
+	}
+
+	if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec)
+		count++;
+
+	return count;
+}
+
+/*
+ * This function builds a CCW request for block layer discard requests.
+ * Each page in the z/VM hypervisor that represents certain records of an FBA
+ * device will be padded with zeros. This is a special behaviour of the WRITE
+ * command which is triggered when no data payload is added to the CCW.
+ *
+ * Note: Due to issues in some z/VM versions, we can't fully utilise this
+ * special behaviour. We have to keep a 4k (or 8 block) alignment in mind to
+ * work around those issues and write actual zeroes to the unaligned parts in
+ * the request. This workaround might be removed in the future.
+ */
+static struct dasd_ccw_req *dasd_fba_build_cp_discard(
+					struct dasd_device *memdev,
+					struct dasd_block *block,
+					struct request *req)
+{
+	struct LO_fba_data *LO_data;
+	struct dasd_ccw_req *cqr;
+	struct ccw1 *ccw;
+
+	sector_t wz_stop = 0, d_stop = 0;
+	sector_t first_rec, last_rec;
+
+	unsigned int blksize = block->bp_block;
+	unsigned int blocks_per_page;
+	int wz_count = 0;
+	int d_count = 0;
+	int cur_pos = 0; /* Current position within the extent */
+	int count = 0;
+	int cplength;
+	int datasize;
+	int nr_ccws;
+
+	first_rec = blk_rq_pos(req) >> block->s2b_shift;
+	last_rec =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
+	count = last_rec - first_rec + 1;
+
+	blocks_per_page = BLOCKS_PER_PAGE(blksize);
+	nr_ccws = count_ccws(first_rec, last_rec, blocks_per_page);
+
+	/* define extent + nr_ccws * locate record + nr_ccws * single CCW */
+	cplength = 1 + 2 * nr_ccws;
+	datasize = sizeof(struct DE_fba_data) +
+		nr_ccws * (sizeof(struct LO_fba_data) + sizeof(struct ccw1));
+
+	cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev);
+	if (IS_ERR(cqr))
+		return cqr;
+
+	ccw = cqr->cpaddr;
+
+	define_extent(ccw++, cqr->data, WRITE, blksize, first_rec, count);
+	LO_data = cqr->data + sizeof(struct DE_fba_data);
+
+	/* First part is not aligned. Calculate range to write zeroes. */
+	if (first_rec % blocks_per_page != 0) {
+		wz_stop = first_rec + blocks_per_page -
+			  (first_rec % blocks_per_page) - 1;
+		if (wz_stop > last_rec)
+			wz_stop = last_rec;
+		wz_count = wz_stop - first_rec + 1;
+
+		ccw[-1].flags |= CCW_FLAG_CC;
+		locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count);
+
+		ccw[-1].flags |= CCW_FLAG_CC;
+		ccw_write_zero(ccw++, wz_count * blksize);
+
+		cur_pos = wz_count;
+	}
+
+	/* We can do proper discard when we've got at least blocks_per_page blocks. */
+	if (last_rec - (first_rec + cur_pos) + 1 >= blocks_per_page) {
+		/* is last record at page boundary? */
+		if ((last_rec - blocks_per_page + 1) % blocks_per_page != 0)
+			d_stop = last_rec - ((last_rec - blocks_per_page + 1) %
+					     blocks_per_page);
+		else
+			d_stop = last_rec;
+
+		d_count = d_stop - (first_rec + cur_pos) + 1;
+
+		ccw[-1].flags |= CCW_FLAG_CC;
+		locate_record(ccw++, LO_data++, WRITE, cur_pos, d_count);
+
+		ccw[-1].flags |= CCW_FLAG_CC;
+		ccw_write_no_data(ccw++);
+
+		cur_pos += d_count;
+	}
+
+	/* We might still have some bits left which need to be zeroed. */
+	if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec) {
+		if (d_stop != 0)
+			wz_count = last_rec - d_stop;
+		else if (wz_stop != 0)
+			wz_count = last_rec - wz_stop;
+		else
+			wz_count = count;
+
+		ccw[-1].flags |= CCW_FLAG_CC;
+		locate_record(ccw++, LO_data++, WRITE, cur_pos, wz_count);
+
+		ccw[-1].flags |= CCW_FLAG_CC;
+		ccw_write_zero(ccw++, wz_count * blksize);
+	}
+
+	if (blk_noretry_request(req) ||
+	    block->base->features & DASD_FEATURE_FAILFAST)
+		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
+
+	cqr->startdev = memdev;
+	cqr->memdev = memdev;
+	cqr->block = block;
+	cqr->expires = memdev->default_expires * HZ;	/* default 5 minutes */
+	cqr->retries = memdev->default_retries;
+	cqr->buildclk = get_tod_clock();
+	cqr->status = DASD_CQR_FILLED;
+
+	return cqr;
+}
+
+static struct dasd_ccw_req *dasd_fba_build_cp_regular(
+					struct dasd_device *memdev,
+					struct dasd_block *block,
+					struct request *req)
 {
 	struct dasd_fba_private *private = block->base->private;
 	unsigned long *idaws;
@@ -372,6 +558,16 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
 	return cqr;
 }
 
+static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device *memdev,
+					      struct dasd_block *block,
+					      struct request *req)
+{
+	if (req_op(req) == REQ_OP_DISCARD || req_op(req) == REQ_OP_WRITE_ZEROES)
+		return dasd_fba_build_cp_discard(memdev, block, req);
+	else
+		return dasd_fba_build_cp_regular(memdev, block, req);
+}
+
 static int
 dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
 {
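
dasd_fba_build_cp_discard() above splits every discard into at most three chained CCWs: a head up to the next 4k boundary and a tail after the last full page are written with real zeroes, and only the page-aligned middle may use the payload-less WRITE that makes z/VM zero whole pages. A userspace model of that split, printing the regions for a few sample requests (the record arithmetic mirrors count_ccws(); everything else is illustrative, with 8 records per 4k page at 512 bytes per block):

#include <stdio.h>

typedef unsigned long long sector_t;

static void split(sector_t first_rec, sector_t last_rec, unsigned int bpp)
{
	sector_t wz_stop = 0, d_stop = 0;
	sector_t cur_pos = 0;

	printf("records %llu..%llu:\n", first_rec, last_rec);
	if (first_rec % bpp != 0) {
		wz_stop = first_rec + bpp - (first_rec % bpp) - 1;
		if (wz_stop > last_rec)
			wz_stop = last_rec;
		printf("  zero head    %llu..%llu\n", first_rec, wz_stop);
		cur_pos = wz_stop - first_rec + 1;
	}
	if (last_rec - (first_rec + cur_pos) + 1 >= bpp) {
		if ((last_rec - bpp + 1) % bpp != 0)
			d_stop = last_rec - ((last_rec - bpp + 1) % bpp);
		else
			d_stop = last_rec;
		printf("  discard body %llu..%llu\n", first_rec + cur_pos, d_stop);
		cur_pos += d_stop - (first_rec + cur_pos) + 1;
	}
	if (cur_pos == 0 || first_rec + cur_pos - 1 < last_rec)
		printf("  zero tail    %llu..%llu\n", first_rec + cur_pos, last_rec);
}

int main(void)
{
	split(3, 40, 8);	/* unaligned head, aligned middle, short tail */
	split(8, 15, 8);	/* exactly one aligned page: pure discard */
	split(2, 5, 8);		/* too small for a full page: zeroes only */
	return 0;
}
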
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index dca7cb1e6f65..f9e25fc03d6b 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -167,6 +167,9 @@ do { \
 	printk(d_loglevel PRINTK_HEADER " " d_string "\n", d_args); \
 } while(0)
 
+/* Macro to calculate number of blocks per page */
+#define BLOCKS_PER_PAGE(blksize) (PAGE_SIZE / blksize)
+
 struct dasd_ccw_req {
 	unsigned int magic;		/* Eye catcher */
 	struct list_head devlist;	/* for dasd_device request queue */
@@ -196,10 +199,10 @@ struct dasd_ccw_req {
 	void *function; 	/* originating ERP action */
 
 	/* these are for statistics only */
-	unsigned long long buildclk;	/* TOD-clock of request generation */
-	unsigned long long startclk;	/* TOD-clock of request start */
-	unsigned long long stopclk;	/* TOD-clock of request interrupt */
-	unsigned long long endclk;	/* TOD-clock of request termination */
+	unsigned long buildclk;		/* TOD-clock of request generation */
+	unsigned long startclk;		/* TOD-clock of request start */
+	unsigned long stopclk;		/* TOD-clock of request interrupt */
+	unsigned long endclk;		/* TOD-clock of request termination */
 
 	/* Callback that is called after reaching final status. */
 	void (*callback)(struct dasd_ccw_req *, void *data);
@@ -423,7 +426,7 @@ struct dasd_path {
 	u8 chpid;
 	struct dasd_conf_data *conf_data;
 	atomic_t error_count;
-	unsigned long long errorclk;
+	unsigned long errorclk;
 };
 
 
@@ -454,6 +457,10 @@ struct dasd_profile_info {
 	unsigned int dasd_read_time2[32]; /* hist. of time from start to irq */
 	unsigned int dasd_read_time3[32]; /* hist. of time from irq to end */
 	unsigned int dasd_read_nr_req[32]; /* hist. of # of requests in chanq */
+	unsigned long dasd_sum_times;	  /* sum of request times */
+	unsigned long dasd_sum_time_str;  /* sum of time from build to start */
+	unsigned long dasd_sum_time_irq;  /* sum of time from start to irq */
+	unsigned long dasd_sum_time_end;  /* sum of time from irq to end */
 };
 
 struct dasd_profile {
458 465
459struct dasd_profile { 466struct dasd_profile {
@@ -535,7 +542,7 @@ struct dasd_block {
 	struct block_device *bdev;
 	atomic_t open_count;
 
-	unsigned long long blocks;	/* size of volume in blocks */
+	unsigned long blocks;		/* size of volume in blocks */
 	unsigned int bp_block;		/* bytes per block */
 	unsigned int s2b_shift;		/* log2 (bp_block/512) */
 
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index 70dc2c4cd3f7..7104d6765773 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -90,7 +90,7 @@ dasd_devices_show(struct seq_file *m, void *v)
 		seq_printf(m, "n/f	 ");
 	else
 		seq_printf(m,
-			   "at blocksize: %d, %lld blocks, %lld MB",
+			   "at blocksize: %u, %lu blocks, %lu MB",
 			   block->bp_block, block->blocks,
 			   ((block->bp_block >> 9) *
 			    block->blocks) >> 11);
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 0071febac9e6..2e7fd966c515 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -249,13 +249,13 @@ static void scm_request_requeue(struct scm_request *scmrq)
 static void scm_request_finish(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
+	int *error;
 	int i;
 
 	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
-		if (scmrq->error)
-			blk_mq_end_request(scmrq->request[i], scmrq->error);
-		else
-			blk_mq_complete_request(scmrq->request[i]);
+		error = blk_mq_rq_to_pdu(scmrq->request[i]);
+		*error = scmrq->error;
+		blk_mq_complete_request(scmrq->request[i]);
 	}
 
 	atomic_dec(&bdev->queued_reqs);
@@ -415,7 +415,9 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
 
 static void scm_blk_request_done(struct request *req)
 {
-	blk_mq_end_request(req, 0);
+	int *error = blk_mq_rq_to_pdu(req);
+
+	blk_mq_end_request(req, *error);
 }
 
 static const struct block_device_operations scm_blk_devops = {
@@ -448,6 +450,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 	atomic_set(&bdev->queued_reqs, 0);
 
 	bdev->tag_set.ops = &scm_mq_ops;
+	bdev->tag_set.cmd_size = sizeof(int);
 	bdev->tag_set.nr_hw_queues = nr_requests;
 	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
 	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
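
Setting tag_set.cmd_size makes the blk-mq core allocate extra per-request bytes directly behind every struct request; blk_mq_rq_to_pdu() then hands back that trailing area, which scm_blk uses above to carry the error status from the interrupt path to the completion callback. A userspace model of the layout trick (struct fake_request is a stand-in, not the kernel's struct request):

#include <stdio.h>
#include <stdlib.h>

struct fake_request {
	int tag;	/* placeholder for the real request contents */
};

static void *rq_to_pdu(struct fake_request *rq)
{
	return rq + 1;	/* per-driver data lives directly behind the request */
}

static struct fake_request *alloc_request(size_t cmd_size)
{
	return calloc(1, sizeof(struct fake_request) + cmd_size);
}

int main(void)
{
	struct fake_request *rq = alloc_request(sizeof(int));
	int *error = rq_to_pdu(rq);

	*error = -5;	/* e.g. an errno stored by the IRQ path */
	printf("completion sees error %d\n", *(int *)rq_to_pdu(rq));
	free(rq);
	return 0;
}
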
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index b3f1c458905f..97c4c9fdd53d 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -169,10 +169,21 @@ config VMCP
 	def_bool y
 	prompt "Support for the z/VM CP interface"
 	depends on S390
+	select CMA
 	help
 	  Select this option if you want to be able to interact with the control
 	  program on z/VM
 
+config VMCP_CMA_SIZE
+	int "Memory in MiB reserved for z/VM CP interface"
+	default "4"
+	depends on VMCP
+	help
+	  Specify the default amount of memory in MiB reserved for the z/VM CP
+	  interface. If needed this memory is used for large contiguous memory
+	  allocations. The default can be changed with the kernel command line
+	  parameter "vmcp_cma".
+
 config MONREADER
 	def_tristate m
 	prompt "API for reading z/VM monitor service records"
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index 710f2292911d..5d4f053d7c38 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -1082,7 +1082,7 @@ static struct attribute * raw3270_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group raw3270_attr_group = {
+static const struct attribute_group raw3270_attr_group = {
 	.attrs = raw3270_attrs,
 };
 
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index b9c5522b8a68..dff8b94871f0 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -252,6 +252,7 @@ static int sclp_attach_storage(u8 id)
 	if (!sccb)
 		return -ENOMEM;
 	sccb->header.length = PAGE_SIZE;
+	sccb->header.function_code = 0x40;
 	rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb,
 				       SCLP_QUEUE_INTERVAL);
 	if (rc)
diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c
index 1406fb688a26..7003d52c2191 100644
--- a/drivers/s390/char/sclp_config.c
+++ b/drivers/s390/char/sclp_config.c
@@ -135,7 +135,7 @@ static ssize_t sysfs_ofb_data_write(struct file *filp, struct kobject *kobj,
 	return rc ?: count;
 }
 
-static struct bin_attribute ofb_bin_attr = {
+static const struct bin_attribute ofb_bin_attr = {
 	.attr = {
 		.name = "event_data",
 		.mode = S_IWUSR,
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index efd84d1d178b..bc1fc00910b0 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -39,7 +39,7 @@ struct read_info_sccb {
 	u8 fac84;		/* 84 */
 	u8 fac85;		/* 85 */
 	u8 _pad_86[91 - 86];	/* 86-90 */
-	u8 flags;		/* 91 */
+	u8 fac91;		/* 91 */
 	u8 _pad_92[98 - 92];	/* 92-97 */
 	u8 fac98;		/* 98 */
 	u8 hamaxpow;		/* 99 */
@@ -103,6 +103,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
 	sclp.has_kss = !!(sccb->fac98 & 0x01);
 	if (sccb->fac85 & 0x02)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
+	if (sccb->fac91 & 0x40)
+		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST;
 	sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
 	sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
 	sclp.rzm <<= 20;
@@ -139,7 +141,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
 
 	/* Save IPL information */
 	sclp_ipl_info.is_valid = 1;
-	if (sccb->flags & 0x2)
+	if (sccb->fac91 & 0x2)
 		sclp_ipl_info.has_dump = 1;
 	memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN);
 
diff --git a/drivers/s390/char/sclp_ocf.c b/drivers/s390/char/sclp_ocf.c
index f59b71776bbd..f9cbb1ab047b 100644
--- a/drivers/s390/char/sclp_ocf.c
+++ b/drivers/s390/char/sclp_ocf.c
@@ -126,7 +126,7 @@ static struct attribute *ocf_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group ocf_attr_group = {
+static const struct attribute_group ocf_attr_group = {
 	.attrs = ocf_attrs,
 };
 
diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c
index 3c379da2eef8..9dd4534823b3 100644
--- a/drivers/s390/char/tape_core.c
+++ b/drivers/s390/char/tape_core.c
@@ -175,7 +175,7 @@ static struct attribute *tape_attrs[] = {
 	NULL
 };
 
-static struct attribute_group tape_attr_group = {
+static const struct attribute_group tape_attr_group = {
 	.attrs = tape_attrs,
 };
 
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 98749fa817da..7898bbcc28fc 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -17,15 +17,85 @@
 #include <linux/kernel.h>
 #include <linux/miscdevice.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 #include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/cma.h>
+#include <linux/mm.h>
 #include <asm/compat.h>
 #include <asm/cpcmd.h>
 #include <asm/debug.h>
-#include <linux/uaccess.h>
-#include "vmcp.h"
+#include <asm/vmcp.h>
+
+struct vmcp_session {
+	char *response;
+	unsigned int bufsize;
+	unsigned int cma_alloc : 1;
+	int resp_size;
+	int resp_code;
+	struct mutex mutex;
+};
 
 static debug_info_t *vmcp_debug;
 
+static unsigned long vmcp_cma_size __initdata = CONFIG_VMCP_CMA_SIZE * 1024 * 1024;
+static struct cma *vmcp_cma;
+
+static int __init early_parse_vmcp_cma(char *p)
+{
+	vmcp_cma_size = ALIGN(memparse(p, NULL), PAGE_SIZE);
+	return 0;
+}
+early_param("vmcp_cma", early_parse_vmcp_cma);
+
+void __init vmcp_cma_reserve(void)
+{
+	if (!MACHINE_IS_VM)
+		return;
+	cma_declare_contiguous(0, vmcp_cma_size, 0, 0, 0, false, "vmcp", &vmcp_cma);
+}
+
+static void vmcp_response_alloc(struct vmcp_session *session)
+{
+	struct page *page = NULL;
+	int nr_pages, order;
+
+	order = get_order(session->bufsize);
+	nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT;
+	/*
+	 * For anything below order 3 allocations rely on the buddy
+	 * allocator. If such low-order allocations can't be handled
+	 * anymore the system won't work anyway.
+	 */
+	if (order > 2)
+		page = cma_alloc(vmcp_cma, nr_pages, 0, GFP_KERNEL);
+	if (page) {
+		session->response = (char *)page_to_phys(page);
+		session->cma_alloc = 1;
+		return;
+	}
+	session->response = (char *)__get_free_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL, order);
+}
+
+static void vmcp_response_free(struct vmcp_session *session)
+{
+	int nr_pages, order;
+	struct page *page;
+
+	if (!session->response)
+		return;
+	order = get_order(session->bufsize);
+	nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT;
+	if (session->cma_alloc) {
+		page = phys_to_page((unsigned long)session->response);
+		cma_release(vmcp_cma, page, nr_pages);
+		session->cma_alloc = 0;
+	} else {
+		free_pages((unsigned long)session->response, order);
+	}
+	session->response = NULL;
+}
+
 static int vmcp_open(struct inode *inode, struct file *file)
 {
 	struct vmcp_session *session;
@@ -51,7 +121,7 @@ static int vmcp_release(struct inode *inode, struct file *file)
 
 	session = file->private_data;
 	file->private_data = NULL;
-	free_pages((unsigned long)session->response, get_order(session->bufsize));
+	vmcp_response_free(session);
 	kfree(session);
 	return 0;
 }
@@ -97,9 +167,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count,
 		return -ERESTARTSYS;
 	}
 	if (!session->response)
-		session->response = (char *)__get_free_pages(GFP_KERNEL
-			| __GFP_RETRY_MAYFAIL | GFP_DMA,
-			get_order(session->bufsize));
+		vmcp_response_alloc(session);
 	if (!session->response) {
 		mutex_unlock(&session->mutex);
 		kfree(cmd);
@@ -130,8 +198,8 @@ vmcp_write(struct file *file, const char __user *buff, size_t count,
 static long vmcp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct vmcp_session *session;
+	int ret = -ENOTTY;
 	int __user *argp;
-	int temp;
 
 	session = file->private_data;
 	if (is_compat_task())
@@ -142,28 +210,26 @@ static long vmcp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return -ERESTARTSYS;
 	switch (cmd) {
 	case VMCP_GETCODE:
-		temp = session->resp_code;
-		mutex_unlock(&session->mutex);
-		return put_user(temp, argp);
+		ret = put_user(session->resp_code, argp);
+		break;
 	case VMCP_SETBUF:
-		free_pages((unsigned long)session->response,
-			   get_order(session->bufsize));
-		session->response=NULL;
-		temp = get_user(session->bufsize, argp);
-		if (get_order(session->bufsize) > 8) {
+		vmcp_response_free(session);
+		ret = get_user(session->bufsize, argp);
+		if (ret)
 			session->bufsize = PAGE_SIZE;
-			temp = -EINVAL;
+		if (!session->bufsize || get_order(session->bufsize) > 8) {
+			session->bufsize = PAGE_SIZE;
+			ret = -EINVAL;
 		}
-		mutex_unlock(&session->mutex);
-		return temp;
+		break;
 	case VMCP_GETSIZE:
-		temp = session->resp_size;
-		mutex_unlock(&session->mutex);
-		return put_user(temp, argp);
+		ret = put_user(session->resp_size, argp);
+		break;
 	default:
-		mutex_unlock(&session->mutex);
-		return -ENOIOCTLCMD;
+		break;
 	}
+	mutex_unlock(&session->mutex);
+	return ret;
 }
 
 static const struct file_operations vmcp_fops = {
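
vmcp_response_alloc() above sizes the buffer twice: get_order() rounds it up to a power-of-two number of pages for the buddy allocator, while CMA only needs the exact page count, and CMA is tried first only for orders above 2 (more than four pages). The sizing can be reproduced in userspace; get_order_() re-derives the kernel's get_order() for illustration:

#include <stdio.h>

#define PAGE_SHIFT_ 12
#define PAGE_SIZE_ (1UL << PAGE_SHIFT_)

static int get_order_(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT_;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	unsigned long sizes[] = { PAGE_SIZE_, 8 * PAGE_SIZE_, 100000 };
	int i;

	for (i = 0; i < 3; i++) {
		unsigned long bufsize = sizes[i];
		int order = get_order_(bufsize);
		unsigned long nr_pages = (bufsize + PAGE_SIZE_ - 1) >> PAGE_SHIFT_;

		printf("bufsize %7lu: order %d (%lu pages buddy), %lu pages CMA, %s first\n",
		       bufsize, order, 1UL << order, nr_pages,
		       order > 2 ? "CMA" : "buddy");
	}
	return 0;
}
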
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 432fc40990bd..f4166f80c4d4 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -143,7 +143,7 @@ static ssize_t chp_measurement_chars_read(struct file *filp,
 			      sizeof(chp->cmg_chars));
 }
 
-static struct bin_attribute chp_measurement_chars_attr = {
+static const struct bin_attribute chp_measurement_chars_attr = {
 	.attr = {
 		.name = "measurement_chars",
 		.mode = S_IRUSR,
@@ -197,7 +197,7 @@ static ssize_t chp_measurement_read(struct file *filp, struct kobject *kobj,
 	return count;
 }
 
-static struct bin_attribute chp_measurement_attr = {
+static const struct bin_attribute chp_measurement_attr = {
 	.attr = {
 		.name = "measurement",
 		.mode = S_IRUSR,
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 7be01a58b44f..489b583f263d 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -612,7 +612,7 @@ static struct attribute *io_subchannel_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group io_subchannel_attr_group = {
+static const struct attribute_group io_subchannel_attr_group = {
 	.attrs = io_subchannel_attrs,
 };
 
@@ -626,7 +626,7 @@ static struct attribute * ccwdev_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group ccwdev_attr_group = {
+static const struct attribute_group ccwdev_attr_group = {
 	.attrs = ccwdev_attrs,
 };
 
diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c
index 53436ea52230..f85dacf1c284 100644
--- a/drivers/s390/crypto/zcrypt_card.c
+++ b/drivers/s390/crypto/zcrypt_card.c
@@ -98,7 +98,7 @@ static struct attribute *zcrypt_card_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group zcrypt_card_attr_group = {
+static const struct attribute_group zcrypt_card_attr_group = {
 	.attrs = zcrypt_card_attrs,
 };
 
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 4fddb4319481..afd20cee7ea0 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -140,7 +140,7 @@ struct function_and_rules_block {
  *	+ 0x000A 'MRP ' (MCL3 'PK' or CEX2C 'PK')
  * - VUD block
  */
-static struct CPRBX static_cprbx = {
+static const struct CPRBX static_cprbx = {
 	.cprb_len	= 0x00DC,
 	.cprb_ver_id	= 0x02,
 	.func_id	= {0x54, 0x32},
diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c
index a303f3b2c328..4742be0eec24 100644
--- a/drivers/s390/crypto/zcrypt_queue.c
+++ b/drivers/s390/crypto/zcrypt_queue.c
@@ -89,7 +89,7 @@ static struct attribute *zcrypt_queue_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group zcrypt_queue_attr_group = {
+static const struct attribute_group zcrypt_queue_attr_group = {
 	.attrs = zcrypt_queue_attrs,
 };
 
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index f2f94f59e0fa..1a80ce41425e 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -350,7 +350,7 @@ static struct attribute *qeth_l3_device_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group qeth_l3_device_attr_group = {
+static const struct attribute_group qeth_l3_device_attr_group = {
 	.attrs = qeth_l3_device_attrs,
 };
 
@@ -680,7 +680,7 @@ static struct attribute *qeth_ipato_device_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group qeth_device_ipato_group = {
+static const struct attribute_group qeth_device_ipato_group = {
 	.name = "ipa_takeover",
 	.attrs = qeth_ipato_device_attrs,
 };
@@ -843,7 +843,7 @@ static struct attribute *qeth_vipa_device_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group qeth_device_vipa_group = {
+static const struct attribute_group qeth_device_vipa_group = {
 	.name = "vipa",
 	.attrs = qeth_vipa_device_attrs,
 };
@@ -1006,7 +1006,7 @@ static struct attribute *qeth_rxip_device_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group qeth_device_rxip_group = {
+static const struct attribute_group qeth_device_rxip_group = {
 	.name = "rxip",
 	.attrs = qeth_rxip_device_attrs,
 };