summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2017-09-12 10:37:33 -0400
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2018-10-09 05:20:57 -0400
commitce3dc447493ff4186b192b38d723ab5e8c1eb52f (patch)
treef93f58afcd85087876a1ab9badcdaed76b5a37fe
parentff340d2472ec7618443913928af9fb85a7009270 (diff)
s390: add support for virtually mapped kernel stacks
With virtually mapped kernel stacks the kernel stack overflow detection is now fault based, every stack has a guard page in the vmalloc space. The panic_stack is renamed to nodat_stack and is used for all functions that need to run without DAT, e.g. memcpy_real or do_start_kdump. The main effect is a reduction in the kernel image size as with vmap stacks the old style overflow checking that adds two instructions per function is not needed anymore. Result from bloat-o-meter: add/remove: 20/1 grow/shrink: 13/26854 up/down: 2198/-216240 (-214042) In regard to performance the micro-benchmark for fork has a hit of a few microseconds, allocating 4 pages in vmalloc space is more expensive compared to an order-2 page allocation. But with real workloads I could not find a noticeable difference. Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/include/asm/lowcore.h4
-rw-r--r--arch/s390/include/asm/processor.h8
-rw-r--r--arch/s390/include/asm/thread_info.h3
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/base.S2
-rw-r--r--arch/s390/kernel/dumpstack.c6
-rw-r--r--arch/s390/kernel/entry.S53
-rw-r--r--arch/s390/kernel/entry.h3
-rw-r--r--arch/s390/kernel/head64.S4
-rw-r--r--arch/s390/kernel/irq.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c17
-rw-r--r--arch/s390/kernel/setup.c89
-rw-r--r--arch/s390/kernel/smp.c86
-rw-r--r--arch/s390/kernel/swsusp.S7
-rw-r--r--arch/s390/mm/maccess.c25
16 files changed, 225 insertions, 88 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9a9c7a6fe925..6061dd7578fe 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -125,6 +125,7 @@ config S390
125 select HAVE_ARCH_SOFT_DIRTY 125 select HAVE_ARCH_SOFT_DIRTY
126 select HAVE_ARCH_TRACEHOOK 126 select HAVE_ARCH_TRACEHOOK
127 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 127 select HAVE_ARCH_TRANSPARENT_HUGEPAGE
128 select HAVE_ARCH_VMAP_STACK
128 select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES 129 select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
129 select HAVE_CMPXCHG_DOUBLE 130 select HAVE_CMPXCHG_DOUBLE
130 select HAVE_CMPXCHG_LOCAL 131 select HAVE_CMPXCHG_LOCAL
@@ -649,6 +650,7 @@ config PACK_STACK
649 650
650config CHECK_STACK 651config CHECK_STACK
651 def_bool y 652 def_bool y
653 depends on !VMAP_STACK
652 prompt "Detect kernel stack overflow" 654 prompt "Detect kernel stack overflow"
653 help 655 help
654 This option enables the compiler option -mstack-guard and 656 This option enables the compiler option -mstack-guard and
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 406d940173ab..cc0947e08b6f 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -102,9 +102,9 @@ struct lowcore {
102 __u64 current_task; /* 0x0338 */ 102 __u64 current_task; /* 0x0338 */
103 __u64 kernel_stack; /* 0x0340 */ 103 __u64 kernel_stack; /* 0x0340 */
104 104
105 /* Interrupt, panic and restart stack. */ 105 /* Interrupt, DAT-off and restart stack. */
106 __u64 async_stack; /* 0x0348 */ 106 __u64 async_stack; /* 0x0348 */
107 __u64 panic_stack; /* 0x0350 */ 107 __u64 nodat_stack; /* 0x0350 */
108 __u64 restart_stack; /* 0x0358 */ 108 __u64 restart_stack; /* 0x0358 */
109 109
110 /* Restart function and parameter. */ 110 /* Restart function and parameter. */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 43494a014d5b..3c1e723a143a 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -163,6 +163,14 @@ struct thread_struct {
163typedef struct thread_struct thread_struct; 163typedef struct thread_struct thread_struct;
164 164
165/* 165/*
166 * General size of a stack
167 */
168#define STACK_ORDER 2
169#define STACK_SIZE (PAGE_SIZE << STACK_ORDER)
170#define STACK_INIT_OFFSET \
171 (STACK_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
172
173/*
166 * Stack layout of a C stack frame. 174 * Stack layout of a C stack frame.
167 */ 175 */
168#ifndef __PACK_STACK 176#ifndef __PACK_STACK
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 3c883c368eb0..3fa2fea0ba23 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -14,10 +14,7 @@
14 * Size of kernel stack for each process 14 * Size of kernel stack for each process
15 */ 15 */
16#define THREAD_SIZE_ORDER 2 16#define THREAD_SIZE_ORDER 2
17#define ASYNC_ORDER 2
18
19#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 17#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
20#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER)
21 18
22#ifndef __ASSEMBLY__ 19#ifndef __ASSEMBLY__
23#include <asm/lowcore.h> 20#include <asm/lowcore.h>
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 66e830f1c7bf..164bec175628 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -159,7 +159,7 @@ int main(void)
159 OFFSET(__LC_CURRENT, lowcore, current_task); 159 OFFSET(__LC_CURRENT, lowcore, current_task);
160 OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); 160 OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
161 OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); 161 OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
162 OFFSET(__LC_PANIC_STACK, lowcore, panic_stack); 162 OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
163 OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); 163 OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
164 OFFSET(__LC_RESTART_FN, lowcore, restart_fn); 164 OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
165 OFFSET(__LC_RESTART_DATA, lowcore, restart_data); 165 OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index b65874b0b412..f268fca67e82 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -18,7 +18,7 @@
18 18
19ENTRY(s390_base_mcck_handler) 19ENTRY(s390_base_mcck_handler)
20 basr %r13,0 20 basr %r13,0
210: lg %r15,__LC_PANIC_STACK # load panic stack 210: lg %r15,__LC_NODAT_STACK # load panic stack
22 aghi %r15,-STACK_FRAME_OVERHEAD 22 aghi %r15,-STACK_FRAME_OVERHEAD
23 larl %r1,s390_base_mcck_handler_fn 23 larl %r1,s390_base_mcck_handler_fn
24 lg %r9,0(%r1) 24 lg %r9,0(%r1)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 5b23c4f6e50c..301b945de77b 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -77,11 +77,11 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
77 frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); 77 frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
78#ifdef CONFIG_CHECK_STACK 78#ifdef CONFIG_CHECK_STACK
79 sp = __dump_trace(func, data, sp, 79 sp = __dump_trace(func, data, sp,
80 S390_lowcore.panic_stack + frame_size - PAGE_SIZE, 80 S390_lowcore.nodat_stack + frame_size - STACK_SIZE,
81 S390_lowcore.panic_stack + frame_size); 81 S390_lowcore.nodat_stack + frame_size);
82#endif 82#endif
83 sp = __dump_trace(func, data, sp, 83 sp = __dump_trace(func, data, sp,
84 S390_lowcore.async_stack + frame_size - ASYNC_SIZE, 84 S390_lowcore.async_stack + frame_size - STACK_SIZE,
85 S390_lowcore.async_stack + frame_size); 85 S390_lowcore.async_stack + frame_size);
86 task = task ?: current; 86 task = task ?: current;
87 __dump_trace(func, data, sp, 87 __dump_trace(func, data, sp,
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 150130c897c3..724fba4d09d2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -85,14 +85,34 @@ _LPP_OFFSET = __LC_LPP
85#endif 85#endif
86 .endm 86 .endm
87 87
88 .macro CHECK_STACK stacksize,savearea 88 .macro CHECK_STACK savearea
89#ifdef CONFIG_CHECK_STACK 89#ifdef CONFIG_CHECK_STACK
90 tml %r15,\stacksize - CONFIG_STACK_GUARD 90 tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
91 lghi %r14,\savearea 91 lghi %r14,\savearea
92 jz stack_overflow 92 jz stack_overflow
93#endif 93#endif
94 .endm 94 .endm
95 95
96 .macro CHECK_VMAP_STACK savearea,oklabel
97#ifdef CONFIG_VMAP_STACK
98 lgr %r14,%r15
99 nill %r14,0x10000 - STACK_SIZE
100 oill %r14,STACK_INIT
101 clg %r14,__LC_KERNEL_STACK
102 je \oklabel
103 clg %r14,__LC_ASYNC_STACK
104 je \oklabel
105 clg %r14,__LC_NODAT_STACK
106 je \oklabel
107 clg %r14,__LC_RESTART_STACK
108 je \oklabel
109 lghi %r14,\savearea
110 j stack_overflow
111#else
112 j \oklabel
113#endif
114 .endm
115
96 .macro SWITCH_ASYNC savearea,timer 116 .macro SWITCH_ASYNC savearea,timer
97 tmhh %r8,0x0001 # interrupting from user ? 117 tmhh %r8,0x0001 # interrupting from user ?
98 jnz 1f 118 jnz 1f
@@ -104,11 +124,11 @@ _LPP_OFFSET = __LC_LPP
104 brasl %r14,cleanup_critical 124 brasl %r14,cleanup_critical
105 tmhh %r8,0x0001 # retest problem state after cleanup 125 tmhh %r8,0x0001 # retest problem state after cleanup
106 jnz 1f 126 jnz 1f
1070: lg %r14,__LC_ASYNC_STACK # are we already on the async stack? 1270: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
108 slgr %r14,%r15 128 slgr %r14,%r15
109 srag %r14,%r14,STACK_SHIFT 129 srag %r14,%r14,STACK_SHIFT
110 jnz 2f 130 jnz 2f
111 CHECK_STACK 1<<STACK_SHIFT,\savearea 131 CHECK_STACK \savearea
112 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) 132 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
113 j 3f 133 j 3f
1141: UPDATE_VTIME %r14,%r15,\timer 1341: UPDATE_VTIME %r14,%r15,\timer
@@ -600,9 +620,10 @@ ENTRY(pgm_check_handler)
600 jnz 1f # -> enabled, can't be a double fault 620 jnz 1f # -> enabled, can't be a double fault
601 tm __LC_PGM_ILC+3,0x80 # check for per exception 621 tm __LC_PGM_ILC+3,0x80 # check for per exception
602 jnz .Lpgm_svcper # -> single stepped svc 622 jnz .Lpgm_svcper # -> single stepped svc
6031: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC 6231: CHECK_STACK __LC_SAVE_AREA_SYNC
604 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) 624 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
605 j 4f 625 # CHECK_VMAP_STACK branches to stack_overflow or 4f
626 CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
6062: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER 6272: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
607 BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP 628 BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
608 lg %r15,__LC_KERNEL_STACK 629 lg %r15,__LC_KERNEL_STACK
@@ -1136,7 +1157,8 @@ ENTRY(mcck_int_handler)
1136 jnz 4f 1157 jnz 4f
1137 TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID 1158 TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
1138 jno .Lmcck_panic 1159 jno .Lmcck_panic
11394: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER 11604: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
1161 SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
1140.Lmcck_skip: 1162.Lmcck_skip:
1141 lghi %r14,__LC_GPREGS_SAVE_AREA+64 1163 lghi %r14,__LC_GPREGS_SAVE_AREA+64
1142 stmg %r0,%r7,__PT_R0(%r11) 1164 stmg %r0,%r7,__PT_R0(%r11)
@@ -1163,7 +1185,6 @@ ENTRY(mcck_int_handler)
1163 xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) 1185 xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
1164 la %r11,STACK_FRAME_OVERHEAD(%r1) 1186 la %r11,STACK_FRAME_OVERHEAD(%r1)
1165 lgr %r15,%r1 1187 lgr %r15,%r1
1166 ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
1167 TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING 1188 TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
1168 jno .Lmcck_return 1189 jno .Lmcck_return
1169 TRACE_IRQS_OFF 1190 TRACE_IRQS_OFF
@@ -1182,7 +1203,7 @@ ENTRY(mcck_int_handler)
1182 lpswe __LC_RETURN_MCCK_PSW 1203 lpswe __LC_RETURN_MCCK_PSW
1183 1204
1184.Lmcck_panic: 1205.Lmcck_panic:
1185 lg %r15,__LC_PANIC_STACK 1206 lg %r15,__LC_NODAT_STACK
1186 la %r11,STACK_FRAME_OVERHEAD(%r15) 1207 la %r11,STACK_FRAME_OVERHEAD(%r15)
1187 j .Lmcck_skip 1208 j .Lmcck_skip
1188 1209
@@ -1193,12 +1214,10 @@ ENTRY(restart_int_handler)
1193 ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 1214 ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
1194 stg %r15,__LC_SAVE_AREA_RESTART 1215 stg %r15,__LC_SAVE_AREA_RESTART
1195 lg %r15,__LC_RESTART_STACK 1216 lg %r15,__LC_RESTART_STACK
1196 aghi %r15,-__PT_SIZE # create pt_regs on stack 1217 xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
1197 xc 0(__PT_SIZE,%r15),0(%r15) 1218 stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
1198 stmg %r0,%r14,__PT_R0(%r15) 1219 mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
1199 mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART 1220 mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
1200 mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
1201 aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
1202 xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) 1221 xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
1203 lg %r1,__LC_RESTART_FN # load fn, parm & source cpu 1222 lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
1204 lg %r2,__LC_RESTART_DATA 1223 lg %r2,__LC_RESTART_DATA
@@ -1216,14 +1235,14 @@ ENTRY(restart_int_handler)
1216 1235
1217 .section .kprobes.text, "ax" 1236 .section .kprobes.text, "ax"
1218 1237
1219#ifdef CONFIG_CHECK_STACK 1238#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
1220/* 1239/*
1221 * The synchronous or the asynchronous stack overflowed. We are dead. 1240 * The synchronous or the asynchronous stack overflowed. We are dead.
1222 * No need to properly save the registers, we are going to panic anyway. 1241 * No need to properly save the registers, we are going to panic anyway.
1223 * Setup a pt_regs so that show_trace can provide a good call trace. 1242 * Setup a pt_regs so that show_trace can provide a good call trace.
1224 */ 1243 */
1225stack_overflow: 1244stack_overflow:
1226 lg %r15,__LC_PANIC_STACK # change to panic stack 1245 lg %r15,__LC_NODAT_STACK # change to panic stack
1227 la %r11,STACK_FRAME_OVERHEAD(%r15) 1246 la %r11,STACK_FRAME_OVERHEAD(%r15)
1228 stmg %r0,%r7,__PT_R0(%r11) 1247 stmg %r0,%r7,__PT_R0(%r11)
1229 stmg %r8,%r9,__PT_PSW(%r11) 1248 stmg %r8,%r9,__PT_PSW(%r11)
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 472fa2f1a4a5..c3816ae108b0 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -86,4 +86,7 @@ DECLARE_PER_CPU(u64, mt_cycles[8]);
86void gs_load_bc_cb(struct pt_regs *regs); 86void gs_load_bc_cb(struct pt_regs *regs);
87void set_fs_fixup(void); 87void set_fs_fixup(void);
88 88
89unsigned long stack_alloc(void);
90void stack_free(unsigned long stack);
91
89#endif /* _ENTRY_H */ 92#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index b31dfb102700..57bba24b1c27 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -36,9 +36,7 @@ ENTRY(startup_continue)
36# 36#
37 larl %r14,init_task 37 larl %r14,init_task
38 stg %r14,__LC_CURRENT 38 stg %r14,__LC_CURRENT
39 larl %r15,init_thread_union+THREAD_SIZE 39 larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
40 stg %r15,__LC_KERNEL_STACK # set end of kernel stack
41 aghi %r15,-STACK_FRAME_OVERHEAD
42# 40#
43# Early setup functions that may not rely on an initialized bss section, 41# Early setup functions that may not rely on an initialized bss section,
44# like moving the initrd. Returns with an initialized bss section. 42# like moving the initrd. Returns with an initialized bss section.
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 0e8d68bac82c..b2bc0eb1ca7a 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -171,7 +171,7 @@ void do_softirq_own_stack(void)
171 old = current_stack_pointer(); 171 old = current_stack_pointer();
172 /* Check against async. stack address range. */ 172 /* Check against async. stack address range. */
173 new = S390_lowcore.async_stack; 173 new = S390_lowcore.async_stack;
174 if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) { 174 if (((new - old) >> (PAGE_SHIFT + STACK_ORDER)) != 0) {
175 CALL_ON_STACK(__do_softirq, new, 0); 175 CALL_ON_STACK(__do_softirq, new, 0);
176 } else { 176 } else {
177 /* We are already on the async stack. */ 177 /* We are already on the async stack. */
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index b7020e721ae3..cb582649aba6 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -142,18 +142,27 @@ static noinline void __machine_kdump(void *image)
142} 142}
143#endif 143#endif
144 144
145static unsigned long do_start_kdump(unsigned long addr)
146{
147 struct kimage *image = (struct kimage *) addr;
148 int (*start_kdump)(int) = (void *)image->start;
149 int rc;
150
151 __arch_local_irq_stnsm(0xfb); /* disable DAT */
152 rc = start_kdump(0);
153 __arch_local_irq_stosm(0x04); /* enable DAT */
154 return rc;
155}
156
145/* 157/*
146 * Check if kdump checksums are valid: We call purgatory with parameter "0" 158 * Check if kdump checksums are valid: We call purgatory with parameter "0"
147 */ 159 */
148static bool kdump_csum_valid(struct kimage *image) 160static bool kdump_csum_valid(struct kimage *image)
149{ 161{
150#ifdef CONFIG_CRASH_DUMP 162#ifdef CONFIG_CRASH_DUMP
151 int (*start_kdump)(int) = (void *)image->start;
152 int rc; 163 int rc;
153 164
154 __arch_local_irq_stnsm(0xfb); /* disable DAT */ 165 rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
155 rc = start_kdump(0);
156 __arch_local_irq_stosm(0x04); /* enable DAT */
157 return rc == 0; 166 return rc == 0;
158#else 167#else
159 return false; 168 return false;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c637c12f9e37..eca51c485d09 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -49,6 +49,7 @@
49#include <linux/crash_dump.h> 49#include <linux/crash_dump.h>
50#include <linux/memory.h> 50#include <linux/memory.h>
51#include <linux/compat.h> 51#include <linux/compat.h>
52#include <linux/start_kernel.h>
52 53
53#include <asm/ipl.h> 54#include <asm/ipl.h>
54#include <asm/facility.h> 55#include <asm/facility.h>
@@ -303,6 +304,78 @@ early_param("vmalloc", parse_vmalloc);
303 304
304void *restart_stack __section(.data); 305void *restart_stack __section(.data);
305 306
307unsigned long stack_alloc(void)
308{
309#ifdef CONFIG_VMAP_STACK
310 return (unsigned long)
311 __vmalloc_node_range(STACK_SIZE, STACK_SIZE,
312 VMALLOC_START, VMALLOC_END,
313 THREADINFO_GFP,
314 PAGE_KERNEL, 0, NUMA_NO_NODE,
315 __builtin_return_address(0));
316#else
317 return __get_free_pages(GFP_KERNEL, STACK_ORDER);
318#endif
319}
320
321void stack_free(unsigned long stack)
322{
323#ifdef CONFIG_VMAP_STACK
324 vfree((void *) stack);
325#else
326 free_pages(stack, STACK_ORDER);
327#endif
328}
329
330int __init arch_early_irq_init(void)
331{
332 unsigned long stack;
333
334 stack = __get_free_pages(GFP_KERNEL, STACK_ORDER);
335 if (!stack)
336 panic("Couldn't allocate async stack");
337 S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
338 return 0;
339}
340
341static int __init async_stack_realloc(void)
342{
343 unsigned long old, new;
344
345 old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
346 new = stack_alloc();
347 if (!new)
348 panic("Couldn't allocate async stack");
349 S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
350 free_pages(old, STACK_ORDER);
351 return 0;
352}
353early_initcall(async_stack_realloc);
354
355void __init arch_call_rest_init(void)
356{
357 struct stack_frame *frame;
358 unsigned long stack;
359
360 stack = stack_alloc();
361 if (!stack)
362 panic("Couldn't allocate kernel stack");
363 current->stack = (void *) stack;
364#ifdef CONFIG_VMAP_STACK
365 current->stack_vm_area = (void *) stack;
366#endif
367 set_task_stack_end_magic(current);
368 stack += STACK_INIT_OFFSET;
369 S390_lowcore.kernel_stack = stack;
370 frame = (struct stack_frame *) stack;
371 memset(frame, 0, sizeof(*frame));
372 /* Branch to rest_init on the new stack, never returns */
373 asm volatile(
374 " la 15,0(%[_frame])\n"
375 " jg rest_init\n"
376 : : [_frame] "a" (frame));
377}
378
306static void __init setup_lowcore(void) 379static void __init setup_lowcore(void)
307{ 380{
308 struct lowcore *lc; 381 struct lowcore *lc;
@@ -329,14 +402,8 @@ static void __init setup_lowcore(void)
329 PSW_MASK_DAT | PSW_MASK_MCHECK; 402 PSW_MASK_DAT | PSW_MASK_MCHECK;
330 lc->io_new_psw.addr = (unsigned long) io_int_handler; 403 lc->io_new_psw.addr = (unsigned long) io_int_handler;
331 lc->clock_comparator = clock_comparator_max; 404 lc->clock_comparator = clock_comparator_max;
332 lc->kernel_stack = ((unsigned long) &init_thread_union) 405 lc->nodat_stack = ((unsigned long) &init_thread_union)
333 + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); 406 + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
334 lc->async_stack = (unsigned long)
335 memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
336 + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
337 lc->panic_stack = (unsigned long)
338 memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
339 + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
340 lc->current_task = (unsigned long)&init_task; 407 lc->current_task = (unsigned long)&init_task;
341 lc->lpp = LPP_MAGIC; 408 lc->lpp = LPP_MAGIC;
342 lc->machine_flags = S390_lowcore.machine_flags; 409 lc->machine_flags = S390_lowcore.machine_flags;
@@ -357,8 +424,12 @@ static void __init setup_lowcore(void)
357 lc->last_update_timer = S390_lowcore.last_update_timer; 424 lc->last_update_timer = S390_lowcore.last_update_timer;
358 lc->last_update_clock = S390_lowcore.last_update_clock; 425 lc->last_update_clock = S390_lowcore.last_update_clock;
359 426
360 restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE); 427 /*
361 restart_stack += ASYNC_SIZE; 428 * Allocate the global restart stack which is the same for
429 * all CPUs in case *one* of them does a PSW restart.
430 */
431 restart_stack = memblock_virt_alloc(STACK_SIZE, STACK_SIZE);
432 restart_stack += STACK_INIT_OFFSET;
362 433
363 /* 434 /*
364 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant 435 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2f8f7d7dd9a8..fccdb96a04cb 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -186,36 +186,34 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
186 pcpu_sigp_retry(pcpu, order, 0); 186 pcpu_sigp_retry(pcpu, order, 0);
187} 187}
188 188
189#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
190#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
191
192static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) 189static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
193{ 190{
194 unsigned long async_stack, panic_stack; 191 unsigned long async_stack, nodat_stack;
195 struct lowcore *lc; 192 struct lowcore *lc;
196 193
197 if (pcpu != &pcpu_devices[0]) { 194 if (pcpu != &pcpu_devices[0]) {
198 pcpu->lowcore = (struct lowcore *) 195 pcpu->lowcore = (struct lowcore *)
199 __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); 196 __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
200 async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); 197 nodat_stack = __get_free_pages(GFP_KERNEL, STACK_ORDER);
201 panic_stack = __get_free_page(GFP_KERNEL); 198 if (!pcpu->lowcore || !nodat_stack)
202 if (!pcpu->lowcore || !panic_stack || !async_stack)
203 goto out; 199 goto out;
204 } else { 200 } else {
205 async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; 201 nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
206 panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
207 } 202 }
203 async_stack = stack_alloc();
204 if (!async_stack)
205 goto out;
208 lc = pcpu->lowcore; 206 lc = pcpu->lowcore;
209 memcpy(lc, &S390_lowcore, 512); 207 memcpy(lc, &S390_lowcore, 512);
210 memset((char *) lc + 512, 0, sizeof(*lc) - 512); 208 memset((char *) lc + 512, 0, sizeof(*lc) - 512);
211 lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; 209 lc->async_stack = async_stack + STACK_INIT_OFFSET;
212 lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; 210 lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
213 lc->cpu_nr = cpu; 211 lc->cpu_nr = cpu;
214 lc->spinlock_lockval = arch_spin_lockval(cpu); 212 lc->spinlock_lockval = arch_spin_lockval(cpu);
215 lc->spinlock_index = 0; 213 lc->spinlock_index = 0;
216 lc->br_r1_trampoline = 0x07f1; /* br %r1 */ 214 lc->br_r1_trampoline = 0x07f1; /* br %r1 */
217 if (nmi_alloc_per_cpu(lc)) 215 if (nmi_alloc_per_cpu(lc))
218 goto out; 216 goto out_async;
219 if (vdso_alloc_per_cpu(lc)) 217 if (vdso_alloc_per_cpu(lc))
220 goto out_mcesa; 218 goto out_mcesa;
221 lowcore_ptr[cpu] = lc; 219 lowcore_ptr[cpu] = lc;
@@ -224,10 +222,11 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
224 222
225out_mcesa: 223out_mcesa:
226 nmi_free_per_cpu(lc); 224 nmi_free_per_cpu(lc);
225out_async:
226 stack_free(async_stack);
227out: 227out:
228 if (pcpu != &pcpu_devices[0]) { 228 if (pcpu != &pcpu_devices[0]) {
229 free_page(panic_stack); 229 free_pages(nodat_stack, STACK_ORDER);
230 free_pages(async_stack, ASYNC_ORDER);
231 free_pages((unsigned long) pcpu->lowcore, LC_ORDER); 230 free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
232 } 231 }
233 return -ENOMEM; 232 return -ENOMEM;
@@ -237,15 +236,21 @@ out:
237 236
238static void pcpu_free_lowcore(struct pcpu *pcpu) 237static void pcpu_free_lowcore(struct pcpu *pcpu)
239{ 238{
239 unsigned long async_stack, nodat_stack, lowcore;
240
241 nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
242 async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET;
243 lowcore = (unsigned long) pcpu->lowcore;
244
240 pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); 245 pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
241 lowcore_ptr[pcpu - pcpu_devices] = NULL; 246 lowcore_ptr[pcpu - pcpu_devices] = NULL;
242 vdso_free_per_cpu(pcpu->lowcore); 247 vdso_free_per_cpu(pcpu->lowcore);
243 nmi_free_per_cpu(pcpu->lowcore); 248 nmi_free_per_cpu(pcpu->lowcore);
249 stack_free(async_stack);
244 if (pcpu == &pcpu_devices[0]) 250 if (pcpu == &pcpu_devices[0])
245 return; 251 return;
246 free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); 252 free_pages(nodat_stack, STACK_ORDER);
247 free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); 253 free_pages(lowcore, LC_ORDER);
248 free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
249} 254}
250 255
251#endif /* CONFIG_HOTPLUG_CPU */ 256#endif /* CONFIG_HOTPLUG_CPU */
@@ -293,7 +298,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
293{ 298{
294 struct lowcore *lc = pcpu->lowcore; 299 struct lowcore *lc = pcpu->lowcore;
295 300
296 lc->restart_stack = lc->kernel_stack; 301 lc->restart_stack = lc->nodat_stack;
297 lc->restart_fn = (unsigned long) func; 302 lc->restart_fn = (unsigned long) func;
298 lc->restart_data = (unsigned long) data; 303 lc->restart_data = (unsigned long) data;
299 lc->restart_source = -1UL; 304 lc->restart_source = -1UL;
@@ -303,15 +308,20 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
303/* 308/*
304 * Call function via PSW restart on pcpu and stop the current cpu. 309 * Call function via PSW restart on pcpu and stop the current cpu.
305 */ 310 */
311static void __pcpu_delegate(void (*func)(void*), void *data)
312{
313 func(data); /* should not return */
314}
315
306static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), 316static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
307 void *data, unsigned long stack) 317 void *data, unsigned long stack)
308{ 318{
309 struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; 319 struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
310 unsigned long source_cpu = stap(); 320 unsigned long source_cpu = stap();
311 321
312 __load_psw_mask(PSW_KERNEL_BITS); 322 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
313 if (pcpu->address == source_cpu) 323 if (pcpu->address == source_cpu)
314 func(data); /* should not return */ 324 CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data);
315 /* Stop target cpu (if func returns this stops the current cpu). */ 325 /* Stop target cpu (if func returns this stops the current cpu). */
316 pcpu_sigp_retry(pcpu, SIGP_STOP, 0); 326 pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
317 /* Restart func on the target cpu and stop the current cpu. */ 327 /* Restart func on the target cpu and stop the current cpu. */
@@ -372,8 +382,7 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
372void smp_call_ipl_cpu(void (*func)(void *), void *data) 382void smp_call_ipl_cpu(void (*func)(void *), void *data)
373{ 383{
374 pcpu_delegate(&pcpu_devices[0], func, data, 384 pcpu_delegate(&pcpu_devices[0], func, data,
375 pcpu_devices->lowcore->panic_stack - 385 pcpu_devices->lowcore->nodat_stack);
376 PANIC_FRAME_OFFSET + PAGE_SIZE);
377} 386}
378 387
379int smp_find_processor_id(u16 address) 388int smp_find_processor_id(u16 address)
@@ -791,37 +800,42 @@ void __init smp_detect_cpus(void)
791 memblock_free_early((unsigned long)info, sizeof(*info)); 800 memblock_free_early((unsigned long)info, sizeof(*info));
792} 801}
793 802
794/* 803static void smp_init_secondary(void)
795 * Activate a secondary processor.
796 */
797static void smp_start_secondary(void *cpuvoid)
798{ 804{
799 int cpu = smp_processor_id(); 805 int cpu = smp_processor_id();
800 806
801 S390_lowcore.last_update_clock = get_tod_clock();
802 S390_lowcore.restart_stack = (unsigned long) restart_stack;
803 S390_lowcore.restart_fn = (unsigned long) do_restart;
804 S390_lowcore.restart_data = 0;
805 S390_lowcore.restart_source = -1UL;
806 restore_access_regs(S390_lowcore.access_regs_save_area);
807 __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
808 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
809 cpu_init(); 807 cpu_init();
810 preempt_disable(); 808 preempt_disable();
811 init_cpu_timer(); 809 init_cpu_timer();
812 vtime_init(); 810 vtime_init();
813 pfault_init(); 811 pfault_init();
814 notify_cpu_starting(cpu); 812 notify_cpu_starting(smp_processor_id());
815 if (topology_cpu_dedicated(cpu)) 813 if (topology_cpu_dedicated(cpu))
816 set_cpu_flag(CIF_DEDICATED_CPU); 814 set_cpu_flag(CIF_DEDICATED_CPU);
817 else 815 else
818 clear_cpu_flag(CIF_DEDICATED_CPU); 816 clear_cpu_flag(CIF_DEDICATED_CPU);
819 set_cpu_online(cpu, true); 817 set_cpu_online(smp_processor_id(), true);
820 inc_irq_stat(CPU_RST); 818 inc_irq_stat(CPU_RST);
821 local_irq_enable(); 819 local_irq_enable();
822 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); 820 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
823} 821}
824 822
823/*
824 * Activate a secondary processor.
825 */
826static void smp_start_secondary(void *cpuvoid)
827{
828 S390_lowcore.last_update_clock = get_tod_clock();
829 S390_lowcore.restart_stack = (unsigned long) restart_stack;
830 S390_lowcore.restart_fn = (unsigned long) do_restart;
831 S390_lowcore.restart_data = 0;
832 S390_lowcore.restart_source = -1UL;
833 restore_access_regs(S390_lowcore.access_regs_save_area);
834 __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
835 __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
836 CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0);
837}
838
825/* Upping and downing of CPUs */ 839/* Upping and downing of CPUs */
826int __cpu_up(unsigned int cpu, struct task_struct *tidle) 840int __cpu_up(unsigned int cpu, struct task_struct *tidle)
827{ 841{
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 34b014b5cf03..537f97fde37f 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -29,10 +29,11 @@
29 29
30 .section .text 30 .section .text
31ENTRY(swsusp_arch_suspend) 31ENTRY(swsusp_arch_suspend)
32 stmg %r6,%r15,__SF_GPRS(%r15) 32 lg %r1,__LC_NODAT_STACK
33 aghi %r1,-STACK_FRAME_OVERHEAD
34 stmg %r6,%r15,__SF_GPRS(%r1)
35 stg %r15,__SF_BACKCHAIN(%r1)
33 lgr %r1,%r15 36 lgr %r1,%r15
34 aghi %r15,-STACK_FRAME_OVERHEAD
35 stg %r1,__SF_BACKCHAIN(%r15)
36 37
37 /* Store FPU registers */ 38 /* Store FPU registers */
38 brasl %r14,save_fpu_regs 39 brasl %r14,save_fpu_regs
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 7be06475809b..97b3ee53852b 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -89,10 +89,8 @@ static int __memcpy_real(void *dest, void *src, size_t count)
89 return rc; 89 return rc;
90} 90}
91 91
92/* 92static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
93 * Copy memory in real mode (kernel to kernel) 93 unsigned long count)
94 */
95int memcpy_real(void *dest, void *src, size_t count)
96{ 94{
97 int irqs_disabled, rc; 95 int irqs_disabled, rc;
98 unsigned long flags; 96 unsigned long flags;
@@ -103,7 +101,7 @@ int memcpy_real(void *dest, void *src, size_t count)
103 irqs_disabled = arch_irqs_disabled_flags(flags); 101 irqs_disabled = arch_irqs_disabled_flags(flags);
104 if (!irqs_disabled) 102 if (!irqs_disabled)
105 trace_hardirqs_off(); 103 trace_hardirqs_off();
106 rc = __memcpy_real(dest, src, count); 104 rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
107 if (!irqs_disabled) 105 if (!irqs_disabled)
108 trace_hardirqs_on(); 106 trace_hardirqs_on();
109 __arch_local_irq_ssm(flags); 107 __arch_local_irq_ssm(flags);
@@ -111,6 +109,23 @@ int memcpy_real(void *dest, void *src, size_t count)
111} 109}
112 110
113/* 111/*
112 * Copy memory in real mode (kernel to kernel)
113 */
114int memcpy_real(void *dest, void *src, size_t count)
115{
116 if (S390_lowcore.nodat_stack != 0)
117 return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
118 3, dest, src, count);
119 /*
120 * This is a really early memcpy_real call, the stacks are
121 * not set up yet. Just call _memcpy_real on the early boot
122 * stack
123 */
124 return _memcpy_real((unsigned long) dest,(unsigned long) src,
125 (unsigned long) count);
126}
127
128/*
114 * Copy memory in absolute mode (kernel to kernel) 129 * Copy memory in absolute mode (kernel to kernel)
115 */ 130 */
116void memcpy_absolute(void *dest, void *src, size_t count) 131void memcpy_absolute(void *dest, void *src, size_t count)