aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2017-12-01 04:32:48 -0500
committer Ingo Molnar <mingo@kernel.org> 2017-12-17 06:58:53 -0500
commit 0fd2e9c53d82704a3ba87ea1980ec515188c5316 (patch)
tree a828c396110053feba9e65307a4e802b00966519
parent 1784f9144b143a1e8b19fe94083b040aa559182b (diff)
parent 1e4c4f610f774df6088d7c065b2dd4d22adba698 (diff)
Merge commit 'upstream-x86-entry' into WIP.x86/mm
Pull in a minimal set of v4.15 entry code changes, as a base for the MM isolation patches.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--Documentation/x86/orc-unwinder.txt2
-rw-r--r--Makefile4
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug39
-rw-r--r--arch/x86/configs/tiny.config4
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/entry/calling.h69
-rw-r--r--arch/x86/entry/entry_64.S141
-rw-r--r--arch/x86/entry/entry_64_compat.S3
-rw-r--r--arch/x86/include/asm/archrandom.h8
-rw-r--r--arch/x86/include/asm/bitops.h10
-rw-r--r--arch/x86/include/asm/compat.h1
-rw-r--r--arch/x86/include/asm/cpufeature.h9
-rw-r--r--arch/x86/include/asm/cpufeatures.h11
-rw-r--r--arch/x86/include/asm/module.h2
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/percpu.h2
-rw-r--r--arch/x86/include/asm/processor.h52
-rw-r--r--arch/x86/include/asm/ptrace.h6
-rw-r--r--arch/x86/include/asm/rmwcc.h2
-rw-r--r--arch/x86/include/asm/switch_to.h24
-rw-r--r--arch/x86/include/asm/syscalls.h2
-rw-r--r--arch/x86/include/asm/trace/fpu.h10
-rw-r--r--arch/x86/include/asm/traps.h20
-rw-r--r--arch/x86/include/asm/unwind.h8
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h3
-rw-r--r--arch/x86/kernel/Makefile7
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c29
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c125
-rw-r--r--arch/x86/kernel/fpu/init.c11
-rw-r--r--arch/x86/kernel/fpu/xstate.c43
-rw-r--r--arch/x86/kernel/head_32.S5
-rw-r--r--arch/x86/kernel/head_64.S34
-rw-r--r--arch/x86/kernel/ldt.c16
-rw-r--r--arch/x86/kernel/process.c8
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kernel/traps.c3
-rw-r--r--arch/x86/kernel/verify_cpu.S3
-rw-r--r--arch/x86/kernel/vm86_32.c20
-rw-r--r--arch/x86/mm/fault.c88
-rw-r--r--arch/x86/um/ldt.c7
-rw-r--r--arch/x86/xen/enlighten_pv.c9
-rw-r--r--arch/x86/xen/smp_pv.c17
-rw-r--r--arch/x86/xen/xen-asm_64.S2
-rw-r--r--arch/x86/xen/xen-head.S11
-rw-r--r--include/asm-generic/vmlinux.lds.h2
-rw-r--r--include/linux/bitops.h26
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--scripts/Makefile.build2
-rw-r--r--tools/objtool/check.c7
-rw-r--r--tools/objtool/objtool.c6
55 files changed, 579 insertions, 361 deletions
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
index af0c9a4c65a6..cd4b29be29af 100644
--- a/Documentation/x86/orc-unwinder.txt
+++ b/Documentation/x86/orc-unwinder.txt
@@ -4,7 +4,7 @@ ORC unwinder
4Overview 4Overview
5-------- 5--------
6 6
7The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is 7The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
8similar in concept to a DWARF unwinder. The difference is that the 8similar in concept to a DWARF unwinder. The difference is that the
9format of the ORC data is much simpler than DWARF, which in turn allows 9format of the ORC data is much simpler than DWARF, which in turn allows
10the ORC unwinder to be much simpler and faster. 10the ORC unwinder to be much simpler and faster.
diff --git a/Makefile b/Makefile
index ccd981892ef2..9a43f19aad08 100644
--- a/Makefile
+++ b/Makefile
@@ -934,8 +934,8 @@ ifdef CONFIG_STACK_VALIDATION
934 ifeq ($(has_libelf),1) 934 ifeq ($(has_libelf),1)
935 objtool_target := tools/objtool FORCE 935 objtool_target := tools/objtool FORCE
936 else 936 else
937 ifdef CONFIG_ORC_UNWINDER 937 ifdef CONFIG_UNWINDER_ORC
938 $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") 938 $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
939 else 939 else
940 $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") 940 $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
941 endif 941 endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fdb23313dd5..926fdfbadcdb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -171,7 +171,7 @@ config X86
171 select HAVE_PERF_USER_STACK_DUMP 171 select HAVE_PERF_USER_STACK_DUMP
172 select HAVE_RCU_TABLE_FREE 172 select HAVE_RCU_TABLE_FREE
173 select HAVE_REGS_AND_STACK_ACCESS_API 173 select HAVE_REGS_AND_STACK_ACCESS_API
174 select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION 174 select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
175 select HAVE_STACK_VALIDATION if X86_64 175 select HAVE_STACK_VALIDATION if X86_64
176 select HAVE_SYSCALL_TRACEPOINTS 176 select HAVE_SYSCALL_TRACEPOINTS
177 select HAVE_UNSTABLE_SCHED_CLOCK 177 select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 90b123056f4b..6293a8768a91 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG
359 359
360choice 360choice
361 prompt "Choose kernel unwinder" 361 prompt "Choose kernel unwinder"
362 default FRAME_POINTER_UNWINDER 362 default UNWINDER_ORC if X86_64
363 default UNWINDER_FRAME_POINTER if X86_32
363 ---help--- 364 ---help---
364 This determines which method will be used for unwinding kernel stack 365 This determines which method will be used for unwinding kernel stack
365 traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, 366 traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
366 livepatch, lockdep, and more. 367 livepatch, lockdep, and more.
367 368
368config FRAME_POINTER_UNWINDER 369config UNWINDER_ORC
369 bool "Frame pointer unwinder"
370 select FRAME_POINTER
371 ---help---
372 This option enables the frame pointer unwinder for unwinding kernel
373 stack traces.
374
375 The unwinder itself is fast and it uses less RAM than the ORC
376 unwinder, but the kernel text size will grow by ~3% and the kernel's
377 overall performance will degrade by roughly 5-10%.
378
379 This option is recommended if you want to use the livepatch
380 consistency model, as this is currently the only way to get a
381 reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
382
383config ORC_UNWINDER
384 bool "ORC unwinder" 370 bool "ORC unwinder"
385 depends on X86_64 371 depends on X86_64
386 select STACK_VALIDATION 372 select STACK_VALIDATION
@@ -396,7 +382,22 @@ config ORC_UNWINDER
396 Enabling this option will increase the kernel's runtime memory usage 382 Enabling this option will increase the kernel's runtime memory usage
397 by roughly 2-4MB, depending on your kernel config. 383 by roughly 2-4MB, depending on your kernel config.
398 384
399config GUESS_UNWINDER 385config UNWINDER_FRAME_POINTER
386 bool "Frame pointer unwinder"
387 select FRAME_POINTER
388 ---help---
389 This option enables the frame pointer unwinder for unwinding kernel
390 stack traces.
391
392 The unwinder itself is fast and it uses less RAM than the ORC
393 unwinder, but the kernel text size will grow by ~3% and the kernel's
394 overall performance will degrade by roughly 5-10%.
395
396 This option is recommended if you want to use the livepatch
397 consistency model, as this is currently the only way to get a
398 reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
399
400config UNWINDER_GUESS
400 bool "Guess unwinder" 401 bool "Guess unwinder"
401 depends on EXPERT 402 depends on EXPERT
402 ---help--- 403 ---help---
@@ -411,7 +412,7 @@ config GUESS_UNWINDER
411endchoice 412endchoice
412 413
413config FRAME_POINTER 414config FRAME_POINTER
414 depends on !ORC_UNWINDER && !GUESS_UNWINDER 415 depends on !UNWINDER_ORC && !UNWINDER_GUESS
415 bool 416 bool
416 417
417endmenu 418endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 550cd5012b73..66c9e2aab16c 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,5 @@
1CONFIG_NOHIGHMEM=y 1CONFIG_NOHIGHMEM=y
2# CONFIG_HIGHMEM4G is not set 2# CONFIG_HIGHMEM4G is not set
3# CONFIG_HIGHMEM64G is not set 3# CONFIG_HIGHMEM64G is not set
4CONFIG_GUESS_UNWINDER=y 4CONFIG_UNWINDER_GUESS=y
5# CONFIG_FRAME_POINTER_UNWINDER is not set 5# CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4a4b16e56d35..e32fc1f274d8 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
299# CONFIG_DEBUG_RODATA_TEST is not set 299# CONFIG_DEBUG_RODATA_TEST is not set
300CONFIG_DEBUG_BOOT_PARAMS=y 300CONFIG_DEBUG_BOOT_PARAMS=y
301CONFIG_OPTIMIZE_INLINING=y 301CONFIG_OPTIMIZE_INLINING=y
302CONFIG_UNWINDER_ORC=y
302CONFIG_SECURITY=y 303CONFIG_SECURITY=y
303CONFIG_SECURITY_NETWORK=y 304CONFIG_SECURITY_NETWORK=y
304CONFIG_SECURITY_SELINUX=y 305CONFIG_SECURITY_SELINUX=y
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 6e160031cfea..3fd8bc560fae 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
142 UNWIND_HINT_REGS offset=\offset 142 UNWIND_HINT_REGS offset=\offset
143 .endm 143 .endm
144 144
145 .macro RESTORE_EXTRA_REGS offset=0 145 .macro POP_EXTRA_REGS
146 movq 0*8+\offset(%rsp), %r15 146 popq %r15
147 movq 1*8+\offset(%rsp), %r14 147 popq %r14
148 movq 2*8+\offset(%rsp), %r13 148 popq %r13
149 movq 3*8+\offset(%rsp), %r12 149 popq %r12
150 movq 4*8+\offset(%rsp), %rbp 150 popq %rbp
151 movq 5*8+\offset(%rsp), %rbx 151 popq %rbx
152 UNWIND_HINT_REGS offset=\offset extra=0 152 .endm
153 .endm 153
154 154 .macro POP_C_REGS
155 .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 155 popq %r11
156 .if \rstor_r11 156 popq %r10
157 movq 6*8(%rsp), %r11 157 popq %r9
158 .endif 158 popq %r8
159 .if \rstor_r8910 159 popq %rax
160 movq 7*8(%rsp), %r10 160 popq %rcx
161 movq 8*8(%rsp), %r9 161 popq %rdx
162 movq 9*8(%rsp), %r8 162 popq %rsi
163 .endif 163 popq %rdi
164 .if \rstor_rax
165 movq 10*8(%rsp), %rax
166 .endif
167 .if \rstor_rcx
168 movq 11*8(%rsp), %rcx
169 .endif
170 .if \rstor_rdx
171 movq 12*8(%rsp), %rdx
172 .endif
173 movq 13*8(%rsp), %rsi
174 movq 14*8(%rsp), %rdi
175 UNWIND_HINT_IRET_REGS offset=16*8
176 .endm
177 .macro RESTORE_C_REGS
178 RESTORE_C_REGS_HELPER 1,1,1,1,1
179 .endm
180 .macro RESTORE_C_REGS_EXCEPT_RAX
181 RESTORE_C_REGS_HELPER 0,1,1,1,1
182 .endm
183 .macro RESTORE_C_REGS_EXCEPT_RCX
184 RESTORE_C_REGS_HELPER 1,0,1,1,1
185 .endm
186 .macro RESTORE_C_REGS_EXCEPT_R11
187 RESTORE_C_REGS_HELPER 1,1,0,1,1
188 .endm
189 .macro RESTORE_C_REGS_EXCEPT_RCX_R11
190 RESTORE_C_REGS_HELPER 1,0,0,1,1
191 .endm
192
193 .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
194 subq $-(15*8+\addskip), %rsp
195 .endm 164 .endm
196 165
197 .macro icebp 166 .macro icebp
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index bcfc5668dcb2..a2b30ec69497 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath:
221 TRACE_IRQS_ON /* user mode is traced as IRQs on */ 221 TRACE_IRQS_ON /* user mode is traced as IRQs on */
222 movq RIP(%rsp), %rcx 222 movq RIP(%rsp), %rcx
223 movq EFLAGS(%rsp), %r11 223 movq EFLAGS(%rsp), %r11
224 RESTORE_C_REGS_EXCEPT_RCX_R11 224 addq $6*8, %rsp /* skip extra regs -- they were preserved */
225 movq RSP(%rsp), %rsp
226 UNWIND_HINT_EMPTY 225 UNWIND_HINT_EMPTY
227 USERGS_SYSRET64 226 jmp .Lpop_c_regs_except_rcx_r11_and_sysret
228 227
2291: 2281:
230 /* 229 /*
@@ -246,17 +245,18 @@ entry_SYSCALL64_slow_path:
246 call do_syscall_64 /* returns with IRQs disabled */ 245 call do_syscall_64 /* returns with IRQs disabled */
247 246
248return_from_SYSCALL_64: 247return_from_SYSCALL_64:
249 RESTORE_EXTRA_REGS
250 TRACE_IRQS_IRETQ /* we're about to change IF */ 248 TRACE_IRQS_IRETQ /* we're about to change IF */
251 249
252 /* 250 /*
253 * Try to use SYSRET instead of IRET if we're returning to 251 * Try to use SYSRET instead of IRET if we're returning to
254 * a completely clean 64-bit userspace context. 252 * a completely clean 64-bit userspace context. If we're not,
253 * go to the slow exit path.
255 */ 254 */
256 movq RCX(%rsp), %rcx 255 movq RCX(%rsp), %rcx
257 movq RIP(%rsp), %r11 256 movq RIP(%rsp), %r11
258 cmpq %rcx, %r11 /* RCX == RIP */ 257
259 jne opportunistic_sysret_failed 258 cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
259 jne swapgs_restore_regs_and_return_to_usermode
260 260
261 /* 261 /*
262 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP 262 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -274,14 +274,14 @@ return_from_SYSCALL_64:
274 274
275 /* If this changed %rcx, it was not canonical */ 275 /* If this changed %rcx, it was not canonical */
276 cmpq %rcx, %r11 276 cmpq %rcx, %r11
277 jne opportunistic_sysret_failed 277 jne swapgs_restore_regs_and_return_to_usermode
278 278
279 cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ 279 cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
280 jne opportunistic_sysret_failed 280 jne swapgs_restore_regs_and_return_to_usermode
281 281
282 movq R11(%rsp), %r11 282 movq R11(%rsp), %r11
283 cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ 283 cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
284 jne opportunistic_sysret_failed 284 jne swapgs_restore_regs_and_return_to_usermode
285 285
286 /* 286 /*
287 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot 287 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -302,12 +302,12 @@ return_from_SYSCALL_64:
302 * would never get past 'stuck_here'. 302 * would never get past 'stuck_here'.
303 */ 303 */
304 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 304 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
305 jnz opportunistic_sysret_failed 305 jnz swapgs_restore_regs_and_return_to_usermode
306 306
307 /* nothing to check for RSP */ 307 /* nothing to check for RSP */
308 308
309 cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ 309 cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
310 jne opportunistic_sysret_failed 310 jne swapgs_restore_regs_and_return_to_usermode
311 311
312 /* 312 /*
313 * We win! This label is here just for ease of understanding 313 * We win! This label is here just for ease of understanding
@@ -315,14 +315,20 @@ return_from_SYSCALL_64:
315 */ 315 */
316syscall_return_via_sysret: 316syscall_return_via_sysret:
317 /* rcx and r11 are already restored (see code above) */ 317 /* rcx and r11 are already restored (see code above) */
318 RESTORE_C_REGS_EXCEPT_RCX_R11
319 movq RSP(%rsp), %rsp
320 UNWIND_HINT_EMPTY 318 UNWIND_HINT_EMPTY
319 POP_EXTRA_REGS
320.Lpop_c_regs_except_rcx_r11_and_sysret:
321 popq %rsi /* skip r11 */
322 popq %r10
323 popq %r9
324 popq %r8
325 popq %rax
326 popq %rsi /* skip rcx */
327 popq %rdx
328 popq %rsi
329 popq %rdi
330 movq RSP-ORIG_RAX(%rsp), %rsp
321 USERGS_SYSRET64 331 USERGS_SYSRET64
322
323opportunistic_sysret_failed:
324 SWAPGS
325 jmp restore_c_regs_and_iret
326END(entry_SYSCALL_64) 332END(entry_SYSCALL_64)
327 333
328ENTRY(stub_ptregs_64) 334ENTRY(stub_ptregs_64)
@@ -423,8 +429,7 @@ ENTRY(ret_from_fork)
423 movq %rsp, %rdi 429 movq %rsp, %rdi
424 call syscall_return_slowpath /* returns with IRQs disabled */ 430 call syscall_return_slowpath /* returns with IRQs disabled */
425 TRACE_IRQS_ON /* user mode is traced as IRQS on */ 431 TRACE_IRQS_ON /* user mode is traced as IRQS on */
426 SWAPGS 432 jmp swapgs_restore_regs_and_return_to_usermode
427 jmp restore_regs_and_iret
428 433
4291: 4341:
430 /* kernel thread */ 435 /* kernel thread */
@@ -612,8 +617,21 @@ GLOBAL(retint_user)
612 mov %rsp,%rdi 617 mov %rsp,%rdi
613 call prepare_exit_to_usermode 618 call prepare_exit_to_usermode
614 TRACE_IRQS_IRETQ 619 TRACE_IRQS_IRETQ
620
621GLOBAL(swapgs_restore_regs_and_return_to_usermode)
622#ifdef CONFIG_DEBUG_ENTRY
623 /* Assert that pt_regs indicates user mode. */
624 testb $3, CS(%rsp)
625 jnz 1f
626 ud2
6271:
628#endif
615 SWAPGS 629 SWAPGS
616 jmp restore_regs_and_iret 630 POP_EXTRA_REGS
631 POP_C_REGS
632 addq $8, %rsp /* skip regs->orig_ax */
633 INTERRUPT_RETURN
634
617 635
618/* Returning to kernel space */ 636/* Returning to kernel space */
619retint_kernel: 637retint_kernel:
@@ -633,15 +651,17 @@ retint_kernel:
633 */ 651 */
634 TRACE_IRQS_IRETQ 652 TRACE_IRQS_IRETQ
635 653
636/* 654GLOBAL(restore_regs_and_return_to_kernel)
637 * At this label, code paths which return to kernel and to user, 655#ifdef CONFIG_DEBUG_ENTRY
638 * which come from interrupts/exception and from syscalls, merge. 656 /* Assert that pt_regs indicates kernel mode. */
639 */ 657 testb $3, CS(%rsp)
640GLOBAL(restore_regs_and_iret) 658 jz 1f
641 RESTORE_EXTRA_REGS 659 ud2
642restore_c_regs_and_iret: 6601:
643 RESTORE_C_REGS 661#endif
644 REMOVE_PT_GPREGS_FROM_STACK 8 662 POP_EXTRA_REGS
663 POP_C_REGS
664 addq $8, %rsp /* skip regs->orig_ax */
645 INTERRUPT_RETURN 665 INTERRUPT_RETURN
646 666
647ENTRY(native_iret) 667ENTRY(native_iret)
@@ -818,7 +838,7 @@ ENTRY(\sym)
818 838
819 ASM_CLAC 839 ASM_CLAC
820 840
821 .ifeq \has_error_code 841 .if \has_error_code == 0
822 pushq $-1 /* ORIG_RAX: no syscall to restart */ 842 pushq $-1 /* ORIG_RAX: no syscall to restart */
823 .endif 843 .endif
824 844
@@ -1059,6 +1079,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1059idtentry stack_segment do_stack_segment has_error_code=1 1079idtentry stack_segment do_stack_segment has_error_code=1
1060 1080
1061#ifdef CONFIG_XEN 1081#ifdef CONFIG_XEN
1082idtentry xennmi do_nmi has_error_code=0
1062idtentry xendebug do_debug has_error_code=0 1083idtentry xendebug do_debug has_error_code=0
1063idtentry xenint3 do_int3 has_error_code=0 1084idtentry xenint3 do_int3 has_error_code=0
1064#endif 1085#endif
@@ -1112,17 +1133,14 @@ ENTRY(paranoid_exit)
1112 DISABLE_INTERRUPTS(CLBR_ANY) 1133 DISABLE_INTERRUPTS(CLBR_ANY)
1113 TRACE_IRQS_OFF_DEBUG 1134 TRACE_IRQS_OFF_DEBUG
1114 testl %ebx, %ebx /* swapgs needed? */ 1135 testl %ebx, %ebx /* swapgs needed? */
1115 jnz paranoid_exit_no_swapgs 1136 jnz .Lparanoid_exit_no_swapgs
1116 TRACE_IRQS_IRETQ 1137 TRACE_IRQS_IRETQ
1117 SWAPGS_UNSAFE_STACK 1138 SWAPGS_UNSAFE_STACK
1118 jmp paranoid_exit_restore 1139 jmp .Lparanoid_exit_restore
1119paranoid_exit_no_swapgs: 1140.Lparanoid_exit_no_swapgs:
1120 TRACE_IRQS_IRETQ_DEBUG 1141 TRACE_IRQS_IRETQ_DEBUG
1121paranoid_exit_restore: 1142.Lparanoid_exit_restore:
1122 RESTORE_EXTRA_REGS 1143 jmp restore_regs_and_return_to_kernel
1123 RESTORE_C_REGS
1124 REMOVE_PT_GPREGS_FROM_STACK 8
1125 INTERRUPT_RETURN
1126END(paranoid_exit) 1144END(paranoid_exit)
1127 1145
1128/* 1146/*
@@ -1223,10 +1241,13 @@ ENTRY(error_exit)
1223 jmp retint_user 1241 jmp retint_user
1224END(error_exit) 1242END(error_exit)
1225 1243
1226/* Runs on exception stack */ 1244/*
1227/* XXX: broken on Xen PV */ 1245 * Runs on exception stack. Xen PV does not go through this path at all,
1246 * so we can use real assembly here.
1247 */
1228ENTRY(nmi) 1248ENTRY(nmi)
1229 UNWIND_HINT_IRET_REGS 1249 UNWIND_HINT_IRET_REGS
1250
1230 /* 1251 /*
1231 * We allow breakpoints in NMIs. If a breakpoint occurs, then 1252 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1232 * the iretq it performs will take us out of NMI context. 1253 * the iretq it performs will take us out of NMI context.
@@ -1284,7 +1305,7 @@ ENTRY(nmi)
1284 * stacks lest we corrupt the "NMI executing" variable. 1305 * stacks lest we corrupt the "NMI executing" variable.
1285 */ 1306 */
1286 1307
1287 SWAPGS_UNSAFE_STACK 1308 swapgs
1288 cld 1309 cld
1289 movq %rsp, %rdx 1310 movq %rsp, %rdx
1290 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 1311 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1328,8 +1349,7 @@ ENTRY(nmi)
1328 * Return back to user mode. We must *not* do the normal exit 1349 * Return back to user mode. We must *not* do the normal exit
1329 * work, because we don't want to enable interrupts. 1350 * work, because we don't want to enable interrupts.
1330 */ 1351 */
1331 SWAPGS 1352 jmp swapgs_restore_regs_and_return_to_usermode
1332 jmp restore_regs_and_iret
1333 1353
1334.Lnmi_from_kernel: 1354.Lnmi_from_kernel:
1335 /* 1355 /*
@@ -1450,7 +1470,7 @@ nested_nmi_out:
1450 popq %rdx 1470 popq %rdx
1451 1471
1452 /* We are returning to kernel mode, so this cannot result in a fault. */ 1472 /* We are returning to kernel mode, so this cannot result in a fault. */
1453 INTERRUPT_RETURN 1473 iretq
1454 1474
1455first_nmi: 1475first_nmi:
1456 /* Restore rdx. */ 1476 /* Restore rdx. */
@@ -1481,7 +1501,7 @@ first_nmi:
1481 pushfq /* RFLAGS */ 1501 pushfq /* RFLAGS */
1482 pushq $__KERNEL_CS /* CS */ 1502 pushq $__KERNEL_CS /* CS */
1483 pushq $1f /* RIP */ 1503 pushq $1f /* RIP */
1484 INTERRUPT_RETURN /* continues at repeat_nmi below */ 1504 iretq /* continues at repeat_nmi below */
1485 UNWIND_HINT_IRET_REGS 1505 UNWIND_HINT_IRET_REGS
14861: 15061:
1487#endif 1507#endif
@@ -1544,29 +1564,34 @@ end_repeat_nmi:
1544nmi_swapgs: 1564nmi_swapgs:
1545 SWAPGS_UNSAFE_STACK 1565 SWAPGS_UNSAFE_STACK
1546nmi_restore: 1566nmi_restore:
1547 RESTORE_EXTRA_REGS 1567 POP_EXTRA_REGS
1548 RESTORE_C_REGS 1568 POP_C_REGS
1549 1569
1550 /* Point RSP at the "iret" frame. */ 1570 /*
1551 REMOVE_PT_GPREGS_FROM_STACK 6*8 1571 * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
1572 * frame.
1573 */
1574 addq $6*8, %rsp
1552 1575
1553 /* 1576 /*
1554 * Clear "NMI executing". Set DF first so that we can easily 1577 * Clear "NMI executing". Set DF first so that we can easily
1555 * distinguish the remaining code between here and IRET from 1578 * distinguish the remaining code between here and IRET from
1556 * the SYSCALL entry and exit paths. On a native kernel, we 1579 * the SYSCALL entry and exit paths.
1557 * could just inspect RIP, but, on paravirt kernels, 1580 *
1558 * INTERRUPT_RETURN can translate into a jump into a 1581 * We arguably should just inspect RIP instead, but I (Andy) wrote
1559 * hypercall page. 1582 * this code when I had the misapprehension that Xen PV supported
1583 * NMIs, and Xen PV would break that approach.
1560 */ 1584 */
1561 std 1585 std
1562 movq $0, 5*8(%rsp) /* clear "NMI executing" */ 1586 movq $0, 5*8(%rsp) /* clear "NMI executing" */
1563 1587
1564 /* 1588 /*
1565 * INTERRUPT_RETURN reads the "iret" frame and exits the NMI 1589 * iretq reads the "iret" frame and exits the NMI stack in a
1566 * stack in a single instruction. We are returning to kernel 1590 * single instruction. We are returning to kernel mode, so this
1567 * mode, so this cannot result in a fault. 1591 * cannot result in a fault. Similarly, we don't need to worry
1592 * about espfix64 on the way back to kernel mode.
1568 */ 1593 */
1569 INTERRUPT_RETURN 1594 iretq
1570END(nmi) 1595END(nmi)
1571 1596
1572ENTRY(ignore_sysret) 1597ENTRY(ignore_sysret)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index b5c7a56ed256..568e130d932c 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat)
337 337
338 /* Go back to user mode. */ 338 /* Go back to user mode. */
339 TRACE_IRQS_ON 339 TRACE_IRQS_ON
340 SWAPGS 340 jmp swapgs_restore_regs_and_return_to_usermode
341 jmp restore_regs_and_iret
342END(entry_INT80_compat) 341END(entry_INT80_compat)
343 342
344ENTRY(stub32_clone) 343ENTRY(stub32_clone)
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 5b0579abb398..3ac991d81e74 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
45 bool ok; 45 bool ok;
46 unsigned int retry = RDRAND_RETRY_LOOPS; 46 unsigned int retry = RDRAND_RETRY_LOOPS;
47 do { 47 do {
48 asm volatile(RDRAND_LONG "\n\t" 48 asm volatile(RDRAND_LONG
49 CC_SET(c) 49 CC_SET(c)
50 : CC_OUT(c) (ok), "=a" (*v)); 50 : CC_OUT(c) (ok), "=a" (*v));
51 if (ok) 51 if (ok)
@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
59 bool ok; 59 bool ok;
60 unsigned int retry = RDRAND_RETRY_LOOPS; 60 unsigned int retry = RDRAND_RETRY_LOOPS;
61 do { 61 do {
62 asm volatile(RDRAND_INT "\n\t" 62 asm volatile(RDRAND_INT
63 CC_SET(c) 63 CC_SET(c)
64 : CC_OUT(c) (ok), "=a" (*v)); 64 : CC_OUT(c) (ok), "=a" (*v));
65 if (ok) 65 if (ok)
@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
71static inline bool rdseed_long(unsigned long *v) 71static inline bool rdseed_long(unsigned long *v)
72{ 72{
73 bool ok; 73 bool ok;
74 asm volatile(RDSEED_LONG "\n\t" 74 asm volatile(RDSEED_LONG
75 CC_SET(c) 75 CC_SET(c)
76 : CC_OUT(c) (ok), "=a" (*v)); 76 : CC_OUT(c) (ok), "=a" (*v));
77 return ok; 77 return ok;
@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
80static inline bool rdseed_int(unsigned int *v) 80static inline bool rdseed_int(unsigned int *v)
81{ 81{
82 bool ok; 82 bool ok;
83 asm volatile(RDSEED_INT "\n\t" 83 asm volatile(RDSEED_INT
84 CC_SET(c) 84 CC_SET(c)
85 : CC_OUT(c) (ok), "=a" (*v)); 85 : CC_OUT(c) (ok), "=a" (*v));
86 return ok; 86 return ok;
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2bcf47314959..3fa039855b8f 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
143static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) 143static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
144{ 144{
145 bool negative; 145 bool negative;
146 asm volatile(LOCK_PREFIX "andb %2,%1\n\t" 146 asm volatile(LOCK_PREFIX "andb %2,%1"
147 CC_SET(s) 147 CC_SET(s)
148 : CC_OUT(s) (negative), ADDR 148 : CC_OUT(s) (negative), ADDR
149 : "ir" ((char) ~(1 << nr)) : "memory"); 149 : "ir" ((char) ~(1 << nr)) : "memory");
@@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
246{ 246{
247 bool oldbit; 247 bool oldbit;
248 248
249 asm("bts %2,%1\n\t" 249 asm("bts %2,%1"
250 CC_SET(c) 250 CC_SET(c)
251 : CC_OUT(c) (oldbit), ADDR 251 : CC_OUT(c) (oldbit), ADDR
252 : "Ir" (nr)); 252 : "Ir" (nr));
@@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
286{ 286{
287 bool oldbit; 287 bool oldbit;
288 288
289 asm volatile("btr %2,%1\n\t" 289 asm volatile("btr %2,%1"
290 CC_SET(c) 290 CC_SET(c)
291 : CC_OUT(c) (oldbit), ADDR 291 : CC_OUT(c) (oldbit), ADDR
292 : "Ir" (nr)); 292 : "Ir" (nr));
@@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
298{ 298{
299 bool oldbit; 299 bool oldbit;
300 300
301 asm volatile("btc %2,%1\n\t" 301 asm volatile("btc %2,%1"
302 CC_SET(c) 302 CC_SET(c)
303 : CC_OUT(c) (oldbit), ADDR 303 : CC_OUT(c) (oldbit), ADDR
304 : "Ir" (nr) : "memory"); 304 : "Ir" (nr) : "memory");
@@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
329{ 329{
330 bool oldbit; 330 bool oldbit;
331 331
332 asm volatile("bt %2,%1\n\t" 332 asm volatile("bt %2,%1"
333 CC_SET(c) 333 CC_SET(c)
334 : CC_OUT(c) (oldbit) 334 : CC_OUT(c) (oldbit)
335 : "m" (*(unsigned long *)addr), "Ir" (nr)); 335 : "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 9eef9cc64c68..a600a6cda9ec 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -7,6 +7,7 @@
7 */ 7 */
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/sched/task_stack.h>
10#include <asm/processor.h> 11#include <asm/processor.h>
11#include <asm/user32.h> 12#include <asm/user32.h>
12#include <asm/unistd.h> 13#include <asm/unistd.h>
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0dfa68438e80..bf6a76202a77 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -126,11 +126,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
126#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) 126#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
127 127
128#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) 128#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
129#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) 129
130#define setup_clear_cpu_cap(bit) do { \ 130extern void setup_clear_cpu_cap(unsigned int bit);
131 clear_cpu_cap(&boot_cpu_data, bit); \ 131extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
132 set_bit(bit, (unsigned long *)cpu_caps_cleared); \ 132
133} while (0)
134#define setup_force_cpu_cap(bit) do { \ 133#define setup_force_cpu_cap(bit) do { \
135 set_cpu_cap(&boot_cpu_data, bit); \ 134 set_cpu_cap(&boot_cpu_data, bit); \
136 set_bit(bit, (unsigned long *)cpu_caps_set); \ 135 set_bit(bit, (unsigned long *)cpu_caps_set); \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 793690fbda36..74370734663c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -22,6 +22,11 @@
22 * this feature bit is not displayed in /proc/cpuinfo at all. 22 * this feature bit is not displayed in /proc/cpuinfo at all.
23 */ 23 */
24 24
25/*
26 * When adding new features here that depend on other features,
27 * please update the table in kernel/cpu/cpuid-deps.c
28 */
29
25/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ 30/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
26#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ 31#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
27#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ 32#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
@@ -295,6 +300,12 @@
295#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ 300#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
296#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ 301#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
297#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ 302#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
303#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
304#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
305#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
306#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
307#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
308#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
298#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ 309#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
299#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ 310#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
300#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ 311#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 8546fafa21a9..7948a17febb4 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -6,7 +6,7 @@
6#include <asm/orc_types.h> 6#include <asm/orc_types.h>
7 7
8struct mod_arch_specific { 8struct mod_arch_specific {
9#ifdef CONFIG_ORC_UNWINDER 9#ifdef CONFIG_UNWINDER_ORC
10 unsigned int num_orcs; 10 unsigned int num_orcs;
11 int *orc_unwind_ip; 11 int *orc_unwind_ip;
12 struct orc_entry *orc_unwind; 12 struct orc_entry *orc_unwind;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index fd81228e8037..283efcaac8af 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -16,10 +16,9 @@
16#include <linux/cpumask.h> 16#include <linux/cpumask.h>
17#include <asm/frame.h> 17#include <asm/frame.h>
18 18
19static inline void load_sp0(struct tss_struct *tss, 19static inline void load_sp0(unsigned long sp0)
20 struct thread_struct *thread)
21{ 20{
22 PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); 21 PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
23} 22}
24 23
25/* The paravirtualized CPUID instruction. */ 24/* The paravirtualized CPUID instruction. */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 10cc3b9709fe..6ec54d01972d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -134,7 +134,7 @@ struct pv_cpu_ops {
134 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); 134 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
135 void (*free_ldt)(struct desc_struct *ldt, unsigned entries); 135 void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
136 136
137 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); 137 void (*load_sp0)(unsigned long sp0);
138 138
139 void (*set_iopl_mask)(unsigned mask); 139 void (*set_iopl_mask)(unsigned mask);
140 140
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 377f1ffd18be..ba3c523aaf16 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
526{ 526{
527 bool oldbit; 527 bool oldbit;
528 528
529 asm volatile("bt "__percpu_arg(2)",%1\n\t" 529 asm volatile("bt "__percpu_arg(2)",%1"
530 CC_SET(c) 530 CC_SET(c)
531 : CC_OUT(c) (oldbit) 531 : CC_OUT(c) (oldbit)
532 : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); 532 : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bdac19ab2488..2db7cf720b04 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -431,7 +431,9 @@ typedef struct {
431struct thread_struct { 431struct thread_struct {
432 /* Cached TLS descriptors: */ 432 /* Cached TLS descriptors: */
433 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; 433 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
434#ifdef CONFIG_X86_32
434 unsigned long sp0; 435 unsigned long sp0;
436#endif
435 unsigned long sp; 437 unsigned long sp;
436#ifdef CONFIG_X86_32 438#ifdef CONFIG_X86_32
437 unsigned long sysenter_cs; 439 unsigned long sysenter_cs;
@@ -518,16 +520,9 @@ static inline void native_set_iopl_mask(unsigned mask)
518} 520}
519 521
520static inline void 522static inline void
521native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) 523native_load_sp0(unsigned long sp0)
522{ 524{
523 tss->x86_tss.sp0 = thread->sp0; 525 this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
524#ifdef CONFIG_X86_32
525 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
526 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
527 tss->x86_tss.ss1 = thread->sysenter_cs;
528 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
529 }
530#endif
531} 526}
532 527
533static inline void native_swapgs(void) 528static inline void native_swapgs(void)
@@ -547,15 +542,20 @@ static inline unsigned long current_top_of_stack(void)
547#endif 542#endif
548} 543}
549 544
545static inline bool on_thread_stack(void)
546{
547 return (unsigned long)(current_top_of_stack() -
548 current_stack_pointer) < THREAD_SIZE;
549}
550
550#ifdef CONFIG_PARAVIRT 551#ifdef CONFIG_PARAVIRT
551#include <asm/paravirt.h> 552#include <asm/paravirt.h>
552#else 553#else
553#define __cpuid native_cpuid 554#define __cpuid native_cpuid
554 555
555static inline void load_sp0(struct tss_struct *tss, 556static inline void load_sp0(unsigned long sp0)
556 struct thread_struct *thread)
557{ 557{
558 native_load_sp0(tss, thread); 558 native_load_sp0(sp0);
559} 559}
560 560
561#define set_iopl_mask native_set_iopl_mask 561#define set_iopl_mask native_set_iopl_mask
@@ -804,6 +804,15 @@ static inline void spin_lock_prefetch(const void *x)
804#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ 804#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
805 TOP_OF_KERNEL_STACK_PADDING) 805 TOP_OF_KERNEL_STACK_PADDING)
806 806
807#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
808
809#define task_pt_regs(task) \
810({ \
811 unsigned long __ptr = (unsigned long)task_stack_page(task); \
812 __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
813 ((struct pt_regs *)__ptr) - 1; \
814})
815
807#ifdef CONFIG_X86_32 816#ifdef CONFIG_X86_32
808/* 817/*
809 * User space process size: 3GB (default). 818 * User space process size: 3GB (default).
@@ -823,23 +832,6 @@ static inline void spin_lock_prefetch(const void *x)
823 .addr_limit = KERNEL_DS, \ 832 .addr_limit = KERNEL_DS, \
824} 833}
825 834
826/*
827 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
828 * This is necessary to guarantee that the entire "struct pt_regs"
829 * is accessible even if the CPU haven't stored the SS/ESP registers
830 * on the stack (interrupt gate does not save these registers
831 * when switching to the same priv ring).
832 * Therefore beware: accessing the ss/esp fields of the
833 * "struct pt_regs" is possible, but they may contain the
834 * completely wrong values.
835 */
836#define task_pt_regs(task) \
837({ \
838 unsigned long __ptr = (unsigned long)task_stack_page(task); \
839 __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
840 ((struct pt_regs *)__ptr) - 1; \
841})
842
843#define KSTK_ESP(task) (task_pt_regs(task)->sp) 835#define KSTK_ESP(task) (task_pt_regs(task)->sp)
844 836
845#else 837#else
@@ -873,11 +865,9 @@ static inline void spin_lock_prefetch(const void *x)
873#define STACK_TOP_MAX TASK_SIZE_MAX 865#define STACK_TOP_MAX TASK_SIZE_MAX
874 866
875#define INIT_THREAD { \ 867#define INIT_THREAD { \
876 .sp0 = TOP_OF_INIT_STACK, \
877 .addr_limit = KERNEL_DS, \ 868 .addr_limit = KERNEL_DS, \
878} 869}
879 870
880#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
881extern unsigned long KSTK_ESP(struct task_struct *task); 871extern unsigned long KSTK_ESP(struct task_struct *task);
882 872
883#endif /* CONFIG_X86_64 */ 873#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index c0e3c45cf6ab..14131dd06b29 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
136#endif 136#endif
137} 137}
138 138
139#ifdef CONFIG_X86_64
140static inline bool user_64bit_mode(struct pt_regs *regs) 139static inline bool user_64bit_mode(struct pt_regs *regs)
141{ 140{
141#ifdef CONFIG_X86_64
142#ifndef CONFIG_PARAVIRT 142#ifndef CONFIG_PARAVIRT
143 /* 143 /*
144 * On non-paravirt systems, this is the only long mode CPL 3 144 * On non-paravirt systems, this is the only long mode CPL 3
@@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
149 /* Headers are too twisted for this to go in paravirt.h. */ 149 /* Headers are too twisted for this to go in paravirt.h. */
150 return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; 150 return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
151#endif 151#endif
152#else /* !CONFIG_X86_64 */
153 return false;
154#endif
152} 155}
153 156
157#ifdef CONFIG_X86_64
154#define current_user_stack_pointer() current_pt_regs()->sp 158#define current_user_stack_pointer() current_pt_regs()->sp
155#define compat_user_stack_pointer() current_pt_regs()->sp 159#define compat_user_stack_pointer() current_pt_regs()->sp
156#endif 160#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index d8f3a6ae9f6c..f91c365e57c3 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -29,7 +29,7 @@ cc_label: \
29#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ 29#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
30do { \ 30do { \
31 bool c; \ 31 bool c; \
32 asm volatile (fullop ";" CC_SET(cc) \ 32 asm volatile (fullop CC_SET(cc) \
33 : [counter] "+m" (var), CC_OUT(cc) (c) \ 33 : [counter] "+m" (var), CC_OUT(cc) (c) \
34 : __VA_ARGS__ : clobbers); \ 34 : __VA_ARGS__ : clobbers); \
35 return c; \ 35 return c; \
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 899084b70412..8c6bd6863db9 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,6 +2,8 @@
2#ifndef _ASM_X86_SWITCH_TO_H 2#ifndef _ASM_X86_SWITCH_TO_H
3#define _ASM_X86_SWITCH_TO_H 3#define _ASM_X86_SWITCH_TO_H
4 4
5#include <linux/sched/task_stack.h>
6
5struct task_struct; /* one of the stranger aspects of C forward declarations */ 7struct task_struct; /* one of the stranger aspects of C forward declarations */
6 8
7struct task_struct *__switch_to_asm(struct task_struct *prev, 9struct task_struct *__switch_to_asm(struct task_struct *prev,
@@ -73,4 +75,26 @@ do { \
73 ((last) = __switch_to_asm((prev), (next))); \ 75 ((last) = __switch_to_asm((prev), (next))); \
74} while (0) 76} while (0)
75 77
78#ifdef CONFIG_X86_32
79static inline void refresh_sysenter_cs(struct thread_struct *thread)
80{
81 /* Only happens when SEP is enabled, no need to test "SEP"arately: */
82 if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
83 return;
84
85 this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
86 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
87}
88#endif
89
90/* This is used when switching tasks or entering/exiting vm86 mode. */
91static inline void update_sp0(struct task_struct *task)
92{
93#ifdef CONFIG_X86_32
94 load_sp0(task->thread.sp0);
95#else
96 load_sp0(task_top_of_stack(task));
97#endif
98}
99
76#endif /* _ASM_X86_SWITCH_TO_H */ 100#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcafe27a6..bad25bb80679 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
21asmlinkage long sys_iopl(unsigned int); 21asmlinkage long sys_iopl(unsigned int);
22 22
23/* kernel/ldt.c */ 23/* kernel/ldt.c */
24asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); 24asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
25 25
26/* kernel/signal.c */ 26/* kernel/signal.c */
27asmlinkage long sys_rt_sigreturn(void); 27asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index fa60398bbc3a..069c04be1507 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
34 ) 34 )
35); 35);
36 36
37DEFINE_EVENT(x86_fpu, x86_fpu_state,
38 TP_PROTO(struct fpu *fpu),
39 TP_ARGS(fpu)
40);
41
42DEFINE_EVENT(x86_fpu, x86_fpu_before_save, 37DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
43 TP_PROTO(struct fpu *fpu), 38 TP_PROTO(struct fpu *fpu),
44 TP_ARGS(fpu) 39 TP_ARGS(fpu)
@@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
74 TP_ARGS(fpu) 69 TP_ARGS(fpu)
75); 70);
76 71
77DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
78 TP_PROTO(struct fpu *fpu),
79 TP_ARGS(fpu)
80);
81
82DEFINE_EVENT(x86_fpu, x86_fpu_init_state, 72DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
83 TP_PROTO(struct fpu *fpu), 73 TP_PROTO(struct fpu *fpu),
84 TP_ARGS(fpu) 74 TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b0cced97a6ce..1fadd310ff68 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void);
38 38
39#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) 39#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
40asmlinkage void xen_divide_error(void); 40asmlinkage void xen_divide_error(void);
41asmlinkage void xen_xennmi(void);
41asmlinkage void xen_xendebug(void); 42asmlinkage void xen_xendebug(void);
42asmlinkage void xen_xenint3(void); 43asmlinkage void xen_xenint3(void);
43asmlinkage void xen_nmi(void);
44asmlinkage void xen_overflow(void); 44asmlinkage void xen_overflow(void);
45asmlinkage void xen_bounds(void); 45asmlinkage void xen_bounds(void);
46asmlinkage void xen_invalid_op(void); 46asmlinkage void xen_invalid_op(void);
@@ -145,4 +145,22 @@ enum {
145 X86_TRAP_IRET = 32, /* 32, IRET Exception */ 145 X86_TRAP_IRET = 32, /* 32, IRET Exception */
146}; 146};
147 147
148/*
149 * Page fault error code bits:
150 *
151 * bit 0 == 0: no page found 1: protection fault
152 * bit 1 == 0: read access 1: write access
153 * bit 2 == 0: kernel-mode access 1: user-mode access
154 * bit 3 == 1: use of reserved bit detected
155 * bit 4 == 1: fault was an instruction fetch
156 * bit 5 == 1: protection keys block access
157 */
158enum x86_pf_error_code {
159 X86_PF_PROT = 1 << 0,
160 X86_PF_WRITE = 1 << 1,
161 X86_PF_USER = 1 << 2,
162 X86_PF_RSVD = 1 << 3,
163 X86_PF_INSTR = 1 << 4,
164 X86_PF_PK = 1 << 5,
165};
148#endif /* _ASM_X86_TRAPS_H */ 166#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 87adc0d38c4a..e9cc6fe1fc6f 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,11 +13,11 @@ struct unwind_state {
13 struct task_struct *task; 13 struct task_struct *task;
14 int graph_idx; 14 int graph_idx;
15 bool error; 15 bool error;
16#if defined(CONFIG_ORC_UNWINDER) 16#if defined(CONFIG_UNWINDER_ORC)
17 bool signal, full_regs; 17 bool signal, full_regs;
18 unsigned long sp, bp, ip; 18 unsigned long sp, bp, ip;
19 struct pt_regs *regs; 19 struct pt_regs *regs;
20#elif defined(CONFIG_FRAME_POINTER_UNWINDER) 20#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
21 bool got_irq; 21 bool got_irq;
22 unsigned long *bp, *orig_sp, ip; 22 unsigned long *bp, *orig_sp, ip;
23 struct pt_regs *regs; 23 struct pt_regs *regs;
@@ -51,7 +51,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
51 __unwind_start(state, task, regs, first_frame); 51 __unwind_start(state, task, regs, first_frame);
52} 52}
53 53
54#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER) 54#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) 55static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
56{ 56{
57 if (unwind_done(state)) 57 if (unwind_done(state))
@@ -66,7 +66,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
66} 66}
67#endif 67#endif
68 68
69#ifdef CONFIG_ORC_UNWINDER 69#ifdef CONFIG_UNWINDER_ORC
70void unwind_init(void); 70void unwind_init(void);
71void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, 71void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
72 void *orc, size_t orc_size); 72 void *orc, size_t orc_size);
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 6f3355399665..53b4ca55ebb6 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -152,5 +152,8 @@
152#define CX86_ARR_BASE 0xc4 152#define CX86_ARR_BASE 0xc4
153#define CX86_RCR_BASE 0xdc 153#define CX86_RCR_BASE 0xdc
154 154
155#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
156 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
157 X86_CR0_PG)
155 158
156#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */ 159#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5f70044340ff..d12da41f72da 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -27,7 +27,6 @@ KASAN_SANITIZE_dumpstack.o := n
27KASAN_SANITIZE_dumpstack_$(BITS).o := n 27KASAN_SANITIZE_dumpstack_$(BITS).o := n
28KASAN_SANITIZE_stacktrace.o := n 28KASAN_SANITIZE_stacktrace.o := n
29 29
30OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
31OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y 30OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
32OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y 31OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
33OBJECT_FILES_NON_STANDARD_test_nx.o := y 32OBJECT_FILES_NON_STANDARD_test_nx.o := y
@@ -128,9 +127,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
128obj-$(CONFIG_TRACING) += tracepoint.o 127obj-$(CONFIG_TRACING) += tracepoint.o
129obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o 128obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
130 129
131obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o 130obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
132obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o 131obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
133obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o 132obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
134 133
135### 134###
136# 64 bit specific files 135# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c60922a66385..90cb82dbba57 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -23,6 +23,7 @@ obj-y += rdrand.o
23obj-y += match.o 23obj-y += match.o
24obj-y += bugs.o 24obj-y += bugs.o
25obj-$(CONFIG_CPU_FREQ) += aperfmperf.o 25obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
26obj-y += cpuid-deps.o
26 27
27obj-$(CONFIG_PROC_FS) += proc.o 28obj-$(CONFIG_PROC_FS) += proc.o
28obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o 29obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176bae7fd8..cdf79ab628c2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1301,18 +1301,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
1301 pr_cont(")\n"); 1301 pr_cont(")\n");
1302} 1302}
1303 1303
1304static __init int setup_disablecpuid(char *arg) 1304/*
1305 * clearcpuid= was already parsed in fpu__init_parse_early_param.
1306 * But we need to keep a dummy __setup around otherwise it would
1307 * show up as an environment variable for init.
1308 */
1309static __init int setup_clearcpuid(char *arg)
1305{ 1310{
1306 int bit;
1307
1308 if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
1309 setup_clear_cpu_cap(bit);
1310 else
1311 return 0;
1312
1313 return 1; 1311 return 1;
1314} 1312}
1315__setup("clearcpuid=", setup_disablecpuid); 1313__setup("clearcpuid=", setup_clearcpuid);
1316 1314
1317#ifdef CONFIG_X86_64 1315#ifdef CONFIG_X86_64
1318DEFINE_PER_CPU_FIRST(union irq_stack_union, 1316DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1572,9 +1570,13 @@ void cpu_init(void)
1572 initialize_tlbstate_and_flush(); 1570 initialize_tlbstate_and_flush();
1573 enter_lazy_tlb(&init_mm, me); 1571 enter_lazy_tlb(&init_mm, me);
1574 1572
1575 load_sp0(t, &current->thread); 1573 /*
1574 * Initialize the TSS. Don't bother initializing sp0, as the initial
1575 * task never enters user mode.
1576 */
1576 set_tss_desc(cpu, t); 1577 set_tss_desc(cpu, t);
1577 load_TR_desc(); 1578 load_TR_desc();
1579
1578 load_mm_ldt(&init_mm); 1580 load_mm_ldt(&init_mm);
1579 1581
1580 clear_all_debug_regs(); 1582 clear_all_debug_regs();
@@ -1596,7 +1598,6 @@ void cpu_init(void)
1596 int cpu = smp_processor_id(); 1598 int cpu = smp_processor_id();
1597 struct task_struct *curr = current; 1599 struct task_struct *curr = current;
1598 struct tss_struct *t = &per_cpu(cpu_tss, cpu); 1600 struct tss_struct *t = &per_cpu(cpu_tss, cpu);
1599 struct thread_struct *thread = &curr->thread;
1600 1601
1601 wait_for_master_cpu(cpu); 1602 wait_for_master_cpu(cpu);
1602 1603
@@ -1627,9 +1628,13 @@ void cpu_init(void)
1627 initialize_tlbstate_and_flush(); 1628 initialize_tlbstate_and_flush();
1628 enter_lazy_tlb(&init_mm, curr); 1629 enter_lazy_tlb(&init_mm, curr);
1629 1630
1630 load_sp0(t, thread); 1631 /*
1632 * Initialize the TSS. Don't bother initializing sp0, as the initial
1633 * task never enters user mode.
1634 */
1631 set_tss_desc(cpu, t); 1635 set_tss_desc(cpu, t);
1632 load_TR_desc(); 1636 load_TR_desc();
1637
1633 load_mm_ldt(&init_mm); 1638 load_mm_ldt(&init_mm);
1634 1639
1635 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1640 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 000000000000..c21f22d836ad
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,125 @@
1/* Declare dependencies between CPUIDs */
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <asm/cpufeature.h>
6
7struct cpuid_dep {
8 unsigned int feature;
9 unsigned int depends;
10};
11
12/*
13 * Table of CPUID features that depend on others.
14 *
15 * This only includes dependencies that can be usefully disabled, not
16 * features part of the base set (like FPU).
17 *
18 * Note this all is not __init / __initdata because it can be
19 * called from cpu hotplug. It shouldn't do anything in this case,
20 * but it's difficult to tell that to the init reference checker.
21 */
22const static struct cpuid_dep cpuid_deps[] = {
23 { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
24 { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
25 { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
26 { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
27 { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
28 { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
29 { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
30 { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
31 { X86_FEATURE_XMM, X86_FEATURE_FXSR },
32 { X86_FEATURE_XMM2, X86_FEATURE_XMM },
33 { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
34 { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
35 { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
36 { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
37 { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
38 { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
39 { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
40 { X86_FEATURE_AES, X86_FEATURE_XMM2 },
41 { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
42 { X86_FEATURE_FMA, X86_FEATURE_AVX },
43 { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
44 { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
45 { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
46 { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
47 { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
48 { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
49 { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
50 { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
51 { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
52 { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
53 { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
54 { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
55 { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
56 { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
57 { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
58 { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
59 { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
60 { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
61 { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
62 {}
63};
64
65static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
66{
67 clear_bit32(bit, c->x86_capability);
68}
69
70static inline void __setup_clear_cpu_cap(unsigned int bit)
71{
72 clear_cpu_cap(&boot_cpu_data, bit);
73 set_bit32(bit, cpu_caps_cleared);
74}
75
76static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
77{
78 if (!c)
79 __setup_clear_cpu_cap(feature);
80 else
81 __clear_cpu_cap(c, feature);
82}
83
84/* Take the capabilities and the BUG bits into account */
85#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
86
87static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
88{
89 DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
90 const struct cpuid_dep *d;
91 bool changed;
92
93 if (WARN_ON(feature >= MAX_FEATURE_BITS))
94 return;
95
96 clear_feature(c, feature);
97
98 /* Collect all features to disable, handling dependencies */
99 memset(disable, 0, sizeof(disable));
100 __set_bit(feature, disable);
101
102 /* Loop until we get a stable state. */
103 do {
104 changed = false;
105 for (d = cpuid_deps; d->feature; d++) {
106 if (!test_bit(d->depends, disable))
107 continue;
108 if (__test_and_set_bit(d->feature, disable))
109 continue;
110
111 changed = true;
112 clear_feature(c, d->feature);
113 }
114 } while (changed);
115}
116
117void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
118{
119 do_clear_cpu_cap(c, feature);
120}
121
122void setup_clear_cpu_cap(unsigned int feature)
123{
124 do_clear_cpu_cap(NULL, feature);
125}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e3d9a5..6abd83572b01 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
249 */ 249 */
250static void __init fpu__init_parse_early_param(void) 250static void __init fpu__init_parse_early_param(void)
251{ 251{
252 char arg[32];
253 char *argptr = arg;
254 int bit;
255
252 if (cmdline_find_option_bool(boot_command_line, "no387")) 256 if (cmdline_find_option_bool(boot_command_line, "no387"))
253 setup_clear_cpu_cap(X86_FEATURE_FPU); 257 setup_clear_cpu_cap(X86_FEATURE_FPU);
254 258
@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
266 270
267 if (cmdline_find_option_bool(boot_command_line, "noxsaves")) 271 if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
268 setup_clear_cpu_cap(X86_FEATURE_XSAVES); 272 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
273
274 if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
275 sizeof(arg)) &&
276 get_option(&argptr, &bit) &&
277 bit >= 0 &&
278 bit < NCAPINTS * 32)
279 setup_clear_cpu_cap(bit);
269} 280}
270 281
271/* 282/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476c9022..87a57b7642d3 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
15#include <asm/fpu/xstate.h> 15#include <asm/fpu/xstate.h>
16 16
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm/cpufeature.h>
18 19
19/* 20/*
20 * Although we spell it out in here, the Processor Trace 21 * Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
36 "unknown xstate feature" , 37 "unknown xstate feature" ,
37}; 38};
38 39
40static short xsave_cpuid_features[] __initdata = {
41 X86_FEATURE_FPU,
42 X86_FEATURE_XMM,
43 X86_FEATURE_AVX,
44 X86_FEATURE_MPX,
45 X86_FEATURE_MPX,
46 X86_FEATURE_AVX512F,
47 X86_FEATURE_AVX512F,
48 X86_FEATURE_AVX512F,
49 X86_FEATURE_INTEL_PT,
50 X86_FEATURE_PKU,
51};
52
39/* 53/*
40 * Mask of xstate features supported by the CPU and the kernel: 54 * Mask of xstate features supported by the CPU and the kernel:
41 */ 55 */
@@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
59void fpu__xstate_clear_all_cpu_caps(void) 73void fpu__xstate_clear_all_cpu_caps(void)
60{ 74{
61 setup_clear_cpu_cap(X86_FEATURE_XSAVE); 75 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
62 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
63 setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
64 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
65 setup_clear_cpu_cap(X86_FEATURE_AVX);
66 setup_clear_cpu_cap(X86_FEATURE_AVX2);
67 setup_clear_cpu_cap(X86_FEATURE_AVX512F);
68 setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
69 setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
70 setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
71 setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
72 setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
73 setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
74 setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
75 setup_clear_cpu_cap(X86_FEATURE_MPX);
76 setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
77 setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
78 setup_clear_cpu_cap(X86_FEATURE_PKU);
79 setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
80 setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
81 setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
82} 76}
83 77
84/* 78/*
@@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
726 unsigned int eax, ebx, ecx, edx; 720 unsigned int eax, ebx, ecx, edx;
727 static int on_boot_cpu __initdata = 1; 721 static int on_boot_cpu __initdata = 1;
728 int err; 722 int err;
723 int i;
729 724
730 WARN_ON_FPU(!on_boot_cpu); 725 WARN_ON_FPU(!on_boot_cpu);
731 on_boot_cpu = 0; 726 on_boot_cpu = 0;
@@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
759 goto out_disable; 754 goto out_disable;
760 } 755 }
761 756
757 /*
758 * Clear XSAVE features that are disabled in the normal CPUID.
759 */
760 for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
761 if (!boot_cpu_has(xsave_cpuid_features[i]))
762 xfeatures_mask &= ~BIT(i);
763 }
764
762 xfeatures_mask &= fpu__get_supported_xfeatures_mask(); 765 xfeatures_mask &= fpu__get_supported_xfeatures_mask();
763 766
764 /* Enable xstate instructions to be able to continue with initialization: */ 767 /* Enable xstate instructions to be able to continue with initialization: */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f1d528bb66a6..c29020907886 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -212,9 +212,6 @@ ENTRY(startup_32_smp)
212#endif 212#endif
213 213
214.Ldefault_entry: 214.Ldefault_entry:
215#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
216 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
217 X86_CR0_PG)
218 movl $(CR0_STATE & ~X86_CR0_PG),%eax 215 movl $(CR0_STATE & ~X86_CR0_PG),%eax
219 movl %eax,%cr0 216 movl %eax,%cr0
220 217
@@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array)
402 # 24(%rsp) error code 399 # 24(%rsp) error code
403 i = 0 400 i = 0
404 .rept NUM_EXCEPTION_VECTORS 401 .rept NUM_EXCEPTION_VECTORS
405 .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 402 .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
406 pushl $0 # Dummy error code, to make stack frame uniform 403 pushl $0 # Dummy error code, to make stack frame uniform
407 .endif 404 .endif
408 pushl $i # 20(%esp) Vector number 405 pushl $i # 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6dde3f3fc1f8..fd58835d8f9b 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -50,6 +50,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
50 .code64 50 .code64
51 .globl startup_64 51 .globl startup_64
52startup_64: 52startup_64:
53 UNWIND_HINT_EMPTY
53 /* 54 /*
54 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, 55 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
55 * and someone has loaded an identity mapped page table 56 * and someone has loaded an identity mapped page table
@@ -89,6 +90,7 @@ startup_64:
89 addq $(early_top_pgt - __START_KERNEL_map), %rax 90 addq $(early_top_pgt - __START_KERNEL_map), %rax
90 jmp 1f 91 jmp 1f
91ENTRY(secondary_startup_64) 92ENTRY(secondary_startup_64)
93 UNWIND_HINT_EMPTY
92 /* 94 /*
93 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, 95 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
94 * and someone has loaded a mapped page table. 96 * and someone has loaded a mapped page table.
@@ -133,6 +135,7 @@ ENTRY(secondary_startup_64)
133 movq $1f, %rax 135 movq $1f, %rax
134 jmp *%rax 136 jmp *%rax
1351: 1371:
138 UNWIND_HINT_EMPTY
136 139
137 /* Check if nx is implemented */ 140 /* Check if nx is implemented */
138 movl $0x80000001, %eax 141 movl $0x80000001, %eax
@@ -150,9 +153,6 @@ ENTRY(secondary_startup_64)
1501: wrmsr /* Make changes effective */ 1531: wrmsr /* Make changes effective */
151 154
152 /* Setup cr0 */ 155 /* Setup cr0 */
153#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
154 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
155 X86_CR0_PG)
156 movl $CR0_STATE, %eax 156 movl $CR0_STATE, %eax
157 /* Make changes effective */ 157 /* Make changes effective */
158 movq %rax, %cr0 158 movq %rax, %cr0
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
235 pushq %rax # target address in negative space 235 pushq %rax # target address in negative space
236 lretq 236 lretq
237.Lafter_lret: 237.Lafter_lret:
238ENDPROC(secondary_startup_64) 238END(secondary_startup_64)
239 239
240#include "verify_cpu.S" 240#include "verify_cpu.S"
241 241
@@ -247,6 +247,7 @@ ENDPROC(secondary_startup_64)
247 */ 247 */
248ENTRY(start_cpu0) 248ENTRY(start_cpu0)
249 movq initial_stack(%rip), %rsp 249 movq initial_stack(%rip), %rsp
250 UNWIND_HINT_EMPTY
250 jmp .Ljump_to_C_code 251 jmp .Ljump_to_C_code
251ENDPROC(start_cpu0) 252ENDPROC(start_cpu0)
252#endif 253#endif
@@ -266,26 +267,24 @@ ENDPROC(start_cpu0)
266 .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS 267 .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
267 __FINITDATA 268 __FINITDATA
268 269
269bad_address:
270 jmp bad_address
271
272 __INIT 270 __INIT
273ENTRY(early_idt_handler_array) 271ENTRY(early_idt_handler_array)
274 # 104(%rsp) %rflags
275 # 96(%rsp) %cs
276 # 88(%rsp) %rip
277 # 80(%rsp) error code
278 i = 0 272 i = 0
279 .rept NUM_EXCEPTION_VECTORS 273 .rept NUM_EXCEPTION_VECTORS
280 .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 274 .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
281 pushq $0 # Dummy error code, to make stack frame uniform 275 UNWIND_HINT_IRET_REGS
276 pushq $0 # Dummy error code, to make stack frame uniform
277 .else
278 UNWIND_HINT_IRET_REGS offset=8
282 .endif 279 .endif
283 pushq $i # 72(%rsp) Vector number 280 pushq $i # 72(%rsp) Vector number
284 jmp early_idt_handler_common 281 jmp early_idt_handler_common
282 UNWIND_HINT_IRET_REGS
285 i = i + 1 283 i = i + 1
286 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc 284 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
287 .endr 285 .endr
288ENDPROC(early_idt_handler_array) 286 UNWIND_HINT_IRET_REGS offset=16
287END(early_idt_handler_array)
289 288
290early_idt_handler_common: 289early_idt_handler_common:
291 /* 290 /*
@@ -313,6 +312,7 @@ early_idt_handler_common:
313 pushq %r13 /* pt_regs->r13 */ 312 pushq %r13 /* pt_regs->r13 */
314 pushq %r14 /* pt_regs->r14 */ 313 pushq %r14 /* pt_regs->r14 */
315 pushq %r15 /* pt_regs->r15 */ 314 pushq %r15 /* pt_regs->r15 */
315 UNWIND_HINT_REGS
316 316
317 cmpq $14,%rsi /* Page fault? */ 317 cmpq $14,%rsi /* Page fault? */
318 jnz 10f 318 jnz 10f
@@ -327,8 +327,8 @@ early_idt_handler_common:
327 327
32820: 32820:
329 decl early_recursion_flag(%rip) 329 decl early_recursion_flag(%rip)
330 jmp restore_regs_and_iret 330 jmp restore_regs_and_return_to_kernel
331ENDPROC(early_idt_handler_common) 331END(early_idt_handler_common)
332 332
333 __INITDATA 333 __INITDATA
334 334
@@ -435,7 +435,7 @@ ENTRY(phys_base)
435EXPORT_SYMBOL(phys_base) 435EXPORT_SYMBOL(phys_base)
436 436
437#include "../../x86/xen/xen-head.S" 437#include "../../x86/xen/xen-head.S"
438 438
439 __PAGE_ALIGNED_BSS 439 __PAGE_ALIGNED_BSS
440NEXT_PAGE(empty_zero_page) 440NEXT_PAGE(empty_zero_page)
441 .skip PAGE_SIZE 441 .skip PAGE_SIZE
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 4d17bacf4030..ae5615b03def 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -13,6 +13,7 @@
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/smp.h> 15#include <linux/smp.h>
16#include <linux/syscalls.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
18#include <linux/uaccess.h> 19#include <linux/uaccess.h>
@@ -295,8 +296,8 @@ out:
295 return error; 296 return error;
296} 297}
297 298
298asmlinkage int sys_modify_ldt(int func, void __user *ptr, 299SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
299 unsigned long bytecount) 300 unsigned long , bytecount)
300{ 301{
301 int ret = -ENOSYS; 302 int ret = -ENOSYS;
302 303
@@ -314,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
314 ret = write_ldt(ptr, bytecount, 0); 315 ret = write_ldt(ptr, bytecount, 0);
315 break; 316 break;
316 } 317 }
317 return ret; 318 /*
319 * The SYSCALL_DEFINE() macros give us an 'unsigned long'
320 * return type, but the ABI for sys_modify_ldt() expects
321 * 'int'. This cast gives us an int-sized value in %rax
322 * for the return code. The 'unsigned' is necessary so
323 * the compiler does not try to sign-extend the negative
324 * return codes into the high half of the register when
325 * taking the value from int->long.
326 */
327 return (unsigned int)ret;
318} 328}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c67685337c5a..97fb3e5737f5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -49,7 +49,13 @@
49 */ 49 */
50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { 50__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
51 .x86_tss = { 51 .x86_tss = {
52 .sp0 = TOP_OF_INIT_STACK, 52 /*
53 * .sp0 is only used when entering ring 0 from a lower
54 * privilege level. Since the init task never runs anything
55 * but ring 0 code, there is no need for a valid value here.
56 * Poison it.
57 */
58 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
53#ifdef CONFIG_X86_32 59#ifdef CONFIG_X86_32
54 .ss0 = __KERNEL_DS, 60 .ss0 = __KERNEL_DS,
55 .ss1 = __KERNEL_CS, 61 .ss1 = __KERNEL_CS,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 11966251cd42..45bf0c5f93e1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
284 284
285 /* 285 /*
286 * Reload esp0 and cpu_current_top_of_stack. This changes 286 * Reload esp0 and cpu_current_top_of_stack. This changes
287 * current_thread_info(). 287 * current_thread_info(). Refresh the SYSENTER configuration in
288 * case prev or next is vm86.
288 */ 289 */
289 load_sp0(tss, next); 290 update_sp0(next_p);
291 refresh_sysenter_cs(next);
290 this_cpu_write(cpu_current_top_of_stack, 292 this_cpu_write(cpu_current_top_of_stack,
291 (unsigned long)task_stack_page(next_p) + 293 (unsigned long)task_stack_page(next_p) +
292 THREAD_SIZE); 294 THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 302e7b2572d1..eeeb34f85c25 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
274 struct inactive_task_frame *frame; 274 struct inactive_task_frame *frame;
275 struct task_struct *me = current; 275 struct task_struct *me = current;
276 276
277 p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
278 childregs = task_pt_regs(p); 277 childregs = task_pt_regs(p);
279 fork_frame = container_of(childregs, struct fork_frame, regs); 278 fork_frame = container_of(childregs, struct fork_frame, regs);
280 frame = &fork_frame->frame; 279 frame = &fork_frame->frame;
@@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
464 */ 463 */
465 this_cpu_write(current_task, next_p); 464 this_cpu_write(current_task, next_p);
466 465
467 /* Reload esp0 and ss1. This changes current_thread_info(). */ 466 /* Reload sp0. */
468 load_sp0(tss, next); 467 update_sp0(next_p);
469 468
470 /* 469 /*
471 * Now maybe reload the debug registers and handle I/O bitmaps 470 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 65a0ccdc3050..d56c1d209283 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
962#ifdef CONFIG_X86_32 962#ifdef CONFIG_X86_32
963 /* Stack for startup_32 can be just as for start_secondary onwards */ 963 /* Stack for startup_32 can be just as for start_secondary onwards */
964 irq_ctx_init(cpu); 964 irq_ctx_init(cpu);
965 per_cpu(cpu_current_top_of_stack, cpu) = 965 per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
966 (unsigned long)task_stack_page(idle) + THREAD_SIZE;
967#else 966#else
968 initial_gs = per_cpu_offset(cpu); 967 initial_gs = per_cpu_offset(cpu);
969#endif 968#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5a6b8f809792..d366adfc61da 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
141 * will catch asm bugs and any attempt to use ist_preempt_enable 141 * will catch asm bugs and any attempt to use ist_preempt_enable
142 * from double_fault. 142 * from double_fault.
143 */ 143 */
144 BUG_ON((unsigned long)(current_top_of_stack() - 144 BUG_ON(!on_thread_stack());
145 current_stack_pointer) >= THREAD_SIZE);
146 145
147 preempt_enable_no_resched(); 146 preempt_enable_no_resched();
148} 147}
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59aa153..3d3c2f71f617 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
33#include <asm/cpufeatures.h> 33#include <asm/cpufeatures.h>
34#include <asm/msr-index.h> 34#include <asm/msr-index.h>
35 35
36verify_cpu: 36ENTRY(verify_cpu)
37 pushf # Save caller passed flags 37 pushf # Save caller passed flags
38 push $0 # Kill any dangerous flags 38 push $0 # Kill any dangerous flags
39 popf 39 popf
@@ -139,3 +139,4 @@ verify_cpu:
139 popf # Restore caller passed flags 139 popf # Restore caller passed flags
140 xorl %eax, %eax 140 xorl %eax, %eax
141 ret 141 ret
142ENDPROC(verify_cpu)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 68244742ecb0..5edb27f1a2c4 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -55,6 +55,7 @@
55#include <asm/irq.h> 55#include <asm/irq.h>
56#include <asm/traps.h> 56#include <asm/traps.h>
57#include <asm/vm86.h> 57#include <asm/vm86.h>
58#include <asm/switch_to.h>
58 59
59/* 60/*
60 * Known problems: 61 * Known problems:
@@ -94,7 +95,6 @@
94 95
95void save_v86_state(struct kernel_vm86_regs *regs, int retval) 96void save_v86_state(struct kernel_vm86_regs *regs, int retval)
96{ 97{
97 struct tss_struct *tss;
98 struct task_struct *tsk = current; 98 struct task_struct *tsk = current;
99 struct vm86plus_struct __user *user; 99 struct vm86plus_struct __user *user;
100 struct vm86 *vm86 = current->thread.vm86; 100 struct vm86 *vm86 = current->thread.vm86;
@@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
146 do_exit(SIGSEGV); 146 do_exit(SIGSEGV);
147 } 147 }
148 148
149 tss = &per_cpu(cpu_tss, get_cpu()); 149 preempt_disable();
150 tsk->thread.sp0 = vm86->saved_sp0; 150 tsk->thread.sp0 = vm86->saved_sp0;
151 tsk->thread.sysenter_cs = __KERNEL_CS; 151 tsk->thread.sysenter_cs = __KERNEL_CS;
152 load_sp0(tss, &tsk->thread); 152 update_sp0(tsk);
153 refresh_sysenter_cs(&tsk->thread);
153 vm86->saved_sp0 = 0; 154 vm86->saved_sp0 = 0;
154 put_cpu(); 155 preempt_enable();
155 156
156 memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs)); 157 memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
157 158
@@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
237 238
238static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) 239static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
239{ 240{
240 struct tss_struct *tss;
241 struct task_struct *tsk = current; 241 struct task_struct *tsk = current;
242 struct vm86 *vm86 = tsk->thread.vm86; 242 struct vm86 *vm86 = tsk->thread.vm86;
243 struct kernel_vm86_regs vm86regs; 243 struct kernel_vm86_regs vm86regs;
@@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
365 vm86->saved_sp0 = tsk->thread.sp0; 365 vm86->saved_sp0 = tsk->thread.sp0;
366 lazy_save_gs(vm86->regs32.gs); 366 lazy_save_gs(vm86->regs32.gs);
367 367
368 tss = &per_cpu(cpu_tss, get_cpu());
369 /* make room for real-mode segments */ 368 /* make room for real-mode segments */
369 preempt_disable();
370 tsk->thread.sp0 += 16; 370 tsk->thread.sp0 += 16;
371 371
372 if (static_cpu_has(X86_FEATURE_SEP)) 372 if (static_cpu_has(X86_FEATURE_SEP)) {
373 tsk->thread.sysenter_cs = 0; 373 tsk->thread.sysenter_cs = 0;
374 refresh_sysenter_cs(&tsk->thread);
375 }
374 376
375 load_sp0(tss, &tsk->thread); 377 update_sp0(tsk);
376 put_cpu(); 378 preempt_enable();
377 379
378 if (vm86->flags & VM86_SCREEN_BITMAP) 380 if (vm86->flags & VM86_SCREEN_BITMAP)
379 mark_screen_rdonly(tsk->mm); 381 mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b0ff378650a9..3109ba6c6ede 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -30,26 +30,6 @@
30#include <asm/trace/exceptions.h> 30#include <asm/trace/exceptions.h>
31 31
32/* 32/*
33 * Page fault error code bits:
34 *
35 * bit 0 == 0: no page found 1: protection fault
36 * bit 1 == 0: read access 1: write access
37 * bit 2 == 0: kernel-mode access 1: user-mode access
38 * bit 3 == 1: use of reserved bit detected
39 * bit 4 == 1: fault was an instruction fetch
40 * bit 5 == 1: protection keys block access
41 */
42enum x86_pf_error_code {
43
44 PF_PROT = 1 << 0,
45 PF_WRITE = 1 << 1,
46 PF_USER = 1 << 2,
47 PF_RSVD = 1 << 3,
48 PF_INSTR = 1 << 4,
49 PF_PK = 1 << 5,
50};
51
52/*
53 * Returns 0 if mmiotrace is disabled, or if the fault is not 33 * Returns 0 if mmiotrace is disabled, or if the fault is not
54 * handled by mmiotrace: 34 * handled by mmiotrace:
55 */ 35 */
@@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
150 * If it was an exec (instruction fetch) fault on NX page, then 130 * If it was an exec (instruction fetch) fault on NX page, then
151 * do not ignore the fault: 131 * do not ignore the fault:
152 */ 132 */
153 if (error_code & PF_INSTR) 133 if (error_code & X86_PF_INSTR)
154 return 0; 134 return 0;
155 135
156 instr = (void *)convert_ip_to_linear(current, regs); 136 instr = (void *)convert_ip_to_linear(current, regs);
@@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
180 * siginfo so userspace can discover which protection key was set 160 * siginfo so userspace can discover which protection key was set
181 * on the PTE. 161 * on the PTE.
182 * 162 *
183 * If we get here, we know that the hardware signaled a PF_PK 163 * If we get here, we know that the hardware signaled a X86_PF_PK
184 * fault and that there was a VMA once we got in the fault 164 * fault and that there was a VMA once we got in the fault
185 * handler. It does *not* guarantee that the VMA we find here 165 * handler. It does *not* guarantee that the VMA we find here
186 * was the one that we faulted on. 166 * was the one that we faulted on.
@@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
205 /* 185 /*
206 * force_sig_info_fault() is called from a number of 186 * force_sig_info_fault() is called from a number of
207 * contexts, some of which have a VMA and some of which 187 * contexts, some of which have a VMA and some of which
208 * do not. The PF_PK handling happens after we have a 188 * do not. The X86_PF_PK handling happens after we have a
209 * valid VMA, so we should never reach this without a 189 * valid VMA, so we should never reach this without a
210 * valid VMA. 190 * valid VMA.
211 */ 191 */
@@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
698 if (!oops_may_print()) 678 if (!oops_may_print())
699 return; 679 return;
700 680
701 if (error_code & PF_INSTR) { 681 if (error_code & X86_PF_INSTR) {
702 unsigned int level; 682 unsigned int level;
703 pgd_t *pgd; 683 pgd_t *pgd;
704 pte_t *pte; 684 pte_t *pte;
@@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
780 */ 760 */
781 if (current->thread.sig_on_uaccess_err && signal) { 761 if (current->thread.sig_on_uaccess_err && signal) {
782 tsk->thread.trap_nr = X86_TRAP_PF; 762 tsk->thread.trap_nr = X86_TRAP_PF;
783 tsk->thread.error_code = error_code | PF_USER; 763 tsk->thread.error_code = error_code | X86_PF_USER;
784 tsk->thread.cr2 = address; 764 tsk->thread.cr2 = address;
785 765
786 /* XXX: hwpoison faults will set the wrong code. */ 766 /* XXX: hwpoison faults will set the wrong code. */
@@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
898 struct task_struct *tsk = current; 878 struct task_struct *tsk = current;
899 879
900 /* User mode accesses just cause a SIGSEGV */ 880 /* User mode accesses just cause a SIGSEGV */
901 if (error_code & PF_USER) { 881 if (error_code & X86_PF_USER) {
902 /* 882 /*
903 * It's possible to have interrupts off here: 883 * It's possible to have interrupts off here:
904 */ 884 */
@@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
919 * Instruction fetch faults in the vsyscall page might need 899 * Instruction fetch faults in the vsyscall page might need
920 * emulation. 900 * emulation.
921 */ 901 */
922 if (unlikely((error_code & PF_INSTR) && 902 if (unlikely((error_code & X86_PF_INSTR) &&
923 ((address & ~0xfff) == VSYSCALL_ADDR))) { 903 ((address & ~0xfff) == VSYSCALL_ADDR))) {
924 if (emulate_vsyscall(regs, address)) 904 if (emulate_vsyscall(regs, address))
925 return; 905 return;
@@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
932 * are always protection faults. 912 * are always protection faults.
933 */ 913 */
934 if (address >= TASK_SIZE_MAX) 914 if (address >= TASK_SIZE_MAX)
935 error_code |= PF_PROT; 915 error_code |= X86_PF_PROT;
936 916
937 if (likely(show_unhandled_signals)) 917 if (likely(show_unhandled_signals))
938 show_signal_msg(regs, error_code, address, tsk); 918 show_signal_msg(regs, error_code, address, tsk);
@@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
993 973
994 if (!boot_cpu_has(X86_FEATURE_OSPKE)) 974 if (!boot_cpu_has(X86_FEATURE_OSPKE))
995 return false; 975 return false;
996 if (error_code & PF_PK) 976 if (error_code & X86_PF_PK)
997 return true; 977 return true;
998 /* this checks permission keys on the VMA: */ 978 /* this checks permission keys on the VMA: */
999 if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), 979 if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
1000 (error_code & PF_INSTR), foreign)) 980 (error_code & X86_PF_INSTR), foreign))
1001 return true; 981 return true;
1002 return false; 982 return false;
1003} 983}
@@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
1025 int code = BUS_ADRERR; 1005 int code = BUS_ADRERR;
1026 1006
1027 /* Kernel mode? Handle exceptions or die: */ 1007 /* Kernel mode? Handle exceptions or die: */
1028 if (!(error_code & PF_USER)) { 1008 if (!(error_code & X86_PF_USER)) {
1029 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); 1009 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
1030 return; 1010 return;
1031 } 1011 }
@@ -1053,14 +1033,14 @@ static noinline void
1053mm_fault_error(struct pt_regs *regs, unsigned long error_code, 1033mm_fault_error(struct pt_regs *regs, unsigned long error_code,
1054 unsigned long address, u32 *pkey, unsigned int fault) 1034 unsigned long address, u32 *pkey, unsigned int fault)
1055{ 1035{
1056 if (fatal_signal_pending(current) && !(error_code & PF_USER)) { 1036 if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
1057 no_context(regs, error_code, address, 0, 0); 1037 no_context(regs, error_code, address, 0, 0);
1058 return; 1038 return;
1059 } 1039 }
1060 1040
1061 if (fault & VM_FAULT_OOM) { 1041 if (fault & VM_FAULT_OOM) {
1062 /* Kernel mode? Handle exceptions or die: */ 1042 /* Kernel mode? Handle exceptions or die: */
1063 if (!(error_code & PF_USER)) { 1043 if (!(error_code & X86_PF_USER)) {
1064 no_context(regs, error_code, address, 1044 no_context(regs, error_code, address,
1065 SIGSEGV, SEGV_MAPERR); 1045 SIGSEGV, SEGV_MAPERR);
1066 return; 1046 return;
@@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
1085 1065
1086static int spurious_fault_check(unsigned long error_code, pte_t *pte) 1066static int spurious_fault_check(unsigned long error_code, pte_t *pte)
1087{ 1067{
1088 if ((error_code & PF_WRITE) && !pte_write(*pte)) 1068 if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
1089 return 0; 1069 return 0;
1090 1070
1091 if ((error_code & PF_INSTR) && !pte_exec(*pte)) 1071 if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
1092 return 0; 1072 return 0;
1093 /* 1073 /*
1094 * Note: We do not do lazy flushing on protection key 1074 * Note: We do not do lazy flushing on protection key
1095 * changes, so no spurious fault will ever set PF_PK. 1075 * changes, so no spurious fault will ever set X86_PF_PK.
1096 */ 1076 */
1097 if ((error_code & PF_PK)) 1077 if ((error_code & X86_PF_PK))
1098 return 1; 1078 return 1;
1099 1079
1100 return 1; 1080 return 1;
@@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
1140 * change, so user accesses are not expected to cause spurious 1120 * change, so user accesses are not expected to cause spurious
1141 * faults. 1121 * faults.
1142 */ 1122 */
1143 if (error_code != (PF_WRITE | PF_PROT) 1123 if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
1144 && error_code != (PF_INSTR | PF_PROT)) 1124 error_code != (X86_PF_INSTR | X86_PF_PROT))
1145 return 0; 1125 return 0;
1146 1126
1147 pgd = init_mm.pgd + pgd_index(address); 1127 pgd = init_mm.pgd + pgd_index(address);
@@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
1201 * always an unconditional error and can never result in 1181 * always an unconditional error and can never result in
1202 * a follow-up action to resolve the fault, like a COW. 1182 * a follow-up action to resolve the fault, like a COW.
1203 */ 1183 */
1204 if (error_code & PF_PK) 1184 if (error_code & X86_PF_PK)
1205 return 1; 1185 return 1;
1206 1186
1207 /* 1187 /*
1208 * Make sure to check the VMA so that we do not perform 1188 * Make sure to check the VMA so that we do not perform
1209 * faults just to hit a PF_PK as soon as we fill in a 1189 * faults just to hit a X86_PF_PK as soon as we fill in a
1210 * page. 1190 * page.
1211 */ 1191 */
1212 if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), 1192 if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
1213 (error_code & PF_INSTR), foreign)) 1193 (error_code & X86_PF_INSTR), foreign))
1214 return 1; 1194 return 1;
1215 1195
1216 if (error_code & PF_WRITE) { 1196 if (error_code & X86_PF_WRITE) {
1217 /* write, present and write, not present: */ 1197 /* write, present and write, not present: */
1218 if (unlikely(!(vma->vm_flags & VM_WRITE))) 1198 if (unlikely(!(vma->vm_flags & VM_WRITE)))
1219 return 1; 1199 return 1;
@@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
1221 } 1201 }
1222 1202
1223 /* read, present: */ 1203 /* read, present: */
1224 if (unlikely(error_code & PF_PROT)) 1204 if (unlikely(error_code & X86_PF_PROT))
1225 return 1; 1205 return 1;
1226 1206
1227 /* read, not present: */ 1207 /* read, not present: */
@@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
1244 if (!static_cpu_has(X86_FEATURE_SMAP)) 1224 if (!static_cpu_has(X86_FEATURE_SMAP))
1245 return false; 1225 return false;
1246 1226
1247 if (error_code & PF_USER) 1227 if (error_code & X86_PF_USER)
1248 return false; 1228 return false;
1249 1229
1250 if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) 1230 if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1297 * protection error (error_code & 9) == 0. 1277 * protection error (error_code & 9) == 0.
1298 */ 1278 */
1299 if (unlikely(fault_in_kernel_space(address))) { 1279 if (unlikely(fault_in_kernel_space(address))) {
1300 if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { 1280 if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
1301 if (vmalloc_fault(address) >= 0) 1281 if (vmalloc_fault(address) >= 0)
1302 return; 1282 return;
1303 1283
@@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1325 if (unlikely(kprobes_fault(regs))) 1305 if (unlikely(kprobes_fault(regs)))
1326 return; 1306 return;
1327 1307
1328 if (unlikely(error_code & PF_RSVD)) 1308 if (unlikely(error_code & X86_PF_RSVD))
1329 pgtable_bad(regs, error_code, address); 1309 pgtable_bad(regs, error_code, address);
1330 1310
1331 if (unlikely(smap_violation(error_code, regs))) { 1311 if (unlikely(smap_violation(error_code, regs))) {
@@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1351 */ 1331 */
1352 if (user_mode(regs)) { 1332 if (user_mode(regs)) {
1353 local_irq_enable(); 1333 local_irq_enable();
1354 error_code |= PF_USER; 1334 error_code |= X86_PF_USER;
1355 flags |= FAULT_FLAG_USER; 1335 flags |= FAULT_FLAG_USER;
1356 } else { 1336 } else {
1357 if (regs->flags & X86_EFLAGS_IF) 1337 if (regs->flags & X86_EFLAGS_IF)
@@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1360 1340
1361 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 1341 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1362 1342
1363 if (error_code & PF_WRITE) 1343 if (error_code & X86_PF_WRITE)
1364 flags |= FAULT_FLAG_WRITE; 1344 flags |= FAULT_FLAG_WRITE;
1365 if (error_code & PF_INSTR) 1345 if (error_code & X86_PF_INSTR)
1366 flags |= FAULT_FLAG_INSTRUCTION; 1346 flags |= FAULT_FLAG_INSTRUCTION;
1367 1347
1368 /* 1348 /*
@@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
1382 * space check, thus avoiding the deadlock: 1362 * space check, thus avoiding the deadlock:
1383 */ 1363 */
1384 if (unlikely(!down_read_trylock(&mm->mmap_sem))) { 1364 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
1385 if ((error_code & PF_USER) == 0 && 1365 if (!(error_code & X86_PF_USER) &&
1386 !search_exception_tables(regs->ip)) { 1366 !search_exception_tables(regs->ip)) {
1387 bad_area_nosemaphore(regs, error_code, address, NULL); 1367 bad_area_nosemaphore(regs, error_code, address, NULL);
1388 return; 1368 return;
@@ -1409,7 +1389,7 @@ retry:
1409 bad_area(regs, error_code, address); 1389 bad_area(regs, error_code, address);
1410 return; 1390 return;
1411 } 1391 }
1412 if (error_code & PF_USER) { 1392 if (error_code & X86_PF_USER) {
1413 /* 1393 /*
1414 * Accessing the stack below %sp is always a bug. 1394 * Accessing the stack below %sp is always a bug.
1415 * The large cushion allows instructions like enter 1395 * The large cushion allows instructions like enter
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb5df43..3ee234b6234d 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/syscalls.h>
9#include <linux/uaccess.h> 10#include <linux/uaccess.h>
10#include <asm/unistd.h> 11#include <asm/unistd.h>
11#include <os.h> 12#include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
369 mm->arch.ldt.entry_count = 0; 370 mm->arch.ldt.entry_count = 0;
370} 371}
371 372
372int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) 373SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
374 unsigned long , bytecount)
373{ 375{
374 return do_modify_ldt_skas(func, ptr, bytecount); 376 /* See non-um modify_ldt() for why we do this cast */
377 return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
375} 378}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d4396e27b1fb..e55d276afc70 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = {
601#ifdef CONFIG_X86_MCE 601#ifdef CONFIG_X86_MCE
602 { machine_check, xen_machine_check, true }, 602 { machine_check, xen_machine_check, true },
603#endif 603#endif
604 { nmi, xen_nmi, true }, 604 { nmi, xen_xennmi, true },
605 { overflow, xen_overflow, false }, 605 { overflow, xen_overflow, false },
606#ifdef CONFIG_IA32_EMULATION 606#ifdef CONFIG_IA32_EMULATION
607 { entry_INT80_compat, xen_entry_INT80_compat, false }, 607 { entry_INT80_compat, xen_entry_INT80_compat, false },
@@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
811 } 811 }
812} 812}
813 813
814static void xen_load_sp0(struct tss_struct *tss, 814static void xen_load_sp0(unsigned long sp0)
815 struct thread_struct *thread)
816{ 815{
817 struct multicall_space mcs; 816 struct multicall_space mcs;
818 817
819 mcs = xen_mc_entry(0); 818 mcs = xen_mc_entry(0);
820 MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); 819 MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
821 xen_mc_issue(PARAVIRT_LAZY_CPU); 820 xen_mc_issue(PARAVIRT_LAZY_CPU);
822 tss->x86_tss.sp0 = thread->sp0; 821 this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
823} 822}
824 823
825void xen_set_iopl_mask(unsigned mask) 824void xen_set_iopl_mask(unsigned mask)
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 05f91ce9b55e..c0c756c76afe 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -14,6 +14,7 @@
14 * single-threaded. 14 * single-threaded.
15 */ 15 */
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/sched/task_stack.h>
17#include <linux/err.h> 18#include <linux/err.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <linux/smp.h> 20#include <linux/smp.h>
@@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
294#endif 295#endif
295 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); 296 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
296 297
298 /*
299 * Bring up the CPU in cpu_bringup_and_idle() with the stack
300 * pointing just below where pt_regs would be if it were a normal
301 * kernel entry.
302 */
297 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; 303 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
298 ctxt->flags = VGCF_IN_KERNEL; 304 ctxt->flags = VGCF_IN_KERNEL;
299 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ 305 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
300 ctxt->user_regs.ds = __USER_DS; 306 ctxt->user_regs.ds = __USER_DS;
301 ctxt->user_regs.es = __USER_DS; 307 ctxt->user_regs.es = __USER_DS;
302 ctxt->user_regs.ss = __KERNEL_DS; 308 ctxt->user_regs.ss = __KERNEL_DS;
309 ctxt->user_regs.cs = __KERNEL_CS;
310 ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
303 311
304 xen_copy_trap_info(ctxt->trap_ctxt); 312 xen_copy_trap_info(ctxt->trap_ctxt);
305 313
@@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
314 ctxt->gdt_frames[0] = gdt_mfn; 322 ctxt->gdt_frames[0] = gdt_mfn;
315 ctxt->gdt_ents = GDT_ENTRIES; 323 ctxt->gdt_ents = GDT_ENTRIES;
316 324
325 /*
326 * Set SS:SP that Xen will use when entering guest kernel mode
327 * from guest user mode. Subsequent calls to load_sp0() can
328 * change this value.
329 */
317 ctxt->kernel_ss = __KERNEL_DS; 330 ctxt->kernel_ss = __KERNEL_DS;
318 ctxt->kernel_sp = idle->thread.sp0; 331 ctxt->kernel_sp = task_top_of_stack(idle);
319 332
320#ifdef CONFIG_X86_32 333#ifdef CONFIG_X86_32
321 ctxt->event_callback_cs = __KERNEL_CS; 334 ctxt->event_callback_cs = __KERNEL_CS;
@@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
327 (unsigned long)xen_hypervisor_callback; 340 (unsigned long)xen_hypervisor_callback;
328 ctxt->failsafe_callback_eip = 341 ctxt->failsafe_callback_eip =
329 (unsigned long)xen_failsafe_callback; 342 (unsigned long)xen_failsafe_callback;
330 ctxt->user_regs.cs = __KERNEL_CS;
331 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); 343 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
332 344
333 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
334 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); 345 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
335 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) 346 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
336 BUG(); 347 BUG();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c98a48c861fd..8a10c9a9e2b5 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -30,7 +30,7 @@ xen_pv_trap debug
30xen_pv_trap xendebug 30xen_pv_trap xendebug
31xen_pv_trap int3 31xen_pv_trap int3
32xen_pv_trap xenint3 32xen_pv_trap xenint3
33xen_pv_trap nmi 33xen_pv_trap xennmi
34xen_pv_trap overflow 34xen_pv_trap overflow
35xen_pv_trap bounds 35xen_pv_trap bounds
36xen_pv_trap invalid_op 36xen_pv_trap invalid_op
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index b5b8d7f43557..497cc55a0c16 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -10,6 +10,7 @@
10#include <asm/boot.h> 10#include <asm/boot.h>
11#include <asm/asm.h> 11#include <asm/asm.h>
12#include <asm/page_types.h> 12#include <asm/page_types.h>
13#include <asm/unwind_hints.h>
13 14
14#include <xen/interface/elfnote.h> 15#include <xen/interface/elfnote.h>
15#include <xen/interface/features.h> 16#include <xen/interface/features.h>
@@ -20,6 +21,7 @@
20#ifdef CONFIG_XEN_PV 21#ifdef CONFIG_XEN_PV
21 __INIT 22 __INIT
22ENTRY(startup_xen) 23ENTRY(startup_xen)
24 UNWIND_HINT_EMPTY
23 cld 25 cld
24 26
25 /* Clear .bss */ 27 /* Clear .bss */
@@ -34,21 +36,24 @@ ENTRY(startup_xen)
34 mov $init_thread_union+THREAD_SIZE, %_ASM_SP 36 mov $init_thread_union+THREAD_SIZE, %_ASM_SP
35 37
36 jmp xen_start_kernel 38 jmp xen_start_kernel
37 39END(startup_xen)
38 __FINIT 40 __FINIT
39#endif 41#endif
40 42
41.pushsection .text 43.pushsection .text
42 .balign PAGE_SIZE 44 .balign PAGE_SIZE
43ENTRY(hypercall_page) 45ENTRY(hypercall_page)
44 .skip PAGE_SIZE 46 .rept (PAGE_SIZE / 32)
47 UNWIND_HINT_EMPTY
48 .skip 32
49 .endr
45 50
46#define HYPERCALL(n) \ 51#define HYPERCALL(n) \
47 .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ 52 .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
48 .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 53 .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
49#include <asm/xen-hypercalls.h> 54#include <asm/xen-hypercalls.h>
50#undef HYPERCALL 55#undef HYPERCALL
51 56END(hypercall_page)
52.popsection 57.popsection
53 58
54 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") 59 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8acfc1e099e1..63e56f6c1877 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -687,7 +687,7 @@
687#define BUG_TABLE 687#define BUG_TABLE
688#endif 688#endif
689 689
690#ifdef CONFIG_ORC_UNWINDER 690#ifdef CONFIG_UNWINDER_ORC
691#define ORC_UNWIND_TABLE \ 691#define ORC_UNWIND_TABLE \
692 . = ALIGN(4); \ 692 . = ALIGN(4); \
693 .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ 693 .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index d03c5dd6185d..8a7e9924df57 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -228,6 +228,32 @@ static inline unsigned long __ffs64(u64 word)
228 return __ffs((unsigned long)word); 228 return __ffs((unsigned long)word);
229} 229}
230 230
231/*
232 * clear_bit32 - Clear a bit in memory for u32 array
233 * @nr: Bit to clear
234 * @addr: u32 * address of bitmap
235 *
236 * Same as clear_bit, but avoids needing casts for u32 arrays.
237 */
238
239static __always_inline void clear_bit32(long nr, volatile u32 *addr)
240{
241 clear_bit(nr, (volatile unsigned long *)addr);
242}
243
244/*
245 * set_bit32 - Set a bit in memory for u32 array
246 * @nr: Bit to set
247 * @addr: u32 * address of bitmap
248 *
249 * Same as set_bit, but avoids needing casts for u32 arrays.
250 */
251
252static __always_inline void set_bit32(long nr, volatile u32 *addr)
253{
254 set_bit(nr, (volatile unsigned long *)addr);
255}
256
231#ifdef __KERNEL__ 257#ifdef __KERNEL__
232 258
233#ifndef set_mask_bits 259#ifndef set_mask_bits
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dfdad67d8f6c..ff21b4dbb392 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -376,7 +376,7 @@ config STACK_VALIDATION
376 that runtime stack traces are more reliable. 376 that runtime stack traces are more reliable.
377 377
378 This is also a prerequisite for generation of ORC unwind data, which 378 This is also a prerequisite for generation of ORC unwind data, which
379 is needed for CONFIG_ORC_UNWINDER. 379 is needed for CONFIG_UNWINDER_ORC.
380 380
381 For more information, see 381 For more information, see
382 tools/objtool/Documentation/stack-validation.txt. 382 tools/objtool/Documentation/stack-validation.txt.
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index bb831d49bcfd..e63af4e19382 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
259 259
260__objtool_obj := $(objtree)/tools/objtool/objtool 260__objtool_obj := $(objtree)/tools/objtool/objtool
261 261
262objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check) 262objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
263 263
264ifndef CONFIG_FRAME_POINTER 264ifndef CONFIG_FRAME_POINTER
265objtool_args += --no-fp 265objtool_args += --no-fp
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c0e26ad1fa7e..9b341584eb1b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
1757 if (insn->dead_end) 1757 if (insn->dead_end)
1758 return 0; 1758 return 0;
1759 1759
1760 insn = next_insn; 1760 if (!next_insn) {
1761 if (!insn) { 1761 if (state.cfa.base == CFI_UNDEFINED)
1762 return 0;
1762 WARN("%s: unexpected end of section", sec->name); 1763 WARN("%s: unexpected end of section", sec->name);
1763 return 1; 1764 return 1;
1764 } 1765 }
1766
1767 insn = next_insn;
1765 } 1768 }
1766 1769
1767 return 0; 1770 return 0;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 31e0f9143840..07f329919828 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -70,7 +70,7 @@ static void cmd_usage(void)
70 70
71 printf("\n"); 71 printf("\n");
72 72
73 exit(1); 73 exit(129);
74} 74}
75 75
76static void handle_options(int *argc, const char ***argv) 76static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
86 break; 86 break;
87 } else { 87 } else {
88 fprintf(stderr, "Unknown option: %s\n", cmd); 88 fprintf(stderr, "Unknown option: %s\n", cmd);
89 fprintf(stderr, "\n Usage: %s\n", 89 cmd_usage();
90 objtool_usage_string);
91 exit(1);
92 } 90 }
93 91
94 (*argv)++; 92 (*argv)++;