author	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-08 19:59:34 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-08 19:59:34 -0400
commit	a1aab6f3d295f078c008893ee7fa2c011626c46f (patch)
tree	1ae344c8ed1b1dc531be1ae99188c180ad0c6c7c
parent	dad1c12ed831a7a89cc01e5582cd0b81a4be7f19 (diff)
parent	7457c0da024b181a9143988d740001f9bc98698d (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm updates from Ingo Molnar:
 "Most of the changes relate to Peter Zijlstra's cleanup of ptregs
  handling, in particular the i386 part is now much simplified and
  standardized - no more partial ptregs stack frames via the esp/ss
  oddity. This simplifies ftrace, kprobes, the unwinder, ptrace, kdump
  and kgdb.

  There are also CR4 hardening enhancements by Kees Cook, to make the
  generic platform functions such as native_write_cr4() less useful as
  ROP gadgets that disable SMEP/SMAP. Also protect the WP bit of CR0
  against similar attacks.

  The rest is smaller cleanups/fixes"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/alternatives: Add int3_emulate_call() selftest
  x86/stackframe/32: Allow int3_emulate_push()
  x86/stackframe/32: Provide consistent pt_regs
  x86/stackframe, x86/ftrace: Add pt_regs frame annotations
  x86/stackframe, x86/kprobes: Fix frame pointer annotations
  x86/stackframe: Move ENCODE_FRAME_POINTER to asm/frame.h
  x86/entry/32: Clean up return from interrupt preemption path
  x86/asm: Pin sensitive CR0 bits
  x86/asm: Pin sensitive CR4 bits
  Documentation/x86: Fix path to entry_32.S
  x86/asm: Remove unused TASK_TI_flags from asm-offsets.c
-rw-r--r--	Documentation/x86/exception-tables.rst	2
-rw-r--r--	arch/x86/entry/calling.h	15
-rw-r--r--	arch/x86/entry/entry_32.S	145
-rw-r--r--	arch/x86/include/asm/frame.h	49
-rw-r--r--	arch/x86/include/asm/kexec.h	17
-rw-r--r--	arch/x86/include/asm/ptrace.h	17
-rw-r--r--	arch/x86/include/asm/special_insns.h	37
-rw-r--r--	arch/x86/include/asm/stacktrace.h	2
-rw-r--r--	arch/x86/include/asm/text-patching.h	2
-rw-r--r--	arch/x86/kernel/alternative.c	81
-rw-r--r--	arch/x86/kernel/asm-offsets.c	1
-rw-r--r--	arch/x86/kernel/cpu/common.c	20
-rw-r--r--	arch/x86/kernel/crash.c	8
-rw-r--r--	arch/x86/kernel/ftrace.c	7
-rw-r--r--	arch/x86/kernel/ftrace_32.S	78
-rw-r--r--	arch/x86/kernel/ftrace_64.S	3
-rw-r--r--	arch/x86/kernel/kgdb.c	8
-rw-r--r--	arch/x86/kernel/kprobes/common.h	28
-rw-r--r--	arch/x86/kernel/kprobes/core.c	29
-rw-r--r--	arch/x86/kernel/kprobes/opt.c	20
-rw-r--r--	arch/x86/kernel/process_32.c	16
-rw-r--r--	arch/x86/kernel/ptrace.c	29
-rw-r--r--	arch/x86/kernel/smpboot.c	8
-rw-r--r--	arch/x86/kernel/time.c	3
-rw-r--r--	arch/x86/kernel/unwind_frame.c	32
-rw-r--r--	arch/x86/kernel/unwind_orc.c	2
26 files changed, 394 insertions, 265 deletions
diff --git a/Documentation/x86/exception-tables.rst b/Documentation/x86/exception-tables.rst
index 24596c8210b5..ed6d4b0cf62c 100644
--- a/Documentation/x86/exception-tables.rst
+++ b/Documentation/x86/exception-tables.rst
@@ -35,7 +35,7 @@ page fault handler::
35 void do_page_fault(struct pt_regs *regs, unsigned long error_code) 35 void do_page_fault(struct pt_regs *regs, unsigned long error_code)
36 36
37in arch/x86/mm/fault.c. The parameters on the stack are set up by 37in arch/x86/mm/fault.c. The parameters on the stack are set up by
38the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter 38the low level assembly glue in arch/x86/entry/entry_32.S. The parameter
39regs is a pointer to the saved registers on the stack, error_code 39regs is a pointer to the saved registers on the stack, error_code
40contains a reason code for the exception. 40contains a reason code for the exception.
41 41
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index efb0d1b1f15f..9f1f9e3b8230 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with
172 .endif 172 .endif
173.endm 173.endm
174 174
175/*
176 * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
177 * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
178 * is just setting the LSB, which makes it an invalid stack address and is also
179 * a signal to the unwinder that it's a pt_regs pointer in disguise.
180 *
181 * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
182 * the original rbp.
183 */
184.macro ENCODE_FRAME_POINTER ptregs_offset=0
185#ifdef CONFIG_FRAME_POINTER
186 leaq 1+\ptregs_offset(%rsp), %rbp
187#endif
188.endm
189
190#ifdef CONFIG_PAGE_TABLE_ISOLATION 175#ifdef CONFIG_PAGE_TABLE_ISOLATION
191 176
192/* 177/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 44c6e6f54bf7..1285e5abf669 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,7 +67,6 @@
67# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF 67# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
68#else 68#else
69# define preempt_stop(clobbers) 69# define preempt_stop(clobbers)
70# define resume_kernel restore_all_kernel
71#endif 70#endif
72 71
73.macro TRACE_IRQS_IRET 72.macro TRACE_IRQS_IRET
@@ -203,9 +202,102 @@
203.Lend_\@: 202.Lend_\@:
204.endm 203.endm
205 204
205#define CS_FROM_ENTRY_STACK (1 << 31)
206#define CS_FROM_USER_CR3 (1 << 30)
207#define CS_FROM_KERNEL (1 << 29)
208
209.macro FIXUP_FRAME
210 /*
211 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
212 * Clear them in case hardware didn't do this for us.
213 */
214 andl $0x0000ffff, 3*4(%esp)
215
216#ifdef CONFIG_VM86
217 testl $X86_EFLAGS_VM, 4*4(%esp)
218 jnz .Lfrom_usermode_no_fixup_\@
219#endif
220 testl $SEGMENT_RPL_MASK, 3*4(%esp)
221 jnz .Lfrom_usermode_no_fixup_\@
222
223 orl $CS_FROM_KERNEL, 3*4(%esp)
224
225 /*
226 * When we're here from kernel mode, the (exception) stack looks like:
227 *
228 * 5*4(%esp) - <previous context>
229 * 4*4(%esp) - flags
230 * 3*4(%esp) - cs
231 * 2*4(%esp) - ip
232 * 1*4(%esp) - orig_eax
233 * 0*4(%esp) - gs / function
234 *
235 * Let's build a 5 entry IRET frame after that, such that struct pt_regs
236 * is complete and in particular regs->sp is correct. This gives us
237 * the original 5 entries as gap:
238 *
239 * 12*4(%esp) - <previous context>
240 * 11*4(%esp) - gap / flags
241 * 10*4(%esp) - gap / cs
242 * 9*4(%esp) - gap / ip
243 * 8*4(%esp) - gap / orig_eax
244 * 7*4(%esp) - gap / gs / function
245 * 6*4(%esp) - ss
246 * 5*4(%esp) - sp
247 * 4*4(%esp) - flags
248 * 3*4(%esp) - cs
249 * 2*4(%esp) - ip
250 * 1*4(%esp) - orig_eax
251 * 0*4(%esp) - gs / function
252 */
253
254 pushl %ss # ss
255 pushl %esp # sp (points at ss)
256 addl $6*4, (%esp) # point sp back at the previous context
257 pushl 6*4(%esp) # flags
258 pushl 6*4(%esp) # cs
259 pushl 6*4(%esp) # ip
260 pushl 6*4(%esp) # orig_eax
261 pushl 6*4(%esp) # gs / function
262.Lfrom_usermode_no_fixup_\@:
263.endm
264
265.macro IRET_FRAME
266 testl $CS_FROM_KERNEL, 1*4(%esp)
267 jz .Lfinished_frame_\@
268
269 /*
270 * Reconstruct the 3 entry IRET frame right after the (modified)
271 * regs->sp without lowering %esp in between, such that an NMI in the
272 * middle doesn't scribble our stack.
273 */
274 pushl %eax
275 pushl %ecx
276 movl 5*4(%esp), %eax # (modified) regs->sp
277
278 movl 4*4(%esp), %ecx # flags
279 movl %ecx, -4(%eax)
280
281 movl 3*4(%esp), %ecx # cs
282 andl $0x0000ffff, %ecx
283 movl %ecx, -8(%eax)
284
285 movl 2*4(%esp), %ecx # ip
286 movl %ecx, -12(%eax)
287
288 movl 1*4(%esp), %ecx # eax
289 movl %ecx, -16(%eax)
290
291 popl %ecx
292 lea -16(%eax), %esp
293 popl %eax
294.Lfinished_frame_\@:
295.endm
296
206.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 297.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
207 cld 298 cld
208 PUSH_GS 299 PUSH_GS
300 FIXUP_FRAME
209 pushl %fs 301 pushl %fs
210 pushl %es 302 pushl %es
211 pushl %ds 303 pushl %ds
@@ -247,22 +339,6 @@
247.Lend_\@: 339.Lend_\@:
248.endm 340.endm
249 341
250/*
251 * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
252 * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
253 * is just clearing the MSB, which makes it an invalid stack address and is also
254 * a signal to the unwinder that it's a pt_regs pointer in disguise.
255 *
256 * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
257 * original rbp.
258 */
259.macro ENCODE_FRAME_POINTER
260#ifdef CONFIG_FRAME_POINTER
261 mov %esp, %ebp
262 andl $0x7fffffff, %ebp
263#endif
264.endm
265
266.macro RESTORE_INT_REGS 342.macro RESTORE_INT_REGS
267 popl %ebx 343 popl %ebx
268 popl %ecx 344 popl %ecx
@@ -375,9 +451,6 @@
375 * switch to it before we do any copying. 451 * switch to it before we do any copying.
376 */ 452 */
377 453
378#define CS_FROM_ENTRY_STACK (1 << 31)
379#define CS_FROM_USER_CR3 (1 << 30)
380
381.macro SWITCH_TO_KERNEL_STACK 454.macro SWITCH_TO_KERNEL_STACK
382 455
383 ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV 456 ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,13 +464,6 @@
391 * that register for the time this macro runs 464 * that register for the time this macro runs
392 */ 465 */
393 466
394 /*
395 * The high bits of the CS dword (__csh) are used for
396 * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
397 * hardware didn't do this for us.
398 */
399 andl $(0x0000ffff), PT_CS(%esp)
400
401 /* Are we on the entry stack? Bail out if not! */ 467 /* Are we on the entry stack? Bail out if not! */
402 movl PER_CPU_VAR(cpu_entry_area), %ecx 468 movl PER_CPU_VAR(cpu_entry_area), %ecx
403 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx 469 addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -755,7 +821,7 @@ ret_from_intr:
755 andl $SEGMENT_RPL_MASK, %eax 821 andl $SEGMENT_RPL_MASK, %eax
756#endif 822#endif
757 cmpl $USER_RPL, %eax 823 cmpl $USER_RPL, %eax
758 jb resume_kernel # not returning to v8086 or userspace 824 jb restore_all_kernel # not returning to v8086 or userspace
759 825
760ENTRY(resume_userspace) 826ENTRY(resume_userspace)
761 DISABLE_INTERRUPTS(CLBR_ANY) 827 DISABLE_INTERRUPTS(CLBR_ANY)
@@ -765,18 +831,6 @@ ENTRY(resume_userspace)
765 jmp restore_all 831 jmp restore_all
766END(ret_from_exception) 832END(ret_from_exception)
767 833
768#ifdef CONFIG_PREEMPT
769ENTRY(resume_kernel)
770 DISABLE_INTERRUPTS(CLBR_ANY)
771 cmpl $0, PER_CPU_VAR(__preempt_count)
772 jnz restore_all_kernel
773 testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
774 jz restore_all_kernel
775 call preempt_schedule_irq
776 jmp restore_all_kernel
777END(resume_kernel)
778#endif
779
780GLOBAL(__begin_SYSENTER_singlestep_region) 834GLOBAL(__begin_SYSENTER_singlestep_region)
781/* 835/*
782 * All code from here through __end_SYSENTER_singlestep_region is subject 836 * All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1019,6 +1073,7 @@ restore_all:
1019 /* Restore user state */ 1073 /* Restore user state */
1020 RESTORE_REGS pop=4 # skip orig_eax/error_code 1074 RESTORE_REGS pop=4 # skip orig_eax/error_code
1021.Lirq_return: 1075.Lirq_return:
1076 IRET_FRAME
1022 /* 1077 /*
1023 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization 1078 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
1024 * when returning from IPI handler and when returning from 1079 * when returning from IPI handler and when returning from
@@ -1027,6 +1082,15 @@ restore_all:
1027 INTERRUPT_RETURN 1082 INTERRUPT_RETURN
1028 1083
1029restore_all_kernel: 1084restore_all_kernel:
1085#ifdef CONFIG_PREEMPT
1086 DISABLE_INTERRUPTS(CLBR_ANY)
1087 cmpl $0, PER_CPU_VAR(__preempt_count)
1088 jnz .Lno_preempt
1089 testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
1090 jz .Lno_preempt
1091 call preempt_schedule_irq
1092.Lno_preempt:
1093#endif
1030 TRACE_IRQS_IRET 1094 TRACE_IRQS_IRET
1031 PARANOID_EXIT_TO_KERNEL_MODE 1095 PARANOID_EXIT_TO_KERNEL_MODE
1032 BUG_IF_WRONG_CR3 1096 BUG_IF_WRONG_CR3
@@ -1384,6 +1448,7 @@ END(page_fault)
1384 1448
1385common_exception: 1449common_exception:
1386 /* the function address is in %gs's slot on the stack */ 1450 /* the function address is in %gs's slot on the stack */
1451 FIXUP_FRAME
1387 pushl %fs 1452 pushl %fs
1388 pushl %es 1453 pushl %es
1389 pushl %ds 1454 pushl %ds
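
For readers following the FIXUP_FRAME/IRET_FRAME hunks above: the high bits of the saved CS dword carry the CS_FROM_* flags, and kernel-mode entries get a synthetic sp/ss pair appended so that struct pt_regs is always complete on i386. The snippet below is a standalone C model of that idea only; the struct layout mirrors the stack-diagram comment in the hunk and is not the kernel's pt_regs definition, and the selector value is made up.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CS_FROM_KERNEL (1u << 29)

struct iret_frame {
	uint32_t gs;        /* 0*4: gs / function */
	uint32_t orig_eax;  /* 1*4 */
	uint32_t ip;        /* 2*4 */
	uint32_t cs;        /* 3*4: low 16 bits = selector, high bits = CS_FROM_* flags */
	uint32_t flags;     /* 4*4 */
	uint32_t sp;        /* 5*4: synthetic, added by FIXUP_FRAME for kernel entries */
	uint32_t ss;        /* 6*4: synthetic */
};

int main(void)
{
	struct iret_frame f = { .cs = 0x10 };   /* pretend kernel code selector */

	/* FIXUP_FRAME: clear stray high bits, then tag kernel-mode entries. */
	f.cs &= 0x0000ffff;
	f.cs |= CS_FROM_KERNEL;

	/* IRET_FRAME: the flag tells the exit path a synthetic sp/ss pair exists. */
	assert(f.cs & CS_FROM_KERNEL);
	printf("selector=%#x from_kernel=%d\n",
	       (unsigned)(f.cs & 0xffff), !!(f.cs & CS_FROM_KERNEL));
	return 0;
}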
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..296b346184b2 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -22,6 +22,35 @@
22 pop %_ASM_BP 22 pop %_ASM_BP
23.endm 23.endm
24 24
25#ifdef CONFIG_X86_64
26/*
27 * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
28 * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
29 * is just setting the LSB, which makes it an invalid stack address and is also
30 * a signal to the unwinder that it's a pt_regs pointer in disguise.
31 *
32 * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
33 * the original rbp.
34 */
35.macro ENCODE_FRAME_POINTER ptregs_offset=0
36 leaq 1+\ptregs_offset(%rsp), %rbp
37.endm
38#else /* !CONFIG_X86_64 */
39/*
40 * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
41 * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
42 * is just clearing the MSB, which makes it an invalid stack address and is also
43 * a signal to the unwinder that it's a pt_regs pointer in disguise.
44 *
45 * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
46 * original ebp.
47 */
48.macro ENCODE_FRAME_POINTER
49 mov %esp, %ebp
50 andl $0x7fffffff, %ebp
51.endm
52#endif /* CONFIG_X86_64 */
53
25#else /* !__ASSEMBLY__ */ 54#else /* !__ASSEMBLY__ */
26 55
27#define FRAME_BEGIN \ 56#define FRAME_BEGIN \
@@ -30,12 +59,32 @@
30 59
31#define FRAME_END "pop %" _ASM_BP "\n" 60#define FRAME_END "pop %" _ASM_BP "\n"
32 61
62#ifdef CONFIG_X86_64
63#define ENCODE_FRAME_POINTER \
64 "lea 1(%rsp), %rbp\n\t"
65#else /* !CONFIG_X86_64 */
66#define ENCODE_FRAME_POINTER \
67 "movl %esp, %ebp\n\t" \
68 "andl $0x7fffffff, %ebp\n\t"
69#endif /* CONFIG_X86_64 */
70
33#endif /* __ASSEMBLY__ */ 71#endif /* __ASSEMBLY__ */
34 72
35#define FRAME_OFFSET __ASM_SEL(4, 8) 73#define FRAME_OFFSET __ASM_SEL(4, 8)
36 74
37#else /* !CONFIG_FRAME_POINTER */ 75#else /* !CONFIG_FRAME_POINTER */
38 76
77#ifdef __ASSEMBLY__
78
79.macro ENCODE_FRAME_POINTER ptregs_offset=0
80.endm
81
82#else /* !__ASSEMBLY */
83
84#define ENCODE_FRAME_POINTER
85
86#endif
87
39#define FRAME_BEGIN 88#define FRAME_BEGIN
40#define FRAME_END 89#define FRAME_END
41#define FRAME_OFFSET 0 90#define FRAME_OFFSET 0
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 003f2daa3b0f..5e7d6b46de97 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -71,22 +71,6 @@ struct kimage;
71#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ 71#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
72 72
73/* 73/*
74 * CPU does not save ss and sp on stack if execution is already
75 * running in kernel mode at the time of NMI occurrence. This code
76 * fixes it.
77 */
78static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
79 struct pt_regs *oldregs)
80{
81#ifdef CONFIG_X86_32
82 newregs->sp = (unsigned long)&(oldregs->sp);
83 asm volatile("xorl %%eax, %%eax\n\t"
84 "movw %%ss, %%ax\n\t"
85 :"=a"(newregs->ss));
86#endif
87}
88
89/*
90 * This function is responsible for capturing register states if coming 74 * This function is responsible for capturing register states if coming
91 * via panic otherwise just fix up the ss and sp if coming via kernel 75 * via panic otherwise just fix up the ss and sp if coming via kernel
92 * mode exception. 76 * mode exception.
@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
96{ 80{
97 if (oldregs) { 81 if (oldregs) {
98 memcpy(newregs, oldregs, sizeof(*newregs)); 82 memcpy(newregs, oldregs, sizeof(*newregs));
99 crash_fixup_ss_esp(newregs, oldregs);
100 } else { 83 } else {
101#ifdef CONFIG_X86_32 84#ifdef CONFIG_X86_32
102 asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); 85 asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 8a7fc0cca2d1..3703c91f441e 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -166,14 +166,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
166#define compat_user_stack_pointer() current_pt_regs()->sp 166#define compat_user_stack_pointer() current_pt_regs()->sp
167#endif 167#endif
168 168
169#ifdef CONFIG_X86_32
170extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
171#else
172static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) 169static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
173{ 170{
174 return regs->sp; 171 return regs->sp;
175} 172}
176#endif
177 173
178#define GET_IP(regs) ((regs)->ip) 174#define GET_IP(regs) ((regs)->ip)
179#define GET_FP(regs) ((regs)->bp) 175#define GET_FP(regs) ((regs)->bp)
@@ -201,14 +197,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
201 if (unlikely(offset > MAX_REG_OFFSET)) 197 if (unlikely(offset > MAX_REG_OFFSET))
202 return 0; 198 return 0;
203#ifdef CONFIG_X86_32 199#ifdef CONFIG_X86_32
204 /*
205 * Traps from the kernel do not save sp and ss.
206 * Use the helper function to retrieve sp.
207 */
208 if (offset == offsetof(struct pt_regs, sp) &&
209 regs->cs == __KERNEL_CS)
210 return kernel_stack_pointer(regs);
211
212 /* The selector fields are 16-bit. */ 200 /* The selector fields are 16-bit. */
213 if (offset == offsetof(struct pt_regs, cs) || 201 if (offset == offsetof(struct pt_regs, cs) ||
214 offset == offsetof(struct pt_regs, ss) || 202 offset == offsetof(struct pt_regs, ss) ||
@@ -234,8 +222,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
234static inline int regs_within_kernel_stack(struct pt_regs *regs, 222static inline int regs_within_kernel_stack(struct pt_regs *regs,
235 unsigned long addr) 223 unsigned long addr)
236{ 224{
237 return ((addr & ~(THREAD_SIZE - 1)) == 225 return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
238 (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
239} 226}
240 227
241/** 228/**
@@ -249,7 +236,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
249 */ 236 */
250static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) 237static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
251{ 238{
252 unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); 239 unsigned long *addr = (unsigned long *)regs->sp;
253 240
254 addr += n; 241 addr += n;
255 if (regs_within_kernel_stack(regs, (unsigned long)addr)) 242 if (regs_within_kernel_stack(regs, (unsigned long)addr))
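
The simplified regs_within_kernel_stack() above works because kernel stacks are THREAD_SIZE-aligned: two addresses are on the same stack exactly when they agree after masking off the low THREAD_SIZE-1 bits. A tiny standalone model (the THREAD_SIZE value here is an assumption for the example):

#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE (16 * 1024ul)   /* illustrative; real value is per-config */

static int within_kernel_stack(uint64_t sp, uint64_t addr)
{
	return (addr & ~(THREAD_SIZE - 1)) == (sp & ~(THREAD_SIZE - 1));
}

int main(void)
{
	uint64_t sp = 0xffffc90000013f58ull;

	printf("%d\n", within_kernel_stack(sp, sp + 64));           /* 1: same stack */
	printf("%d\n", within_kernel_stack(sp, sp + THREAD_SIZE));  /* 0: next stack */
	return 0;
}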
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 0a3c4cab39db..b2e84d113f2a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,6 +6,8 @@
6#ifdef __KERNEL__ 6#ifdef __KERNEL__
7 7
8#include <asm/nops.h> 8#include <asm/nops.h>
9#include <asm/processor-flags.h>
10#include <linux/jump_label.h>
9 11
10/* 12/*
11 * Volatile isn't enough to prevent the compiler from reordering the 13 * Volatile isn't enough to prevent the compiler from reordering the
@@ -16,6 +18,10 @@
16 */ 18 */
17extern unsigned long __force_order; 19extern unsigned long __force_order;
18 20
21/* Starts false and gets enabled once CPU feature detection is done. */
22DECLARE_STATIC_KEY_FALSE(cr_pinning);
23extern unsigned long cr4_pinned_bits;
24
19static inline unsigned long native_read_cr0(void) 25static inline unsigned long native_read_cr0(void)
20{ 26{
21 unsigned long val; 27 unsigned long val;
@@ -25,7 +31,20 @@ static inline unsigned long native_read_cr0(void)
25 31
26static inline void native_write_cr0(unsigned long val) 32static inline void native_write_cr0(unsigned long val)
27{ 33{
28 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); 34 unsigned long bits_missing = 0;
35
36set_register:
37 asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
38
39 if (static_branch_likely(&cr_pinning)) {
40 if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
41 bits_missing = X86_CR0_WP;
42 val |= bits_missing;
43 goto set_register;
44 }
45 /* Warn after we've set the missing bits. */
46 WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
47 }
29} 48}
30 49
31static inline unsigned long native_read_cr2(void) 50static inline unsigned long native_read_cr2(void)
@@ -74,7 +93,21 @@ static inline unsigned long native_read_cr4(void)
74 93
75static inline void native_write_cr4(unsigned long val) 94static inline void native_write_cr4(unsigned long val)
76{ 95{
77 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); 96 unsigned long bits_missing = 0;
97
98set_register:
99 asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
100
101 if (static_branch_likely(&cr_pinning)) {
102 if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
103 bits_missing = ~val & cr4_pinned_bits;
104 val |= bits_missing;
105 goto set_register;
106 }
107 /* Warn after we've set the missing bits. */
108 WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
109 bits_missing);
110 }
78} 111}
79 112
80#ifdef CONFIG_X86_64 113#ifdef CONFIG_X86_64
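
The control flow added to native_write_cr0()/native_write_cr4() above is deliberately "repair first, warn second": the pinned bits are OR-ed back in and the register rewritten before WARN_ONCE() runs, so an attacker cannot use the warning path itself as a window with SMEP/SMAP/WP cleared. A userspace model of that pattern, with a simulated register and pin mask standing in for the real CR4 and cr4_pinned_bits:

#include <stdio.h>

static unsigned long fake_cr4;                     /* stands in for CR4 */
static unsigned long pinned_bits = 0x300000;       /* pretend SMEP|SMAP mask */
static int pinning_enabled = 1;                    /* models the static key */

static void write_cr4_pinned(unsigned long val)
{
	unsigned long bits_missing = 0;

set_register:
	fake_cr4 = val;                            /* the "mov %0,%%cr4" */

	if (pinning_enabled) {
		if ((val & pinned_bits) != pinned_bits) {
			bits_missing = ~val & pinned_bits;
			val |= bits_missing;
			goto set_register;         /* repair first ... */
		}
		if (bits_missing)                  /* ... warn afterwards */
			fprintf(stderr, "CR4 bits went missing: %lx!?\n",
				bits_missing);
	}
}

int main(void)
{
	write_cr4_pinned(0x0);                     /* attempted SMEP/SMAP clear */
	printf("cr4 after write: %#lx\n", fake_cr4);  /* pinned bits restored */
	return 0;
}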
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a8d0cdf48616..14db05086bbf 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -78,7 +78,7 @@ static inline unsigned long *
78get_stack_pointer(struct task_struct *task, struct pt_regs *regs) 78get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
79{ 79{
80 if (regs) 80 if (regs)
81 return (unsigned long *)kernel_stack_pointer(regs); 81 return (unsigned long *)regs->sp;
82 82
83 if (task == current) 83 if (task == current)
84 return __builtin_frame_address(0); 84 return __builtin_frame_address(0);
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index d83e9f771d86..70c09967a999 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -66,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
66#define INT3_INSN_SIZE 1 66#define INT3_INSN_SIZE 1
67#define CALL_INSN_SIZE 5 67#define CALL_INSN_SIZE 5
68 68
69#ifdef CONFIG_X86_64
70static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) 69static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
71{ 70{
72 /* 71 /*
@@ -84,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
84 int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); 83 int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
85 int3_emulate_jmp(regs, func); 84 int3_emulate_jmp(regs, func);
86} 85}
87#endif /* CONFIG_X86_64 */
88#endif /* !CONFIG_UML_X86 */ 86#endif /* !CONFIG_UML_X86 */
89 87
90#endif /* _ASM_X86_TEXT_PATCHING_H */ 88#endif /* _ASM_X86_TEXT_PATCHING_H */
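
With the consistent i386 pt_regs above, int3_emulate_push()/int3_emulate_call() can now be used on 32-bit too: they fake a CALL at the patched site by pushing a return address through regs->sp and redirecting regs->ip. The following is a self-contained model of that behaviour on an emulated stack; the struct and addresses are invented, only the return-address arithmetic mirrors the real helpers.

#include <stdint.h>
#include <stdio.h>

#define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5

struct fake_regs { uint64_t sp; uint64_t ip; };

static void emulate_push(struct fake_regs *regs, uint64_t stack[], uint64_t val)
{
	regs->sp -= sizeof(uint64_t);              /* grow the emulated stack down */
	stack[regs->sp / sizeof(uint64_t)] = val;
}

static void emulate_call(struct fake_regs *regs, uint64_t stack[], uint64_t func)
{
	/* return address = first byte after the 5-byte CALL being emulated */
	emulate_push(regs, stack, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
	regs->ip = func;                           /* "jump" to the target */
}

int main(void)
{
	uint64_t stack[16] = { 0 };
	struct fake_regs regs = { .sp = sizeof(stack), .ip = 0x1001 }; /* ip after INT3 */

	emulate_call(&regs, stack, 0x2000);
	printf("ip=%#llx ret=%#llx\n", (unsigned long long)regs.ip,
	       (unsigned long long)stack[regs.sp / sizeof(uint64_t)]);
	return 0;
}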
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index bd542f9b0953..c3468b5242fd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -616,11 +616,83 @@ extern struct paravirt_patch_site __start_parainstructions[],
616 __stop_parainstructions[]; 616 __stop_parainstructions[];
617#endif /* CONFIG_PARAVIRT */ 617#endif /* CONFIG_PARAVIRT */
618 618
619/*
620 * Self-test for the INT3 based CALL emulation code.
621 *
622 * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
623 * properly and that there is a stack gap between the INT3 frame and the
624 * previous context. Without this gap doing a virtual PUSH on the interrupted
625 * stack would corrupt the INT3 IRET frame.
626 *
627 * See entry_{32,64}.S for more details.
628 */
629static void __init int3_magic(unsigned int *ptr)
630{
631 *ptr = 1;
632}
633
634extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
635
636static int __init
637int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
638{
639 struct die_args *args = data;
640 struct pt_regs *regs = args->regs;
641
642 if (!regs || user_mode(regs))
643 return NOTIFY_DONE;
644
645 if (val != DIE_INT3)
646 return NOTIFY_DONE;
647
648 if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
649 return NOTIFY_DONE;
650
651 int3_emulate_call(regs, (unsigned long)&int3_magic);
652 return NOTIFY_STOP;
653}
654
655static void __init int3_selftest(void)
656{
657 static __initdata struct notifier_block int3_exception_nb = {
658 .notifier_call = int3_exception_notify,
659 .priority = INT_MAX-1, /* last */
660 };
661 unsigned int val = 0;
662
663 BUG_ON(register_die_notifier(&int3_exception_nb));
664
665 /*
666 * Basically: int3_magic(&val); but really complicated :-)
667 *
668 * Stick the address of the INT3 instruction into int3_selftest_ip,
669 * then trigger the INT3, padded with NOPs to match a CALL instruction
670 * length.
671 */
672 asm volatile ("1: int3; nop; nop; nop; nop\n\t"
673 ".pushsection .init.data,\"aw\"\n\t"
674 ".align " __ASM_SEL(4, 8) "\n\t"
675 ".type int3_selftest_ip, @object\n\t"
676 ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
677 "int3_selftest_ip:\n\t"
678 __ASM_SEL(.long, .quad) " 1b\n\t"
679 ".popsection\n\t"
680 : : __ASM_SEL_RAW(a, D) (&val) : "memory");
681
682 BUG_ON(val != 1);
683
684 unregister_die_notifier(&int3_exception_nb);
685}
686
619void __init alternative_instructions(void) 687void __init alternative_instructions(void)
620{ 688{
621 /* The patching is not fully atomic, so try to avoid local interruptions 689 int3_selftest();
622 that might execute the to be patched code. 690
623 Other CPUs are not running. */ 691 /*
692 * The patching is not fully atomic, so try to avoid local
693 * interruptions that might execute the to be patched code.
694 * Other CPUs are not running.
695 */
624 stop_nmi(); 696 stop_nmi();
625 697
626 /* 698 /*
@@ -645,10 +717,11 @@ void __init alternative_instructions(void)
645 _text, _etext); 717 _text, _etext);
646 } 718 }
647 719
648 if (!uniproc_patched || num_possible_cpus() == 1) 720 if (!uniproc_patched || num_possible_cpus() == 1) {
649 free_init_pages("SMP alternatives", 721 free_init_pages("SMP alternatives",
650 (unsigned long)__smp_locks, 722 (unsigned long)__smp_locks,
651 (unsigned long)__smp_locks_end); 723 (unsigned long)__smp_locks_end);
724 }
652#endif 725#endif
653 726
654 apply_paravirt(__parainstructions, __parainstructions_end); 727 apply_paravirt(__parainstructions, __parainstructions_end);
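
The selftest added above records the address of its INT3 instruction by emitting a data object from inside the asm statement, so the die notifier can recognise the trap it planted. A minimal userspace demonstration of that .pushsection/.popsection pattern (x86-64 GCC/Clang assumed; no INT3 is executed and the symbol name is invented for the example):

#include <stdio.h>

extern unsigned long probe_ip;                     /* defined by the asm below */

int main(void)
{
	asm volatile ("1: nop\n\t"
		      ".pushsection .data,\"aw\"\n\t"
		      ".align 8\n\t"
		      ".globl probe_ip\n\t"
		      "probe_ip:\n\t"
		      ".quad 1b\n\t"                 /* record the label's address */
		      ".popsection\n\t" ::: "memory");

	printf("recorded ip = %#lx (main at %p)\n", probe_ip, (void *)main);
	return 0;
}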
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d077d7..da64452584b0 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -38,7 +38,6 @@ static void __used common(void)
38#endif 38#endif
39 39
40 BLANK(); 40 BLANK();
41 OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
42 OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); 41 OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
43 42
44 BLANK(); 43 BLANK();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dad20bc891d5..8febe90470f4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,6 +366,25 @@ out:
366 cr4_clear_bits(X86_CR4_UMIP); 366 cr4_clear_bits(X86_CR4_UMIP);
367} 367}
368 368
369DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
370EXPORT_SYMBOL(cr_pinning);
371unsigned long cr4_pinned_bits __ro_after_init;
372EXPORT_SYMBOL(cr4_pinned_bits);
373
374/*
375 * Once CPU feature detection is finished (and boot params have been
376 * parsed), record any of the sensitive CR bits that are set, and
377 * enable CR pinning.
378 */
379static void __init setup_cr_pinning(void)
380{
381 unsigned long mask;
382
383 mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
384 cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
385 static_key_enable(&cr_pinning.key);
386}
387
369/* 388/*
370 * Protection Keys are not available in 32-bit mode. 389 * Protection Keys are not available in 32-bit mode.
371 */ 390 */
@@ -1468,6 +1487,7 @@ void __init identify_boot_cpu(void)
1468 enable_sep_cpu(); 1487 enable_sep_cpu();
1469#endif 1488#endif
1470 cpu_detect_tlb(&boot_cpu_data); 1489 cpu_detect_tlb(&boot_cpu_data);
1490 setup_cr_pinning();
1471} 1491}
1472 1492
1473void identify_secondary_cpu(struct cpuinfo_x86 *c) 1493void identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 576b2e1bfc12..84e2d3ddd0eb 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
73 73
74static void kdump_nmi_callback(int cpu, struct pt_regs *regs) 74static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
75{ 75{
76#ifdef CONFIG_X86_32
77 struct pt_regs fixed_regs;
78
79 if (!user_mode(regs)) {
80 crash_fixup_ss_esp(&fixed_regs, regs);
81 regs = &fixed_regs;
82 }
83#endif
84 crash_save_cpu(regs, cpu); 76 crash_save_cpu(regs, cpu);
85 77
86 /* 78 /*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 76228525acd0..4b73f5937f41 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -310,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
310 310
311 ip = regs->ip - INT3_INSN_SIZE; 311 ip = regs->ip - INT3_INSN_SIZE;
312 312
313#ifdef CONFIG_X86_64
314 if (ftrace_location(ip)) { 313 if (ftrace_location(ip)) {
315 int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); 314 int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
316 return 1; 315 return 1;
@@ -322,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
322 int3_emulate_call(regs, ftrace_update_func_call); 321 int3_emulate_call(regs, ftrace_update_func_call);
323 return 1; 322 return 1;
324 } 323 }
325#else
326 if (ftrace_location(ip) || is_ftrace_caller(ip)) {
327 int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
328 return 1;
329 }
330#endif
331 324
332 return 0; 325 return 0;
333} 326}
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 2ba914a34b06..073aab525d80 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -9,6 +9,8 @@
9#include <asm/export.h> 9#include <asm/export.h>
10#include <asm/ftrace.h> 10#include <asm/ftrace.h>
11#include <asm/nospec-branch.h> 11#include <asm/nospec-branch.h>
12#include <asm/frame.h>
13#include <asm/asm-offsets.h>
12 14
13# define function_hook __fentry__ 15# define function_hook __fentry__
14EXPORT_SYMBOL(__fentry__) 16EXPORT_SYMBOL(__fentry__)
@@ -89,26 +91,38 @@ END(ftrace_caller)
89 91
90ENTRY(ftrace_regs_caller) 92ENTRY(ftrace_regs_caller)
91 /* 93 /*
92 * i386 does not save SS and ESP when coming from kernel. 94 * We're here from an mcount/fentry CALL, and the stack frame looks like:
93 * Instead, to get sp, &regs->sp is used (see ptrace.h). 95 *
94 * Unfortunately, that means eflags must be at the same location 96 * <previous context>
95 * as the current return ip is. We move the return ip into the 97 * RET-IP
96 * regs->ip location, and move flags into the return ip location. 98 *
99 * The purpose of this function is to call out in an emulated INT3
100 * environment with a stack frame like:
101 *
102 * <previous context>
103 * gap / RET-IP
104 * gap
105 * gap
106 * gap
107 * pt_regs
108 *
109 * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
97 */ 110 */
98 pushl $__KERNEL_CS 111 subl $3*4, %esp # RET-IP + 3 gaps
99 pushl 4(%esp) /* Save the return ip */ 112 pushl %ss # ss
100 pushl $0 /* Load 0 into orig_ax */ 113 pushl %esp # points at ss
114 addl $5*4, (%esp) # make it point at <previous context>
115 pushfl # flags
116 pushl $__KERNEL_CS # cs
117 pushl 7*4(%esp) # ip <- RET-IP
118 pushl $0 # orig_eax
119
101 pushl %gs 120 pushl %gs
102 pushl %fs 121 pushl %fs
103 pushl %es 122 pushl %es
104 pushl %ds 123 pushl %ds
105 pushl %eax
106
107 /* Get flags and place them into the return ip slot */
108 pushf
109 popl %eax
110 movl %eax, 8*4(%esp)
111 124
125 pushl %eax
112 pushl %ebp 126 pushl %ebp
113 pushl %edi 127 pushl %edi
114 pushl %esi 128 pushl %esi
@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller)
116 pushl %ecx 130 pushl %ecx
117 pushl %ebx 131 pushl %ebx
118 132
119 movl 12*4(%esp), %eax /* Load ip (1st parameter) */ 133 ENCODE_FRAME_POINTER
120 subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ 134
121 movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */ 135 movl PT_EIP(%esp), %eax # 1st argument: IP
122 movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ 136 subl $MCOUNT_INSN_SIZE, %eax
123 pushl %esp /* Save pt_regs as 4th parameter */ 137 movl 21*4(%esp), %edx # 2nd argument: parent ip
138 movl function_trace_op, %ecx # 3rd argument: ftrace_pos
139 pushl %esp # 4th argument: pt_regs
124 140
125GLOBAL(ftrace_regs_call) 141GLOBAL(ftrace_regs_call)
126 call ftrace_stub 142 call ftrace_stub
127 143
128 addl $4, %esp /* Skip pt_regs */ 144 addl $4, %esp # skip 4th argument
129 145
130 /* restore flags */ 146 /* place IP below the new SP */
131 push 14*4(%esp) 147 movl PT_OLDESP(%esp), %eax
132 popf 148 movl PT_EIP(%esp), %ecx
149 movl %ecx, -4(%eax)
133 150
134 /* Move return ip back to its original location */ 151 /* place EAX below that */
135 movl 12*4(%esp), %eax 152 movl PT_EAX(%esp), %ecx
136 movl %eax, 14*4(%esp) 153 movl %ecx, -8(%eax)
137 154
138 popl %ebx 155 popl %ebx
139 popl %ecx 156 popl %ecx
@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call)
141 popl %esi 158 popl %esi
142 popl %edi 159 popl %edi
143 popl %ebp 160 popl %ebp
144 popl %eax
145 popl %ds
146 popl %es
147 popl %fs
148 popl %gs
149 161
150 /* use lea to not affect flags */ 162 lea -8(%eax), %esp
151 lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */ 163 popl %eax
152 164
153 jmp .Lftrace_ret 165 jmp .Lftrace_ret
154 166
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 10eb2760ef2c..809d54397dba 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -9,6 +9,7 @@
9#include <asm/export.h> 9#include <asm/export.h>
10#include <asm/nospec-branch.h> 10#include <asm/nospec-branch.h>
11#include <asm/unwind_hints.h> 11#include <asm/unwind_hints.h>
12#include <asm/frame.h>
12 13
13 .code64 14 .code64
14 .section .entry.text, "ax" 15 .section .entry.text, "ax"
@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr)
203 leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx 204 leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
204 movq %rcx, RSP(%rsp) 205 movq %rcx, RSP(%rsp)
205 206
207 ENCODE_FRAME_POINTER
208
206 /* regs go into 4th parameter */ 209 /* regs go into 4th parameter */
207 leaq (%rsp), %rcx 210 leaq (%rsp), %rcx
208 211
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 6690c5652aeb..23297ea64f5f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
118 118
119#ifdef CONFIG_X86_32 119#ifdef CONFIG_X86_32
120 switch (regno) { 120 switch (regno) {
121 case GDB_SS:
122 if (!user_mode(regs))
123 *(unsigned long *)mem = __KERNEL_DS;
124 break;
125 case GDB_SP:
126 if (!user_mode(regs))
127 *(unsigned long *)mem = kernel_stack_pointer(regs);
128 break;
129 case GDB_GS: 121 case GDB_GS:
130 case GDB_FS: 122 case GDB_FS:
131 *(unsigned long *)mem = 0xFFFF; 123 *(unsigned long *)mem = 0xFFFF;
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2b949f4fd4d8..7d3a2e2daf01 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -5,15 +5,10 @@
5/* Kprobes and Optprobes common header */ 5/* Kprobes and Optprobes common header */
6 6
7#include <asm/asm.h> 7#include <asm/asm.h>
8 8#include <asm/frame.h>
9#ifdef CONFIG_FRAME_POINTER
10# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
11 " mov %" _ASM_SP ", %" _ASM_BP "\n"
12#else
13# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
14#endif
15 9
16#ifdef CONFIG_X86_64 10#ifdef CONFIG_X86_64
11
17#define SAVE_REGS_STRING \ 12#define SAVE_REGS_STRING \
18 /* Skip cs, ip, orig_ax. */ \ 13 /* Skip cs, ip, orig_ax. */ \
19 " subq $24, %rsp\n" \ 14 " subq $24, %rsp\n" \
@@ -27,11 +22,13 @@
27 " pushq %r10\n" \ 22 " pushq %r10\n" \
28 " pushq %r11\n" \ 23 " pushq %r11\n" \
29 " pushq %rbx\n" \ 24 " pushq %rbx\n" \
30 SAVE_RBP_STRING \ 25 " pushq %rbp\n" \
31 " pushq %r12\n" \ 26 " pushq %r12\n" \
32 " pushq %r13\n" \ 27 " pushq %r13\n" \
33 " pushq %r14\n" \ 28 " pushq %r14\n" \
34 " pushq %r15\n" 29 " pushq %r15\n" \
30 ENCODE_FRAME_POINTER
31
35#define RESTORE_REGS_STRING \ 32#define RESTORE_REGS_STRING \
36 " popq %r15\n" \ 33 " popq %r15\n" \
37 " popq %r14\n" \ 34 " popq %r14\n" \
@@ -51,19 +48,22 @@
51 /* Skip orig_ax, ip, cs */ \ 48 /* Skip orig_ax, ip, cs */ \
52 " addq $24, %rsp\n" 49 " addq $24, %rsp\n"
53#else 50#else
51
54#define SAVE_REGS_STRING \ 52#define SAVE_REGS_STRING \
55 /* Skip cs, ip, orig_ax and gs. */ \ 53 /* Skip cs, ip, orig_ax and gs. */ \
56 " subl $16, %esp\n" \ 54 " subl $4*4, %esp\n" \
57 " pushl %fs\n" \ 55 " pushl %fs\n" \
58 " pushl %es\n" \ 56 " pushl %es\n" \
59 " pushl %ds\n" \ 57 " pushl %ds\n" \
60 " pushl %eax\n" \ 58 " pushl %eax\n" \
61 SAVE_RBP_STRING \ 59 " pushl %ebp\n" \
62 " pushl %edi\n" \ 60 " pushl %edi\n" \
63 " pushl %esi\n" \ 61 " pushl %esi\n" \
64 " pushl %edx\n" \ 62 " pushl %edx\n" \
65 " pushl %ecx\n" \ 63 " pushl %ecx\n" \
66 " pushl %ebx\n" 64 " pushl %ebx\n" \
65 ENCODE_FRAME_POINTER
66
67#define RESTORE_REGS_STRING \ 67#define RESTORE_REGS_STRING \
68 " popl %ebx\n" \ 68 " popl %ebx\n" \
69 " popl %ecx\n" \ 69 " popl %ecx\n" \
@@ -72,8 +72,8 @@
72 " popl %edi\n" \ 72 " popl %edi\n" \
73 " popl %ebp\n" \ 73 " popl %ebp\n" \
74 " popl %eax\n" \ 74 " popl %eax\n" \
75 /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ 75 /* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\
76 " addl $24, %esp\n" 76 " addl $7*4, %esp\n"
77#endif 77#endif
78 78
79/* Ensure if the instruction can be boostable */ 79/* Ensure if the instruction can be boostable */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6afd8061dbae..bd17dbb15d6a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -56,7 +56,7 @@
56DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 56DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
57DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); 57DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
58 58
59#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) 59#define stack_addr(regs) ((unsigned long *)regs->sp)
60 60
61#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ 61#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
62 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ 62 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -718,29 +718,27 @@ asm(
718 ".global kretprobe_trampoline\n" 718 ".global kretprobe_trampoline\n"
719 ".type kretprobe_trampoline, @function\n" 719 ".type kretprobe_trampoline, @function\n"
720 "kretprobe_trampoline:\n" 720 "kretprobe_trampoline:\n"
721#ifdef CONFIG_X86_64
722 /* We don't bother saving the ss register */ 721 /* We don't bother saving the ss register */
722#ifdef CONFIG_X86_64
723 " pushq %rsp\n" 723 " pushq %rsp\n"
724 " pushfq\n" 724 " pushfq\n"
725 SAVE_REGS_STRING 725 SAVE_REGS_STRING
726 " movq %rsp, %rdi\n" 726 " movq %rsp, %rdi\n"
727 " call trampoline_handler\n" 727 " call trampoline_handler\n"
728 /* Replace saved sp with true return address. */ 728 /* Replace saved sp with true return address. */
729 " movq %rax, 152(%rsp)\n" 729 " movq %rax, 19*8(%rsp)\n"
730 RESTORE_REGS_STRING 730 RESTORE_REGS_STRING
731 " popfq\n" 731 " popfq\n"
732#else 732#else
733 " pushf\n" 733 " pushl %esp\n"
734 " pushfl\n"
734 SAVE_REGS_STRING 735 SAVE_REGS_STRING
735 " movl %esp, %eax\n" 736 " movl %esp, %eax\n"
736 " call trampoline_handler\n" 737 " call trampoline_handler\n"
737 /* Move flags to cs */ 738 /* Replace saved sp with true return address. */
738 " movl 56(%esp), %edx\n" 739 " movl %eax, 15*4(%esp)\n"
739 " movl %edx, 52(%esp)\n"
740 /* Replace saved flags with true return address. */
741 " movl %eax, 56(%esp)\n"
742 RESTORE_REGS_STRING 740 RESTORE_REGS_STRING
743 " popf\n" 741 " popfl\n"
744#endif 742#endif
745 " ret\n" 743 " ret\n"
746 ".size kretprobe_trampoline, .-kretprobe_trampoline\n" 744 ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
@@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
781 INIT_HLIST_HEAD(&empty_rp); 779 INIT_HLIST_HEAD(&empty_rp);
782 kretprobe_hash_lock(current, &head, &flags); 780 kretprobe_hash_lock(current, &head, &flags);
783 /* fixup registers */ 781 /* fixup registers */
784#ifdef CONFIG_X86_64
785 regs->cs = __KERNEL_CS; 782 regs->cs = __KERNEL_CS;
786 /* On x86-64, we use pt_regs->sp for return address holder. */ 783#ifdef CONFIG_X86_32
787 frame_pointer = &regs->sp; 784 regs->cs |= get_kernel_rpl();
788#else
789 regs->cs = __KERNEL_CS | get_kernel_rpl();
790 regs->gs = 0; 785 regs->gs = 0;
791 /* On x86-32, we use pt_regs->flags for return address holder. */
792 frame_pointer = &regs->flags;
793#endif 786#endif
787 /* We use pt_regs->sp for return address holder. */
788 frame_pointer = &regs->sp;
794 regs->ip = trampoline_address; 789 regs->ip = trampoline_address;
795 regs->orig_ax = ~0UL; 790 regs->orig_ax = ~0UL;
796 791
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7c361a24c6df..282b4eb67e30 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -102,14 +102,15 @@ asm (
102 "optprobe_template_call:\n" 102 "optprobe_template_call:\n"
103 ASM_NOP5 103 ASM_NOP5
104 /* Move flags to rsp */ 104 /* Move flags to rsp */
105 " movq 144(%rsp), %rdx\n" 105 " movq 18*8(%rsp), %rdx\n"
106 " movq %rdx, 152(%rsp)\n" 106 " movq %rdx, 19*8(%rsp)\n"
107 RESTORE_REGS_STRING 107 RESTORE_REGS_STRING
108 /* Skip flags entry */ 108 /* Skip flags entry */
109 " addq $8, %rsp\n" 109 " addq $8, %rsp\n"
110 " popfq\n" 110 " popfq\n"
111#else /* CONFIG_X86_32 */ 111#else /* CONFIG_X86_32 */
112 " pushf\n" 112 " pushl %esp\n"
113 " pushfl\n"
113 SAVE_REGS_STRING 114 SAVE_REGS_STRING
114 " movl %esp, %edx\n" 115 " movl %esp, %edx\n"
115 ".global optprobe_template_val\n" 116 ".global optprobe_template_val\n"
@@ -118,9 +119,13 @@ asm (
118 ".global optprobe_template_call\n" 119 ".global optprobe_template_call\n"
119 "optprobe_template_call:\n" 120 "optprobe_template_call:\n"
120 ASM_NOP5 121 ASM_NOP5
122 /* Move flags into esp */
123 " movl 14*4(%esp), %edx\n"
124 " movl %edx, 15*4(%esp)\n"
121 RESTORE_REGS_STRING 125 RESTORE_REGS_STRING
122 " addl $4, %esp\n" /* skip cs */ 126 /* Skip flags entry */
123 " popf\n" 127 " addl $4, %esp\n"
128 " popfl\n"
124#endif 129#endif
125 ".global optprobe_template_end\n" 130 ".global optprobe_template_end\n"
126 "optprobe_template_end:\n" 131 "optprobe_template_end:\n"
@@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
152 } else { 157 } else {
153 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 158 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
154 /* Save skipped registers */ 159 /* Save skipped registers */
155#ifdef CONFIG_X86_64
156 regs->cs = __KERNEL_CS; 160 regs->cs = __KERNEL_CS;
157#else 161#ifdef CONFIG_X86_32
158 regs->cs = __KERNEL_CS | get_kernel_rpl(); 162 regs->cs |= get_kernel_rpl();
159 regs->gs = 0; 163 regs->gs = 0;
160#endif 164#endif
161 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 165 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2399e910d109..b8ceec4974fe 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
62{ 62{
63 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 63 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
64 unsigned long d0, d1, d2, d3, d6, d7; 64 unsigned long d0, d1, d2, d3, d6, d7;
65 unsigned long sp; 65 unsigned short gs;
66 unsigned short ss, gs;
67 66
68 if (user_mode(regs)) { 67 if (user_mode(regs))
69 sp = regs->sp;
70 ss = regs->ss;
71 gs = get_user_gs(regs); 68 gs = get_user_gs(regs);
72 } else { 69 else
73 sp = kernel_stack_pointer(regs);
74 savesegment(ss, ss);
75 savesegment(gs, gs); 70 savesegment(gs, gs);
76 }
77 71
78 show_ip(regs, KERN_DEFAULT); 72 show_ip(regs, KERN_DEFAULT);
79 73
80 printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", 74 printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
81 regs->ax, regs->bx, regs->cx, regs->dx); 75 regs->ax, regs->bx, regs->cx, regs->dx);
82 printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", 76 printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
83 regs->si, regs->di, regs->bp, sp); 77 regs->si, regs->di, regs->bp, regs->sp);
84 printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", 78 printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
85 (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); 79 (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
86 80
87 if (mode != SHOW_REGS_ALL) 81 if (mode != SHOW_REGS_ALL)
88 return; 82 return;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ee9099061d01..8eb1e58de043 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -155,35 +155,6 @@ static inline bool invalid_selector(u16 value)
155 155
156#define FLAG_MASK FLAG_MASK_32 156#define FLAG_MASK FLAG_MASK_32
157 157
158/*
159 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
160 * when it traps. The previous stack will be directly underneath the saved
161 * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
162 *
163 * Now, if the stack is empty, '&regs->sp' is out of range. In this
164 * case we try to take the previous stack. To always return a non-null
165 * stack pointer we fall back to regs as stack if no previous stack
166 * exists.
167 *
168 * This is valid only for kernel mode traps.
169 */
170unsigned long kernel_stack_pointer(struct pt_regs *regs)
171{
172 unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
173 unsigned long sp = (unsigned long)&regs->sp;
174 u32 *prev_esp;
175
176 if (context == (sp & ~(THREAD_SIZE - 1)))
177 return sp;
178
179 prev_esp = (u32 *)(context);
180 if (*prev_esp)
181 return (unsigned long)*prev_esp;
182
183 return (unsigned long)regs;
184}
185EXPORT_SYMBOL_GPL(kernel_stack_pointer);
186
187static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 158static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
188{ 159{
189 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 160 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 362dd8953f48..1af7a2d89419 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -205,13 +205,19 @@ static int enable_start_cpu0;
205 */ 205 */
206static void notrace start_secondary(void *unused) 206static void notrace start_secondary(void *unused)
207{ 207{
208 unsigned long cr4 = __read_cr4();
209
208 /* 210 /*
209 * Don't put *anything* except direct CPU state initialization 211 * Don't put *anything* except direct CPU state initialization
210 * before cpu_init(), SMP booting is too fragile that we want to 212 * before cpu_init(), SMP booting is too fragile that we want to
211 * limit the things done here to the most necessary things. 213 * limit the things done here to the most necessary things.
212 */ 214 */
213 if (boot_cpu_has(X86_FEATURE_PCID)) 215 if (boot_cpu_has(X86_FEATURE_PCID))
214 __write_cr4(__read_cr4() | X86_CR4_PCIDE); 216 cr4 |= X86_CR4_PCIDE;
217 if (static_branch_likely(&cr_pinning))
218 cr4 |= cr4_pinned_bits;
219
220 __write_cr4(cr4);
215 221
216#ifdef CONFIG_X86_32 222#ifdef CONFIG_X86_32
217 /* switch away from the initial page table */ 223 /* switch away from the initial page table */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 07c0e960b3f3..7ce29cee9f9e 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs)
37#ifdef CONFIG_FRAME_POINTER 37#ifdef CONFIG_FRAME_POINTER
38 return *(unsigned long *)(regs->bp + sizeof(long)); 38 return *(unsigned long *)(regs->bp + sizeof(long));
39#else 39#else
40 unsigned long *sp = 40 unsigned long *sp = (unsigned long *)regs->sp;
41 (unsigned long *)kernel_stack_pointer(regs);
42 /* 41 /*
43 * Return address is either directly at stack pointer 42 * Return address is either directly at stack pointer
44 * or above a saved flags. Eflags has bits 22-31 zero, 43 * or above a saved flags. Eflags has bits 22-31 zero,
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 6106760de716..a224b5ab103f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state)
70 } 70 }
71} 71}
72 72
73static size_t regs_size(struct pt_regs *regs)
74{
75 /* x86_32 regs from kernel mode are two words shorter: */
76 if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
77 return sizeof(*regs) - 2*sizeof(long);
78
79 return sizeof(*regs);
80}
81
82static bool in_entry_code(unsigned long ip) 73static bool in_entry_code(unsigned long ip)
83{ 74{
84 char *addr = (char *)ip; 75 char *addr = (char *)ip;
@@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
198} 189}
199#endif 190#endif
200 191
201#ifdef CONFIG_X86_32
202#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
203#else
204#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
205#endif
206
207static bool update_stack_state(struct unwind_state *state, 192static bool update_stack_state(struct unwind_state *state,
208 unsigned long *next_bp) 193 unsigned long *next_bp)
209{ 194{
@@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state,
214 size_t len; 199 size_t len;
215 200
216 if (state->regs) 201 if (state->regs)
217 prev_frame_end = (void *)state->regs + regs_size(state->regs); 202 prev_frame_end = (void *)state->regs + sizeof(*state->regs);
218 else 203 else
219 prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; 204 prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
220 205
@@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state,
222 regs = decode_frame_pointer(next_bp); 207 regs = decode_frame_pointer(next_bp);
223 if (regs) { 208 if (regs) {
224 frame = (unsigned long *)regs; 209 frame = (unsigned long *)regs;
225 len = KERNEL_REGS_SIZE; 210 len = sizeof(*regs);
226 state->got_irq = true; 211 state->got_irq = true;
227 } else { 212 } else {
228 frame = next_bp; 213 frame = next_bp;
@@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state,
246 frame < prev_frame_end) 231 frame < prev_frame_end)
247 return false; 232 return false;
248 233
249 /*
250 * On 32-bit with user mode regs, make sure the last two regs are safe
251 * to access:
252 */
253 if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
254 !on_stack(info, frame, len + 2*sizeof(long)))
255 return false;
256
257 /* Move state to the next frame: */ 234 /* Move state to the next frame: */
258 if (regs) { 235 if (regs) {
259 state->regs = regs; 236 state->regs = regs;
@@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
412 * Pretend that the frame is complete and that BP points to it, but save 389 * Pretend that the frame is complete and that BP points to it, but save
413 * the real BP so that we can use it when looking for the next frame. 390 * the real BP so that we can use it when looking for the next frame.
414 */ 391 */
415 if (regs && regs->ip == 0 && 392 if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) {
416 (unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
417 state->next_bp = bp; 393 state->next_bp = bp;
418 bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; 394 bp = ((unsigned long *)regs->sp) - 1;
419 } 395 }
420 396
421 /* Initialize stack info and make sure the frame data is accessible: */ 397 /* Initialize stack info and make sure the frame data is accessible: */
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 72b997eaa1fc..332ae6530fa8 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -598,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
598 goto done; 598 goto done;
599 599
600 state->ip = regs->ip; 600 state->ip = regs->ip;
601 state->sp = kernel_stack_pointer(regs); 601 state->sp = regs->sp;
602 state->bp = regs->bp; 602 state->bp = regs->bp;
603 state->regs = regs; 603 state->regs = regs;
604 state->full_regs = true; 604 state->full_regs = true;