path: root/arch/x86
author    Linus Torvalds <torvalds@linux-foundation.org>  2016-10-03 19:13:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-10-03 19:13:28 -0400
commit    1a4a2bc460721bc8f91e4c1294d39b38e5af132f (patch)
tree      fe646d05f6e17f05601e0a32cc796bec718ab6e7 /arch/x86
parent    110a9e42b68719f584879c5c5c727bbae90d15f9 (diff)
parent    1ef55be16ed69538f89e0a6508be5e62fdc9851c (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
 "In this cycle this topic tree has become one of those 'super topics' that
  accumulated a lot of changes:

   - Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
     x86 - preceded by an array of changes. v4.8 saw preparatory changes in
     this area already - this is the rest of the work. Includes the thread
     stack caching performance optimization. (Andy Lutomirski)

   - switch_to() cleanups and all around enhancements. (Brian Gerst)

   - A large number of dumpstack infrastructure enhancements and an
     unwinder abstraction. The secret long term plan is safe(r) live
     patching plus maybe another attempt at debuginfo based unwinding - but
     all these current bits are standalone enhancements in a frame pointer
     based debug environment as well. (Josh Poimboeuf)

   - More __ro_after_init and const annotations. (Kees Cook)

   - Enable KASLR for the vmemmap memory region. (Thomas Garnier)"

[ The virtually mapped stack changes are pretty fundamental, and not
  x86-specific per se, even if they are only used on x86 right now. ]

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/asm: Get rid of __read_cr4_safe()
  thread_info: Use unsigned long for flags
  x86/alternatives: Add stack frame dependency to alternative_call_2()
  x86/dumpstack: Fix show_stack() task pointer regression
  x86/dumpstack: Remove dump_trace() and related callbacks
  x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
  oprofile/x86: Convert x86_backtrace() to use the new unwinder
  x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
  perf/x86: Convert perf_callchain_kernel() to use the new unwinder
  x86/unwind: Add new unwind interface and implementations
  x86/dumpstack: Remove NULL task pointer convention
  fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
  sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
  lib/syscall: Pin the task stack in collect_syscall()
  x86/process: Pin the target stack in get_wchan()
  x86/dumpstack: Pin the target stack when dumping it
  kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
  sched/core: Add try_get_task_stack() and put_task_stack()
  x86/entry/64: Fix a minor comment rebase error
  iommu/amd: Don't put completion-wait semaphore on stack
  ...
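The unwinder abstraction mentioned above is defined in the new arch/x86/include/asm/unwind.h added by this merge, and callers drive it with a simple start/next/done loop. Below is a minimal sketch of such a consumer, modeled on the perf_callchain_kernel() conversion in the diff that follows; print_kernel_backtrace() is a hypothetical helper used only for illustration, not part of the merge.

    #include <linux/printk.h>
    #include <linux/sched.h>
    #include <asm/unwind.h>

    /* Walk the kernel stack of @task (or of @regs, if given) and print
     * each return address symbolically.  A NULL first_frame tells
     * unwind_start() to begin at the current/saved stack pointer. */
    static void print_kernel_backtrace(struct task_struct *task,
                                       struct pt_regs *regs)
    {
            struct unwind_state state;
            unsigned long addr;

            for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
                 unwind_next_frame(&state)) {
                    addr = unwind_get_return_address(&state);
                    if (!addr)
                            break;
                    printk(KERN_INFO "  %pS\n", (void *)addr);
            }
    }

With CONFIG_FRAME_POINTER the loop follows saved frame pointers; otherwise the "guess" implementation scans the stack, so addresses may be unreliable.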
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 3
-rw-r--r--  arch/x86/entry/common.c | 24
-rw-r--r--  arch/x86/entry/entry_32.S | 68
-rw-r--r--  arch/x86/entry/entry_64.S | 151
-rw-r--r--  arch/x86/events/core.c | 36
-rw-r--r--  arch/x86/include/asm/alternative.h | 8
-rw-r--r--  arch/x86/include/asm/desc.h | 2
-rw-r--r--  arch/x86/include/asm/fpu/xstate.h | 3
-rw-r--r--  arch/x86/include/asm/ftrace.h | 3
-rw-r--r--  arch/x86/include/asm/kaslr.h | 1
-rw-r--r--  arch/x86/include/asm/kdebug.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 4
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 1
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/processor.h | 25
-rw-r--r--  arch/x86/include/asm/realmode.h | 2
-rw-r--r--  arch/x86/include/asm/smp.h | 3
-rw-r--r--  arch/x86/include/asm/special_insns.h | 22
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 120
-rw-r--r--  arch/x86/include/asm/switch_to.h | 164
-rw-r--r--  arch/x86/include/asm/syscall.h | 20
-rw-r--r--  arch/x86/include/asm/thread_info.h | 71
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 2
-rw-r--r--  arch/x86/include/asm/traps.h | 6
-rw-r--r--  arch/x86/include/asm/unwind.h | 73
-rw-r--r--  arch/x86/kernel/Makefile | 6
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 6
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 2
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/msi.c | 2
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 4
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 7
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 5
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 5
-rw-r--r--  arch/x86/kernel/cpu/common.c | 18
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 2
-rw-r--r--  arch/x86/kernel/dumpstack.c | 258
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 154
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 318
-rw-r--r--  arch/x86/kernel/fpu/init.c | 1
-rw-r--r--  arch/x86/kernel/ftrace.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 8
-rw-r--r--  arch/x86/kernel/head_64.S | 12
-rw-r--r--  arch/x86/kernel/irq_64.c | 3
-rw-r--r--  arch/x86/kernel/kgdb.c | 8
-rw-r--r--  arch/x86/kernel/ksysfs.c | 2
-rw-r--r--  arch/x86/kernel/kvmclock.c | 2
-rw-r--r--  arch/x86/kernel/paravirt.c | 3
-rw-r--r--  arch/x86/kernel/process.c | 42
-rw-r--r--  arch/x86/kernel/process_32.c | 33
-rw-r--r--  arch/x86/kernel/process_64.c | 25
-rw-r--r--  arch/x86/kernel/ptrace.c | 12
-rw-r--r--  arch/x86/kernel/reboot.c | 2
-rw-r--r--  arch/x86/kernel/setup.c | 6
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 4
-rw-r--r--  arch/x86/kernel/signal.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 3
-rw-r--r--  arch/x86/kernel/stacktrace.c | 79
-rw-r--r--  arch/x86/kernel/traps.c | 61
-rw-r--r--  arch/x86/kernel/unwind_frame.c | 93
-rw-r--r--  arch/x86/kernel/unwind_guess.c | 43
-rw-r--r--  arch/x86/kernel/x86_init.c | 6
-rw-r--r--  arch/x86/kvm/svm.c | 2
-rw-r--r--  arch/x86/kvm/vmx.c | 2
-rw-r--r--  arch/x86/mm/fault.c | 32
-rw-r--r--  arch/x86/mm/kaslr.c | 26
-rw-r--r--  arch/x86/mm/tlb.c | 15
-rw-r--r--  arch/x86/oprofile/backtrace.c | 49
-rw-r--r--  arch/x86/pci/pcbios.c | 7
-rw-r--r--  arch/x86/power/cpu.c | 2
-rw-r--r--  arch/x86/um/ptrace_32.c | 8
-rw-r--r--  arch/x86/xen/enlighten.c | 1
77 files changed, 1195 insertions, 1020 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0cc8811af4e0..ac5944fa6da2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@ config X86
93 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 93 select HAVE_ARCH_TRANSPARENT_HUGEPAGE
94 select HAVE_ARCH_WITHIN_STACK_FRAMES 94 select HAVE_ARCH_WITHIN_STACK_FRAMES
95 select HAVE_EBPF_JIT if X86_64 95 select HAVE_EBPF_JIT if X86_64
96 select HAVE_ARCH_VMAP_STACK if X86_64
96 select HAVE_CC_STACKPROTECTOR 97 select HAVE_CC_STACKPROTECTOR
97 select HAVE_CMPXCHG_DOUBLE 98 select HAVE_CMPXCHG_DOUBLE
98 select HAVE_CMPXCHG_LOCAL 99 select HAVE_CMPXCHG_LOCAL
@@ -109,7 +110,6 @@ config X86
109 select HAVE_EXIT_THREAD 110 select HAVE_EXIT_THREAD
110 select HAVE_FENTRY if X86_64 111 select HAVE_FENTRY if X86_64
111 select HAVE_FTRACE_MCOUNT_RECORD 112 select HAVE_FTRACE_MCOUNT_RECORD
112 select HAVE_FUNCTION_GRAPH_FP_TEST
113 select HAVE_FUNCTION_GRAPH_TRACER 113 select HAVE_FUNCTION_GRAPH_TRACER
114 select HAVE_FUNCTION_TRACER 114 select HAVE_FUNCTION_TRACER
115 select HAVE_GCC_PLUGINS 115 select HAVE_GCC_PLUGINS
@@ -157,6 +157,7 @@ config X86
157 select SPARSE_IRQ 157 select SPARSE_IRQ
158 select SRCU 158 select SRCU
159 select SYSCTL_EXCEPTION_TRACE 159 select SYSCTL_EXCEPTION_TRACE
160 select THREAD_INFO_IN_TASK
160 select USER_STACKTRACE_SUPPORT 161 select USER_STACKTRACE_SUPPORT
161 select VIRT_TO_BUS 162 select VIRT_TO_BUS
162 select X86_DEV_DMA_OPS if X86_64 163 select X86_DEV_DMA_OPS if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 1433f6b4607d..bdd9cc59d20f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -31,13 +31,6 @@
31#define CREATE_TRACE_POINTS 31#define CREATE_TRACE_POINTS
32#include <trace/events/syscalls.h> 32#include <trace/events/syscalls.h>
33 33
34static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
35{
36 unsigned long top_of_stack =
37 (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
38 return (struct thread_info *)(top_of_stack - THREAD_SIZE);
39}
40
41#ifdef CONFIG_CONTEXT_TRACKING 34#ifdef CONFIG_CONTEXT_TRACKING
42/* Called on entry from user mode with IRQs off. */ 35/* Called on entry from user mode with IRQs off. */
43__visible inline void enter_from_user_mode(void) 36__visible inline void enter_from_user_mode(void)
@@ -71,7 +64,7 @@ static long syscall_trace_enter(struct pt_regs *regs)
71{ 64{
72 u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; 65 u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
73 66
74 struct thread_info *ti = pt_regs_to_thread_info(regs); 67 struct thread_info *ti = current_thread_info();
75 unsigned long ret = 0; 68 unsigned long ret = 0;
76 bool emulated = false; 69 bool emulated = false;
77 u32 work; 70 u32 work;
@@ -173,18 +166,17 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
173 /* Disable IRQs and retry */ 166 /* Disable IRQs and retry */
174 local_irq_disable(); 167 local_irq_disable();
175 168
176 cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); 169 cached_flags = READ_ONCE(current_thread_info()->flags);
177 170
178 if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) 171 if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
179 break; 172 break;
180
181 } 173 }
182} 174}
183 175
184/* Called with IRQs disabled. */ 176/* Called with IRQs disabled. */
185__visible inline void prepare_exit_to_usermode(struct pt_regs *regs) 177__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
186{ 178{
187 struct thread_info *ti = pt_regs_to_thread_info(regs); 179 struct thread_info *ti = current_thread_info();
188 u32 cached_flags; 180 u32 cached_flags;
189 181
190 if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) 182 if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -209,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
209 * special case only applies after poking regs and before the 201 * special case only applies after poking regs and before the
210 * very next return to user mode. 202 * very next return to user mode.
211 */ 203 */
212 ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); 204 current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
213#endif 205#endif
214 206
215 user_enter_irqoff(); 207 user_enter_irqoff();
@@ -247,7 +239,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
247 */ 239 */
248__visible inline void syscall_return_slowpath(struct pt_regs *regs) 240__visible inline void syscall_return_slowpath(struct pt_regs *regs)
249{ 241{
250 struct thread_info *ti = pt_regs_to_thread_info(regs); 242 struct thread_info *ti = current_thread_info();
251 u32 cached_flags = READ_ONCE(ti->flags); 243 u32 cached_flags = READ_ONCE(ti->flags);
252 244
253 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 245 CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
@@ -270,7 +262,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
270#ifdef CONFIG_X86_64 262#ifdef CONFIG_X86_64
271__visible void do_syscall_64(struct pt_regs *regs) 263__visible void do_syscall_64(struct pt_regs *regs)
272{ 264{
273 struct thread_info *ti = pt_regs_to_thread_info(regs); 265 struct thread_info *ti = current_thread_info();
274 unsigned long nr = regs->orig_ax; 266 unsigned long nr = regs->orig_ax;
275 267
276 enter_from_user_mode(); 268 enter_from_user_mode();
@@ -303,11 +295,11 @@ __visible void do_syscall_64(struct pt_regs *regs)
303 */ 295 */
304static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) 296static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
305{ 297{
306 struct thread_info *ti = pt_regs_to_thread_info(regs); 298 struct thread_info *ti = current_thread_info();
307 unsigned int nr = (unsigned int)regs->orig_ax; 299 unsigned int nr = (unsigned int)regs->orig_ax;
308 300
309#ifdef CONFIG_IA32_EMULATION 301#ifdef CONFIG_IA32_EMULATION
310 ti->status |= TS_COMPAT; 302 current->thread.status |= TS_COMPAT;
311#endif 303#endif
312 304
313 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { 305 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 0b56666e6039..b75a8bcd2d23 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,34 +204,70 @@
204 POP_GS_EX 204 POP_GS_EX
205.endm 205.endm
206 206
207/*
208 * %eax: prev task
209 * %edx: next task
210 */
211ENTRY(__switch_to_asm)
212 /*
213 * Save callee-saved registers
214 * This must match the order in struct inactive_task_frame
215 */
216 pushl %ebp
217 pushl %ebx
218 pushl %edi
219 pushl %esi
220
221 /* switch stack */
222 movl %esp, TASK_threadsp(%eax)
223 movl TASK_threadsp(%edx), %esp
224
225#ifdef CONFIG_CC_STACKPROTECTOR
226 movl TASK_stack_canary(%edx), %ebx
227 movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
228#endif
229
230 /* restore callee-saved registers */
231 popl %esi
232 popl %edi
233 popl %ebx
234 popl %ebp
235
236 jmp __switch_to
237END(__switch_to_asm)
238
239/*
240 * A newly forked process directly context switches into this address.
241 *
242 * eax: prev task we switched from
243 * ebx: kernel thread func (NULL for user thread)
244 * edi: kernel thread arg
245 */
207ENTRY(ret_from_fork) 246ENTRY(ret_from_fork)
208 pushl %eax 247 pushl %eax
209 call schedule_tail 248 call schedule_tail
210 popl %eax 249 popl %eax
211 250
251 testl %ebx, %ebx
252 jnz 1f /* kernel threads are uncommon */
253
2542:
212 /* When we fork, we trace the syscall return in the child, too. */ 255 /* When we fork, we trace the syscall return in the child, too. */
213 movl %esp, %eax 256 movl %esp, %eax
214 call syscall_return_slowpath 257 call syscall_return_slowpath
215 jmp restore_all 258 jmp restore_all
216END(ret_from_fork)
217
218ENTRY(ret_from_kernel_thread)
219 pushl %eax
220 call schedule_tail
221 popl %eax
222 movl PT_EBP(%esp), %eax
223 call *PT_EBX(%esp)
224 movl $0, PT_EAX(%esp)
225 259
260 /* kernel thread */
2611: movl %edi, %eax
262 call *%ebx
226 /* 263 /*
227 * Kernel threads return to userspace as if returning from a syscall. 264 * A kernel thread is allowed to return here after successfully
228 * We should check whether anything actually uses this path and, if so, 265 * calling do_execve(). Exit to userspace to complete the execve()
229 * consider switching it over to ret_from_fork. 266 * syscall.
230 */ 267 */
231 movl %esp, %eax 268 movl $0, PT_EAX(%esp)
232 call syscall_return_slowpath 269 jmp 2b
233 jmp restore_all 270END(ret_from_fork)
234ENDPROC(ret_from_kernel_thread)
235 271
236/* 272/*
237 * Return to user mode is not as complex as all this looks, 273 * Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 02fff3ebfb87..fee1d95902b5 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
179 * If we need to do entry work or if we guess we'll need to do 179 * If we need to do entry work or if we guess we'll need to do
180 * exit work, go straight to the slow path. 180 * exit work, go straight to the slow path.
181 */ 181 */
182 testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 182 movq PER_CPU_VAR(current_task), %r11
183 testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
183 jnz entry_SYSCALL64_slow_path 184 jnz entry_SYSCALL64_slow_path
184 185
185entry_SYSCALL_64_fastpath: 186entry_SYSCALL_64_fastpath:
@@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath:
217 */ 218 */
218 DISABLE_INTERRUPTS(CLBR_NONE) 219 DISABLE_INTERRUPTS(CLBR_NONE)
219 TRACE_IRQS_OFF 220 TRACE_IRQS_OFF
220 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 221 movq PER_CPU_VAR(current_task), %r11
222 testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
221 jnz 1f 223 jnz 1f
222 224
223 LOCKDEP_SYS_EXIT 225 LOCKDEP_SYS_EXIT
@@ -351,8 +353,7 @@ ENTRY(stub_ptregs_64)
351 jmp entry_SYSCALL64_slow_path 353 jmp entry_SYSCALL64_slow_path
352 354
3531: 3551:
354 /* Called from C */ 356 jmp *%rax /* Called from C */
355 jmp *%rax /* called from C */
356END(stub_ptregs_64) 357END(stub_ptregs_64)
357 358
358.macro ptregs_stub func 359.macro ptregs_stub func
@@ -369,41 +370,73 @@ END(ptregs_\func)
369#include <asm/syscalls_64.h> 370#include <asm/syscalls_64.h>
370 371
371/* 372/*
373 * %rdi: prev task
374 * %rsi: next task
375 */
376ENTRY(__switch_to_asm)
377 /*
378 * Save callee-saved registers
379 * This must match the order in inactive_task_frame
380 */
381 pushq %rbp
382 pushq %rbx
383 pushq %r12
384 pushq %r13
385 pushq %r14
386 pushq %r15
387
388 /* switch stack */
389 movq %rsp, TASK_threadsp(%rdi)
390 movq TASK_threadsp(%rsi), %rsp
391
392#ifdef CONFIG_CC_STACKPROTECTOR
393 movq TASK_stack_canary(%rsi), %rbx
394 movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
395#endif
396
397 /* restore callee-saved registers */
398 popq %r15
399 popq %r14
400 popq %r13
401 popq %r12
402 popq %rbx
403 popq %rbp
404
405 jmp __switch_to
406END(__switch_to_asm)
407
408/*
372 * A newly forked process directly context switches into this address. 409 * A newly forked process directly context switches into this address.
373 * 410 *
374 * rdi: prev task we switched from 411 * rax: prev task we switched from
412 * rbx: kernel thread func (NULL for user thread)
413 * r12: kernel thread arg
375 */ 414 */
376ENTRY(ret_from_fork) 415ENTRY(ret_from_fork)
377 LOCK ; btr $TIF_FORK, TI_flags(%r8) 416 movq %rax, %rdi
378
379 call schedule_tail /* rdi: 'prev' task parameter */ 417 call schedule_tail /* rdi: 'prev' task parameter */
380 418
381 testb $3, CS(%rsp) /* from kernel_thread? */ 419 testq %rbx, %rbx /* from kernel_thread? */
382 jnz 1f 420 jnz 1f /* kernel threads are uncommon */
383 421
384 /* 4222:
385 * We came from kernel_thread. This code path is quite twisted, and
386 * someone should clean it up.
387 *
388 * copy_thread_tls stashes the function pointer in RBX and the
389 * parameter to be passed in RBP. The called function is permitted
390 * to call do_execve and thereby jump to user mode.
391 */
392 movq RBP(%rsp), %rdi
393 call *RBX(%rsp)
394 movl $0, RAX(%rsp)
395
396 /*
397 * Fall through as though we're exiting a syscall. This makes a
398 * twisted sort of sense if we just called do_execve.
399 */
400
4011:
402 movq %rsp, %rdi 423 movq %rsp, %rdi
403 call syscall_return_slowpath /* returns with IRQs disabled */ 424 call syscall_return_slowpath /* returns with IRQs disabled */
404 TRACE_IRQS_ON /* user mode is traced as IRQS on */ 425 TRACE_IRQS_ON /* user mode is traced as IRQS on */
405 SWAPGS 426 SWAPGS
406 jmp restore_regs_and_iret 427 jmp restore_regs_and_iret
428
4291:
430 /* kernel thread */
431 movq %r12, %rdi
432 call *%rbx
433 /*
434 * A kernel thread is allowed to return here after successfully
435 * calling do_execve(). Exit to userspace to complete the execve()
436 * syscall.
437 */
438 movq $0, RAX(%rsp)
439 jmp 2b
407END(ret_from_fork) 440END(ret_from_fork)
408 441
409/* 442/*
@@ -555,27 +588,69 @@ native_irq_return_iret:
555 588
556#ifdef CONFIG_X86_ESPFIX64 589#ifdef CONFIG_X86_ESPFIX64
557native_irq_return_ldt: 590native_irq_return_ldt:
558 pushq %rax 591 /*
559 pushq %rdi 592 * We are running with user GSBASE. All GPRs contain their user
593 * values. We have a percpu ESPFIX stack that is eight slots
594 * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom
595 * of the ESPFIX stack.
596 *
597 * We clobber RAX and RDI in this code. We stash RDI on the
598 * normal stack and RAX on the ESPFIX stack.
599 *
600 * The ESPFIX stack layout we set up looks like this:
601 *
602 * --- top of ESPFIX stack ---
603 * SS
604 * RSP
605 * RFLAGS
606 * CS
607 * RIP <-- RSP points here when we're done
608 * RAX <-- espfix_waddr points here
609 * --- bottom of ESPFIX stack ---
610 */
611
612 pushq %rdi /* Stash user RDI */
560 SWAPGS 613 SWAPGS
561 movq PER_CPU_VAR(espfix_waddr), %rdi 614 movq PER_CPU_VAR(espfix_waddr), %rdi
562 movq %rax, (0*8)(%rdi) /* RAX */ 615 movq %rax, (0*8)(%rdi) /* user RAX */
563 movq (2*8)(%rsp), %rax /* RIP */ 616 movq (1*8)(%rsp), %rax /* user RIP */
564 movq %rax, (1*8)(%rdi) 617 movq %rax, (1*8)(%rdi)
565 movq (3*8)(%rsp), %rax /* CS */ 618 movq (2*8)(%rsp), %rax /* user CS */
566 movq %rax, (2*8)(%rdi) 619 movq %rax, (2*8)(%rdi)
567 movq (4*8)(%rsp), %rax /* RFLAGS */ 620 movq (3*8)(%rsp), %rax /* user RFLAGS */
568 movq %rax, (3*8)(%rdi) 621 movq %rax, (3*8)(%rdi)
569 movq (6*8)(%rsp), %rax /* SS */ 622 movq (5*8)(%rsp), %rax /* user SS */
570 movq %rax, (5*8)(%rdi) 623 movq %rax, (5*8)(%rdi)
571 movq (5*8)(%rsp), %rax /* RSP */ 624 movq (4*8)(%rsp), %rax /* user RSP */
572 movq %rax, (4*8)(%rdi) 625 movq %rax, (4*8)(%rdi)
573 andl $0xffff0000, %eax 626 /* Now RAX == RSP. */
574 popq %rdi 627
628 andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
629 popq %rdi /* Restore user RDI */
630
631 /*
632 * espfix_stack[31:16] == 0. The page tables are set up such that
633 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
634 * espfix_waddr for any X. That is, there are 65536 RO aliases of
635 * the same page. Set up RSP so that RSP[31:16] contains the
636 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
637 * still points to an RO alias of the ESPFIX stack.
638 */
575 orq PER_CPU_VAR(espfix_stack), %rax 639 orq PER_CPU_VAR(espfix_stack), %rax
576 SWAPGS 640 SWAPGS
577 movq %rax, %rsp 641 movq %rax, %rsp
578 popq %rax 642
643 /*
644 * At this point, we cannot write to the stack any more, but we can
645 * still read.
646 */
647 popq %rax /* Restore user RAX */
648
649 /*
650 * RSP now points to an ordinary IRET frame, except that the page
651 * is read-only and RSP[31:16] are preloaded with the userspace
652 * values. We can now IRET back to userspace.
653 */
579 jmp native_irq_return_iret 654 jmp native_irq_return_iret
580#endif 655#endif
581END(common_interrupt) 656END(common_interrupt)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 18a1acf86c90..d31735f37ed7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -37,6 +37,7 @@
37#include <asm/timer.h> 37#include <asm/timer.h>
38#include <asm/desc.h> 38#include <asm/desc.h>
39#include <asm/ldt.h> 39#include <asm/ldt.h>
40#include <asm/unwind.h>
40 41
41#include "perf_event.h" 42#include "perf_event.h"
42 43
@@ -2267,39 +2268,26 @@ void arch_perf_update_userpage(struct perf_event *event,
2267 cyc2ns_read_end(data); 2268 cyc2ns_read_end(data);
2268} 2269}
2269 2270
2270/*
2271 * callchain support
2272 */
2273
2274static int backtrace_stack(void *data, char *name)
2275{
2276 return 0;
2277}
2278
2279static int backtrace_address(void *data, unsigned long addr, int reliable)
2280{
2281 struct perf_callchain_entry_ctx *entry = data;
2282
2283 return perf_callchain_store(entry, addr);
2284}
2285
2286static const struct stacktrace_ops backtrace_ops = {
2287 .stack = backtrace_stack,
2288 .address = backtrace_address,
2289 .walk_stack = print_context_stack_bp,
2290};
2291
2292void 2271void
2293perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) 2272perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
2294{ 2273{
2274 struct unwind_state state;
2275 unsigned long addr;
2276
2295 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 2277 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
2296 /* TODO: We don't support guest os callchain now */ 2278 /* TODO: We don't support guest os callchain now */
2297 return; 2279 return;
2298 } 2280 }
2299 2281
2300 perf_callchain_store(entry, regs->ip); 2282 if (perf_callchain_store(entry, regs->ip))
2283 return;
2301 2284
2302 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); 2285 for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
2286 unwind_next_frame(&state)) {
2287 addr = unwind_get_return_address(&state);
2288 if (!addr || perf_callchain_store(entry, addr))
2289 return;
2290 }
2303} 2291}
2304 2292
2305static inline int 2293static inline int
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index e77a6443104f..1b020381ab38 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -217,10 +217,14 @@ static inline int alternatives_text_reserved(void *start, void *end)
217 */ 217 */
218#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ 218#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
219 output, input...) \ 219 output, input...) \
220{ \
221 register void *__sp asm(_ASM_SP); \
220 asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ 222 asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
221 "call %P[new2]", feature2) \ 223 "call %P[new2]", feature2) \
222 : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ 224 : output, "+r" (__sp) \
223 [new2] "i" (newfunc2), ## input) 225 : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
226 [new2] "i" (newfunc2), ## input); \
227}
224 228
225/* 229/*
226 * use this macro(s) if you need more than one output parameter 230 * use this macro(s) if you need more than one output parameter
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4e10d73cf018..12080d87da3b 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,7 +36,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
36 36
37extern struct desc_ptr idt_descr; 37extern struct desc_ptr idt_descr;
38extern gate_desc idt_table[]; 38extern gate_desc idt_table[];
39extern struct desc_ptr debug_idt_descr; 39extern const struct desc_ptr debug_idt_descr;
40extern gate_desc debug_idt_table[]; 40extern gate_desc debug_idt_table[];
41 41
42struct gdt_page { 42struct gdt_page {
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index ae55a43e09c0..d4957ac72b48 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -45,7 +45,8 @@
45extern u64 xfeatures_mask; 45extern u64 xfeatures_mask;
46extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; 46extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
47 47
48extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); 48extern void __init update_regset_xstate_info(unsigned int size,
49 u64 xstate_mask);
49 50
50void fpu__xstate_clear_all_cpu_caps(void); 51void fpu__xstate_clear_all_cpu_caps(void);
51void *get_xsave_addr(struct xregs_state *xsave, int xstate); 52void *get_xsave_addr(struct xregs_state *xsave, int xstate);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a4820d4df617..eccd0ac6bc38 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -6,6 +6,7 @@
6# define MCOUNT_ADDR ((unsigned long)(__fentry__)) 6# define MCOUNT_ADDR ((unsigned long)(__fentry__))
7#else 7#else
8# define MCOUNT_ADDR ((unsigned long)(mcount)) 8# define MCOUNT_ADDR ((unsigned long)(mcount))
9# define HAVE_FUNCTION_GRAPH_FP_TEST
9#endif 10#endif
10#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ 11#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
11 12
@@ -13,6 +14,8 @@
13#define ARCH_SUPPORTS_FTRACE_OPS 1 14#define ARCH_SUPPORTS_FTRACE_OPS 1
14#endif 15#endif
15 16
17#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
18
16#ifndef __ASSEMBLY__ 19#ifndef __ASSEMBLY__
17extern void mcount(void); 20extern void mcount(void);
18extern atomic_t modifying_ftrace_code; 21extern atomic_t modifying_ftrace_code;
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 2674ee3de748..1052a797d71d 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -6,6 +6,7 @@ unsigned long kaslr_get_random_long(const char *purpose);
6#ifdef CONFIG_RANDOMIZE_MEMORY 6#ifdef CONFIG_RANDOMIZE_MEMORY
7extern unsigned long page_offset_base; 7extern unsigned long page_offset_base;
8extern unsigned long vmalloc_base; 8extern unsigned long vmalloc_base;
9extern unsigned long vmemmap_base;
9 10
10void kernel_randomize_memory(void); 11void kernel_randomize_memory(void);
11#else 12#else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 1ef9d581b5d9..d31881188431 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,8 +24,6 @@ enum die_val {
24extern void printk_address(unsigned long address); 24extern void printk_address(unsigned long address);
25extern void die(const char *, struct pt_regs *,long); 25extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_trace(struct task_struct *t, struct pt_regs *regs,
28 unsigned long *sp, unsigned long bp);
29extern void show_stack_regs(struct pt_regs *regs); 27extern void show_stack_regs(struct pt_regs *regs);
30extern void __show_regs(struct pt_regs *regs, int all); 28extern void __show_regs(struct pt_regs *regs, int all);
31extern unsigned long oops_begin(void); 29extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4cd8db05301f..ce932812f142 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,10 +80,6 @@ static inline unsigned long __read_cr4(void)
80{ 80{
81 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); 81 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
82} 82}
83static inline unsigned long __read_cr4_safe(void)
84{
85 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
86}
87 83
88static inline void __write_cr4(unsigned long x) 84static inline void __write_cr4(unsigned long x)
89{ 85{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 60aac60ba25f..0f400c0e4979 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -108,7 +108,6 @@ struct pv_cpu_ops {
108 unsigned long (*read_cr0)(void); 108 unsigned long (*read_cr0)(void);
109 void (*write_cr0)(unsigned long); 109 void (*write_cr0)(unsigned long);
110 110
111 unsigned long (*read_cr4_safe)(void);
112 unsigned long (*read_cr4)(void); 111 unsigned long (*read_cr4)(void);
113 void (*write_cr4)(unsigned long); 112 void (*write_cr4)(unsigned long);
114 113
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6fdef9eef2d5..3a264200c62f 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -57,11 +57,13 @@ typedef struct { pteval_t pte; } pte_t;
57#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) 57#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
58#define VMALLOC_SIZE_TB _AC(32, UL) 58#define VMALLOC_SIZE_TB _AC(32, UL)
59#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) 59#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
60#define VMEMMAP_START _AC(0xffffea0000000000, UL) 60#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
61#ifdef CONFIG_RANDOMIZE_MEMORY 61#ifdef CONFIG_RANDOMIZE_MEMORY
62#define VMALLOC_START vmalloc_base 62#define VMALLOC_START vmalloc_base
63#define VMEMMAP_START vmemmap_base
63#else 64#else
64#define VMALLOC_START __VMALLOC_BASE 65#define VMALLOC_START __VMALLOC_BASE
66#define VMEMMAP_START __VMEMMAP_BASE
65#endif /* CONFIG_RANDOMIZE_MEMORY */ 67#endif /* CONFIG_RANDOMIZE_MEMORY */
66#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) 68#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
67#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) 69#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def9537a2d..984a7bf17f6a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -389,9 +389,9 @@ struct thread_struct {
389 unsigned short fsindex; 389 unsigned short fsindex;
390 unsigned short gsindex; 390 unsigned short gsindex;
391#endif 391#endif
392#ifdef CONFIG_X86_32 392
393 unsigned long ip; 393 u32 status; /* thread synchronous flags */
394#endif 394
395#ifdef CONFIG_X86_64 395#ifdef CONFIG_X86_64
396 unsigned long fsbase; 396 unsigned long fsbase;
397 unsigned long gsbase; 397 unsigned long gsbase;
@@ -438,6 +438,15 @@ struct thread_struct {
438}; 438};
439 439
440/* 440/*
441 * Thread-synchronous status.
442 *
443 * This is different from the flags in that nobody else
444 * ever touches our thread-synchronous status, so we don't
445 * have to worry about atomic accesses.
446 */
447#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
448
449/*
441 * Set IOPL bits in EFLAGS from given mask 450 * Set IOPL bits in EFLAGS from given mask
442 */ 451 */
443static inline void native_set_iopl_mask(unsigned mask) 452static inline void native_set_iopl_mask(unsigned mask)
@@ -724,8 +733,6 @@ static inline void spin_lock_prefetch(const void *x)
724 .addr_limit = KERNEL_DS, \ 733 .addr_limit = KERNEL_DS, \
725} 734}
726 735
727extern unsigned long thread_saved_pc(struct task_struct *tsk);
728
729/* 736/*
730 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. 737 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
731 * This is necessary to guarantee that the entire "struct pt_regs" 738 * This is necessary to guarantee that the entire "struct pt_regs"
@@ -776,17 +783,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
776 .addr_limit = KERNEL_DS, \ 783 .addr_limit = KERNEL_DS, \
777} 784}
778 785
779/*
780 * Return saved PC of a blocked thread.
781 * What is this good for? it will be always the scheduler or ret_from_fork.
782 */
783#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
784
785#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) 786#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
786extern unsigned long KSTK_ESP(struct task_struct *task); 787extern unsigned long KSTK_ESP(struct task_struct *task);
787 788
788#endif /* CONFIG_X86_64 */ 789#endif /* CONFIG_X86_64 */
789 790
791extern unsigned long thread_saved_pc(struct task_struct *tsk);
792
790extern void start_thread(struct pt_regs *regs, unsigned long new_ip, 793extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
791 unsigned long new_sp); 794 unsigned long new_sp);
792 795
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index b2988c0ed829..230e1903acf0 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -44,9 +44,9 @@ struct trampoline_header {
44extern struct real_mode_header *real_mode_header; 44extern struct real_mode_header *real_mode_header;
45extern unsigned char real_mode_blob_end[]; 45extern unsigned char real_mode_blob_end[];
46 46
47extern unsigned long init_rsp;
48extern unsigned long initial_code; 47extern unsigned long initial_code;
49extern unsigned long initial_gs; 48extern unsigned long initial_gs;
49extern unsigned long initial_stack;
50 50
51extern unsigned char real_mode_blob[]; 51extern unsigned char real_mode_blob[];
52extern unsigned char real_mode_relocs[]; 52extern unsigned char real_mode_relocs[];
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ebd0c164cd4e..19980b36f394 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -39,9 +39,6 @@ DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
39DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); 39DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
40#endif 40#endif
41 41
42/* Static state in head.S used to set up a CPU */
43extern unsigned long stack_start; /* Initial stack pointer address */
44
45struct task_struct; 42struct task_struct;
46 43
47struct smp_ops { 44struct smp_ops {
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 587d7914ea4b..19a2224f9e16 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -59,22 +59,19 @@ static inline void native_write_cr3(unsigned long val)
59static inline unsigned long native_read_cr4(void) 59static inline unsigned long native_read_cr4(void)
60{ 60{
61 unsigned long val; 61 unsigned long val;
62 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
63 return val;
64}
65
66static inline unsigned long native_read_cr4_safe(void)
67{
68 unsigned long val;
69 /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
70 * exists, so it will never fail. */
71#ifdef CONFIG_X86_32 62#ifdef CONFIG_X86_32
63 /*
64 * This could fault if CR4 does not exist. Non-existent CR4
65 * is functionally equivalent to CR4 == 0. Keep it simple and pretend
66 * that CR4 == 0 on CPUs that don't have CR4.
67 */
72 asm volatile("1: mov %%cr4, %0\n" 68 asm volatile("1: mov %%cr4, %0\n"
73 "2:\n" 69 "2:\n"
74 _ASM_EXTABLE(1b, 2b) 70 _ASM_EXTABLE(1b, 2b)
75 : "=r" (val), "=m" (__force_order) : "0" (0)); 71 : "=r" (val), "=m" (__force_order) : "0" (0));
76#else 72#else
77 val = native_read_cr4(); 73 /* CR4 always exists on x86_64. */
74 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
78#endif 75#endif
79 return val; 76 return val;
80} 77}
@@ -182,11 +179,6 @@ static inline unsigned long __read_cr4(void)
182 return native_read_cr4(); 179 return native_read_cr4();
183} 180}
184 181
185static inline unsigned long __read_cr4_safe(void)
186{
187 return native_read_cr4_safe();
188}
189
190static inline void __write_cr4(unsigned long x) 182static inline void __write_cr4(unsigned long x)
191{ 183{
192 native_write_cr4(x); 184 native_write_cr4(x);
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 0944218af9e2..37f2e0b377ad 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -8,86 +8,86 @@
8 8
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/ptrace.h> 10#include <linux/ptrace.h>
11#include <asm/switch_to.h>
12
13enum stack_type {
14 STACK_TYPE_UNKNOWN,
15 STACK_TYPE_TASK,
16 STACK_TYPE_IRQ,
17 STACK_TYPE_SOFTIRQ,
18 STACK_TYPE_EXCEPTION,
19 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
20};
11 21
12extern int kstack_depth_to_print; 22struct stack_info {
13 23 enum stack_type type;
14struct thread_info; 24 unsigned long *begin, *end, *next_sp;
15struct stacktrace_ops;
16
17typedef unsigned long (*walk_stack_t)(struct task_struct *task,
18 unsigned long *stack,
19 unsigned long bp,
20 const struct stacktrace_ops *ops,
21 void *data,
22 unsigned long *end,
23 int *graph);
24
25extern unsigned long
26print_context_stack(struct task_struct *task,
27 unsigned long *stack, unsigned long bp,
28 const struct stacktrace_ops *ops, void *data,
29 unsigned long *end, int *graph);
30
31extern unsigned long
32print_context_stack_bp(struct task_struct *task,
33 unsigned long *stack, unsigned long bp,
34 const struct stacktrace_ops *ops, void *data,
35 unsigned long *end, int *graph);
36
37/* Generic stack tracer with callbacks */
38
39struct stacktrace_ops {
40 int (*address)(void *data, unsigned long address, int reliable);
41 /* On negative return stop dumping */
42 int (*stack)(void *data, char *name);
43 walk_stack_t walk_stack;
44}; 25};
45 26
46void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 27bool in_task_stack(unsigned long *stack, struct task_struct *task,
47 unsigned long *stack, unsigned long bp, 28 struct stack_info *info);
48 const struct stacktrace_ops *ops, void *data); 29
30int get_stack_info(unsigned long *stack, struct task_struct *task,
31 struct stack_info *info, unsigned long *visit_mask);
32
33void stack_type_str(enum stack_type type, const char **begin,
34 const char **end);
35
36static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
37{
38 void *begin = info->begin;
39 void *end = info->end;
40
41 return (info->type != STACK_TYPE_UNKNOWN &&
42 addr >= begin && addr < end &&
43 addr + len > begin && addr + len <= end);
44}
45
46extern int kstack_depth_to_print;
49 47
50#ifdef CONFIG_X86_32 48#ifdef CONFIG_X86_32
51#define STACKSLOTS_PER_LINE 8 49#define STACKSLOTS_PER_LINE 8
52#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
53#else 50#else
54#define STACKSLOTS_PER_LINE 4 51#define STACKSLOTS_PER_LINE 4
55#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
56#endif 52#endif
57 53
58#ifdef CONFIG_FRAME_POINTER 54#ifdef CONFIG_FRAME_POINTER
59static inline unsigned long 55static inline unsigned long *
60stack_frame(struct task_struct *task, struct pt_regs *regs) 56get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
61{ 57{
62 unsigned long bp;
63
64 if (regs) 58 if (regs)
65 return regs->bp; 59 return (unsigned long *)regs->bp;
66 60
67 if (task == current) { 61 if (task == current)
68 /* Grab bp right from our regs */ 62 return __builtin_frame_address(0);
69 get_bp(bp);
70 return bp;
71 }
72 63
73 /* bp is the last reg pushed by switch_to */ 64 return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
74 return *(unsigned long *)task->thread.sp;
75} 65}
76#else 66#else
77static inline unsigned long 67static inline unsigned long *
78stack_frame(struct task_struct *task, struct pt_regs *regs) 68get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
79{ 69{
80 return 0; 70 return NULL;
71}
72#endif /* CONFIG_FRAME_POINTER */
73
74static inline unsigned long *
75get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
76{
77 if (regs)
78 return (unsigned long *)kernel_stack_pointer(regs);
79
80 if (task == current)
81 return __builtin_frame_address(0);
82
83 return (unsigned long *)task->thread.sp;
81} 84}
82#endif
83 85
84extern void 86void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
85show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 87 unsigned long *stack, char *log_lvl);
86 unsigned long *stack, unsigned long bp, char *log_lvl);
87 88
88extern void 89void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
89show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 90 unsigned long *sp, char *log_lvl);
90 unsigned long *sp, unsigned long bp, char *log_lvl);
91 91
92extern unsigned int code_bytes; 92extern unsigned int code_bytes;
93 93
@@ -106,7 +106,7 @@ static inline unsigned long caller_frame_pointer(void)
106{ 106{
107 struct stack_frame *frame; 107 struct stack_frame *frame;
108 108
109 get_bp(frame); 109 frame = __builtin_frame_address(0);
110 110
111#ifdef CONFIG_FRAME_POINTER 111#ifdef CONFIG_FRAME_POINTER
112 frame = frame->next_frame; 112 frame = frame->next_frame;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1b03a1..5cb436acd463 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,130 +2,66 @@
2#define _ASM_X86_SWITCH_TO_H 2#define _ASM_X86_SWITCH_TO_H
3 3
4struct task_struct; /* one of the stranger aspects of C forward declarations */ 4struct task_struct; /* one of the stranger aspects of C forward declarations */
5
6struct task_struct *__switch_to_asm(struct task_struct *prev,
7 struct task_struct *next);
8
5__visible struct task_struct *__switch_to(struct task_struct *prev, 9__visible struct task_struct *__switch_to(struct task_struct *prev,
6 struct task_struct *next); 10 struct task_struct *next);
7struct tss_struct; 11struct tss_struct;
8void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 12void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
9 struct tss_struct *tss); 13 struct tss_struct *tss);
10 14
11#ifdef CONFIG_X86_32 15/* This runs runs on the previous thread's stack. */
16static inline void prepare_switch_to(struct task_struct *prev,
17 struct task_struct *next)
18{
19#ifdef CONFIG_VMAP_STACK
20 /*
21 * If we switch to a stack that has a top-level paging entry
22 * that is not present in the current mm, the resulting #PF will
23 * will be promoted to a double-fault and we'll panic. Probe
24 * the new stack now so that vmalloc_fault can fix up the page
25 * tables if needed. This can only happen if we use a stack
26 * in vmap space.
27 *
28 * We assume that the stack is aligned so that it never spans
29 * more than one top-level paging entry.
30 *
31 * To minimize cache pollution, just follow the stack pointer.
32 */
33 READ_ONCE(*(unsigned char *)next->thread.sp);
34#endif
35}
36
37asmlinkage void ret_from_fork(void);
38
39/* data that is pointed to by thread.sp */
40struct inactive_task_frame {
41#ifdef CONFIG_X86_64
42 unsigned long r15;
43 unsigned long r14;
44 unsigned long r13;
45 unsigned long r12;
46#else
47 unsigned long si;
48 unsigned long di;
49#endif
50 unsigned long bx;
51 unsigned long bp;
52 unsigned long ret_addr;
53};
12 54
13#ifdef CONFIG_CC_STACKPROTECTOR 55struct fork_frame {
14#define __switch_canary \ 56 struct inactive_task_frame frame;
15 "movl %P[task_canary](%[next]), %%ebx\n\t" \ 57 struct pt_regs regs;
16 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" 58};
17#define __switch_canary_oparam \
18 , [stack_canary] "=m" (stack_canary.canary)
19#define __switch_canary_iparam \
20 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
21#else /* CC_STACKPROTECTOR */
22#define __switch_canary
23#define __switch_canary_oparam
24#define __switch_canary_iparam
25#endif /* CC_STACKPROTECTOR */
26 59
27/*
28 * Saving eflags is important. It switches not only IOPL between tasks,
29 * it also protects other tasks from NT leaking through sysenter etc.
30 */
31#define switch_to(prev, next, last) \ 60#define switch_to(prev, next, last) \
32do { \ 61do { \
33 /* \ 62 prepare_switch_to(prev, next); \
34 * Context-switching clobbers all registers, so we clobber \
35 * them explicitly, via unused output variables. \
36 * (EAX and EBP is not listed because EBP is saved/restored \
37 * explicitly for wchan access and EAX is the return value of \
38 * __switch_to()) \
39 */ \
40 unsigned long ebx, ecx, edx, esi, edi; \
41 \
42 asm volatile("pushl %%ebp\n\t" /* save EBP */ \
43 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
44 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
45 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
46 "pushl %[next_ip]\n\t" /* restore EIP */ \
47 __switch_canary \
48 "jmp __switch_to\n" /* regparm call */ \
49 "1:\t" \
50 "popl %%ebp\n\t" /* restore EBP */ \
51 \
52 /* output parameters */ \
53 : [prev_sp] "=m" (prev->thread.sp), \
54 [prev_ip] "=m" (prev->thread.ip), \
55 "=a" (last), \
56 \
57 /* clobbered output registers: */ \
58 "=b" (ebx), "=c" (ecx), "=d" (edx), \
59 "=S" (esi), "=D" (edi) \
60 \
61 __switch_canary_oparam \
62 \
63 /* input parameters: */ \
64 : [next_sp] "m" (next->thread.sp), \
65 [next_ip] "m" (next->thread.ip), \
66 \
67 /* regparm parameters for __switch_to(): */ \
68 [prev] "a" (prev), \
69 [next] "d" (next) \
70 \ 63 \
71 __switch_canary_iparam \ 64 ((last) = __switch_to_asm((prev), (next))); \
72 \
73 : /* reloaded segment registers */ \
74 "memory"); \
75} while (0) 65} while (0)
76 66
77#else /* CONFIG_X86_32 */
78
79/* frame pointer must be last for get_wchan */
80#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
81#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
82
83#define __EXTRA_CLOBBER \
84 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
85 "r12", "r13", "r14", "r15", "flags"
86
87#ifdef CONFIG_CC_STACKPROTECTOR
88#define __switch_canary \
89 "movq %P[task_canary](%%rsi),%%r8\n\t" \
90 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
91#define __switch_canary_oparam \
92 , [gs_canary] "=m" (irq_stack_union.stack_canary)
93#define __switch_canary_iparam \
94 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
95#else /* CC_STACKPROTECTOR */
96#define __switch_canary
97#define __switch_canary_oparam
98#define __switch_canary_iparam
99#endif /* CC_STACKPROTECTOR */
100
101/*
102 * There is no need to save or restore flags, because flags are always
103 * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL
104 * has no effect.
105 */
106#define switch_to(prev, next, last) \
107 asm volatile(SAVE_CONTEXT \
108 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
109 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
110 "call __switch_to\n\t" \
111 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
112 __switch_canary \
113 "movq %P[thread_info](%%rsi),%%r8\n\t" \
114 "movq %%rax,%%rdi\n\t" \
115 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
116 "jnz ret_from_fork\n\t" \
117 RESTORE_CONTEXT \
118 : "=a" (last) \
119 __switch_canary_oparam \
120 : [next] "S" (next), [prev] "D" (prev), \
121 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
122 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
123 [_tif_fork] "i" (_TIF_FORK), \
124 [thread_info] "i" (offsetof(struct task_struct, stack)), \
125 [current_task] "m" (current_task) \
126 __switch_canary_iparam \
127 : "memory", "cc" __EXTRA_CLOBBER)
128
129#endif /* CONFIG_X86_32 */
130
131#endif /* _ASM_X86_SWITCH_TO_H */ 67#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4e23dd15c661..e3c95e8e61c5 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
60 * TS_COMPAT is set for 32-bit syscall entries and then 60 * TS_COMPAT is set for 32-bit syscall entries and then
61 * remains set until we return to user mode. 61 * remains set until we return to user mode.
62 */ 62 */
63 if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED)) 63 if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
64 /* 64 /*
65 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO 65 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
66 * and will match correctly in comparisons. 66 * and will match correctly in comparisons.
@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
116 unsigned long *args) 116 unsigned long *args)
117{ 117{
118# ifdef CONFIG_IA32_EMULATION 118# ifdef CONFIG_IA32_EMULATION
119 if (task_thread_info(task)->status & TS_COMPAT) 119 if (task->thread.status & TS_COMPAT)
120 switch (i) { 120 switch (i) {
121 case 0: 121 case 0:
122 if (!n--) break; 122 if (!n--) break;
@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
177 const unsigned long *args) 177 const unsigned long *args)
178{ 178{
179# ifdef CONFIG_IA32_EMULATION 179# ifdef CONFIG_IA32_EMULATION
180 if (task_thread_info(task)->status & TS_COMPAT) 180 if (task->thread.status & TS_COMPAT)
181 switch (i) { 181 switch (i) {
182 case 0: 182 case 0:
183 if (!n--) break; 183 if (!n--) break;
@@ -234,18 +234,8 @@ static inline void syscall_set_arguments(struct task_struct *task,
234 234
235static inline int syscall_get_arch(void) 235static inline int syscall_get_arch(void)
236{ 236{
237#ifdef CONFIG_IA32_EMULATION 237 /* x32 tasks should be considered AUDIT_ARCH_X86_64. */
238 /* 238 return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
239 * TS_COMPAT is set for 32-bit syscall entry and then
240 * remains set until we return to user mode.
241 *
242 * x32 tasks should be considered AUDIT_ARCH_X86_64.
243 */
244 if (task_thread_info(current)->status & TS_COMPAT)
245 return AUDIT_ARCH_I386;
246#endif
247 /* Both x32 and x86_64 are considered "64-bit". */
248 return AUDIT_ARCH_X86_64;
249} 239}
250#endif /* CONFIG_X86_32 */ 240#endif /* CONFIG_X86_32 */
251 241
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8b7c8d8e0852..2aaca53c0974 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -52,21 +52,6 @@ struct task_struct;
52#include <asm/cpufeature.h> 52#include <asm/cpufeature.h>
53#include <linux/atomic.h> 53#include <linux/atomic.h>
54 54
55struct thread_info {
56 struct task_struct *task; /* main task structure */
57 __u32 flags; /* low level flags */
58 __u32 status; /* thread synchronous flags */
59 __u32 cpu; /* current CPU */
60};
61
62#define INIT_THREAD_INFO(tsk) \
63{ \
64 .task = &tsk, \
65 .flags = 0, \
66 .cpu = 0, \
67}
68
69#define init_thread_info (init_thread_union.thread_info)
70#define init_stack (init_thread_union.stack) 55#define init_stack (init_thread_union.stack)
71 56
72#else /* !__ASSEMBLY__ */ 57#else /* !__ASSEMBLY__ */
@@ -95,7 +80,6 @@ struct thread_info {
95#define TIF_UPROBE 12 /* breakpointed or singlestepping */ 80#define TIF_UPROBE 12 /* breakpointed or singlestepping */
96#define TIF_NOTSC 16 /* TSC is not accessible in userland */ 81#define TIF_NOTSC 16 /* TSC is not accessible in userland */
97#define TIF_IA32 17 /* IA32 compatibility process */ 82#define TIF_IA32 17 /* IA32 compatibility process */
98#define TIF_FORK 18 /* ret_from_fork */
99#define TIF_NOHZ 19 /* in adaptive nohz mode */ 83#define TIF_NOHZ 19 /* in adaptive nohz mode */
100#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ 84#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
101#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ 85#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
@@ -119,7 +103,6 @@ struct thread_info {
119#define _TIF_UPROBE (1 << TIF_UPROBE) 103#define _TIF_UPROBE (1 << TIF_UPROBE)
120#define _TIF_NOTSC (1 << TIF_NOTSC) 104#define _TIF_NOTSC (1 << TIF_NOTSC)
121#define _TIF_IA32 (1 << TIF_IA32) 105#define _TIF_IA32 (1 << TIF_IA32)
122#define _TIF_FORK (1 << TIF_FORK)
123#define _TIF_NOHZ (1 << TIF_NOHZ) 106#define _TIF_NOHZ (1 << TIF_NOHZ)
124#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) 107#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
125#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 108#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
@@ -160,11 +143,6 @@ struct thread_info {
160 */ 143 */
161#ifndef __ASSEMBLY__ 144#ifndef __ASSEMBLY__
162 145
163static inline struct thread_info *current_thread_info(void)
164{
165 return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
166}
167
168static inline unsigned long current_stack_pointer(void) 146static inline unsigned long current_stack_pointer(void)
169{ 147{
170 unsigned long sp; 148 unsigned long sp;
@@ -226,60 +204,19 @@ static inline int arch_within_stack_frames(const void * const stack,
226# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) 204# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
227#endif 205#endif
228 206
229/*
230 * ASM operand which evaluates to a 'thread_info' address of
231 * the current task, if it is known that "reg" is exactly "off"
232 * bytes below the top of the stack currently.
233 *
234 * ( The kernel stack's size is known at build time, it is usually
235 * 2 or 4 pages, and the bottom of the kernel stack contains
236 * the thread_info structure. So to access the thread_info very
237 * quickly from assembly code we can calculate down from the
238 * top of the kernel stack to the bottom, using constant,
239 * build-time calculations only. )
240 *
241 * For example, to fetch the current thread_info->flags value into %eax
242 * on x86-64 defconfig kernels, in syscall entry code where RSP is
243 * currently at exactly SIZEOF_PTREGS bytes away from the top of the
244 * stack:
245 *
246 * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
247 *
248 * will translate to:
249 *
250 * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
251 *
252 * which is below the current RSP by almost 16K.
253 */
254#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
255
256#endif 207#endif
257 208
258/*
259 * Thread-synchronous status.
260 *
261 * This is different from the flags in that nobody else
262 * ever touches our thread-synchronous status, so we don't
263 * have to worry about atomic accesses.
264 */
265#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
266#ifdef CONFIG_COMPAT 209#ifdef CONFIG_COMPAT
267#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ 210#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
268#endif 211#endif
269
270#ifndef __ASSEMBLY__ 212#ifndef __ASSEMBLY__
271 213
272static inline bool in_ia32_syscall(void)
273{
274#ifdef CONFIG_X86_32 214#ifdef CONFIG_X86_32
275 return true; 215#define in_ia32_syscall() true
276#endif 216#else
277#ifdef CONFIG_IA32_EMULATION 217#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
278 if (current_thread_info()->status & TS_COMPAT) 218 current->thread.status & TS_COMPAT)
279 return true;
280#endif 219#endif
281 return false;
282}
283 220
284/* 221/*
285 * Force syscall return via IRET by making it look as if there was 222 * Force syscall return via IRET by making it look as if there was
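With CONFIG_THREAD_INFO_IN_TASK selected (as this series does for x86), thread_info becomes the first member of task_struct, so the arch-specific current_thread_info() removed above is superseded by the generic definition in <linux/thread_info.h>. A rough sketch of that generic replacement (not part of this diff):

	#include <asm/current.h>

	/* thread_info now sits at the start of task_struct, so the current
	 * task pointer doubles as a pointer to its thread_info. */
	#define current_thread_info() ((struct thread_info *)current)

This is also why in_ia32_syscall() above now tests current->thread.status rather than current_thread_info()->status: the thread-synchronous status word moves out of thread_info in this series.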
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index dee8a70382ba..6fa85944af83 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
81/* Initialize cr4 shadow for this CPU. */ 81/* Initialize cr4 shadow for this CPU. */
82static inline void cr4_init_shadow(void) 82static inline void cr4_init_shadow(void)
83{ 83{
84 this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe()); 84 this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
85} 85}
86 86
87/* Set in this cpu's CR4. */ 87/* Set in this cpu's CR4. */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c3496619740a..01fd0a7f48cd 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -117,6 +117,12 @@ extern void ist_exit(struct pt_regs *regs);
117extern void ist_begin_non_atomic(struct pt_regs *regs); 117extern void ist_begin_non_atomic(struct pt_regs *regs);
118extern void ist_end_non_atomic(void); 118extern void ist_end_non_atomic(void);
119 119
120#ifdef CONFIG_VMAP_STACK
121void __noreturn handle_stack_overflow(const char *message,
122 struct pt_regs *regs,
123 unsigned long fault_address);
124#endif
125
120/* Interrupts/Exceptions */ 126/* Interrupts/Exceptions */
121enum { 127enum {
122 X86_TRAP_DE = 0, /* 0, Divide-by-zero */ 128 X86_TRAP_DE = 0, /* 0, Divide-by-zero */
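handle_stack_overflow() is intended to be called from a fault path that notices the faulting address landing in the vmap'd stack's guard page, so the crash is reported as a stack overflow rather than an opaque double fault. A hedged sketch of such a caller (the name and bounds arithmetic here are illustrative; the real check added by this series lives in the double-fault handler):

	#include <linux/kernel.h>
	#include <linux/mm.h>
	#include <linux/sched.h>
	#include <asm/traps.h>

	#ifdef CONFIG_VMAP_STACK
	static void example_report_guard_page_hit(struct pt_regs *regs,
						  unsigned long fault_addr)
	{
		unsigned long stack = (unsigned long)task_stack_page(current);

		/* The guard page sits immediately below the vmap'd stack. */
		if (fault_addr >= stack - PAGE_SIZE && fault_addr < stack)
			handle_stack_overflow("kernel stack overflow",
					      regs, fault_addr);
	}
	#endif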
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
new file mode 100644
index 000000000000..c4b6d1cafa46
--- /dev/null
+++ b/arch/x86/include/asm/unwind.h
@@ -0,0 +1,73 @@
1#ifndef _ASM_X86_UNWIND_H
2#define _ASM_X86_UNWIND_H
3
4#include <linux/sched.h>
5#include <linux/ftrace.h>
6#include <asm/ptrace.h>
7#include <asm/stacktrace.h>
8
9struct unwind_state {
10 struct stack_info stack_info;
11 unsigned long stack_mask;
12 struct task_struct *task;
13 int graph_idx;
14#ifdef CONFIG_FRAME_POINTER
15 unsigned long *bp;
16#else
17 unsigned long *sp;
18#endif
19};
20
21void __unwind_start(struct unwind_state *state, struct task_struct *task,
22 struct pt_regs *regs, unsigned long *first_frame);
23
24bool unwind_next_frame(struct unwind_state *state);
25
26static inline bool unwind_done(struct unwind_state *state)
27{
28 return state->stack_info.type == STACK_TYPE_UNKNOWN;
29}
30
31static inline
32void unwind_start(struct unwind_state *state, struct task_struct *task,
33 struct pt_regs *regs, unsigned long *first_frame)
34{
35 first_frame = first_frame ? : get_stack_pointer(task, regs);
36
37 __unwind_start(state, task, regs, first_frame);
38}
39
40#ifdef CONFIG_FRAME_POINTER
41
42static inline
43unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
44{
45 if (unwind_done(state))
46 return NULL;
47
48 return state->bp + 1;
49}
50
51unsigned long unwind_get_return_address(struct unwind_state *state);
52
53#else /* !CONFIG_FRAME_POINTER */
54
55static inline
56unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
57{
58 return NULL;
59}
60
61static inline
62unsigned long unwind_get_return_address(struct unwind_state *state)
63{
64 if (unwind_done(state))
65 return 0;
66
67 return ftrace_graph_ret_addr(state->task, &state->graph_idx,
68 *state->sp, state->sp);
69}
70
71#endif /* CONFIG_FRAME_POINTER */
72
73#endif /* _ASM_X86_UNWIND_H */
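For orientation, consumers of this new interface follow a simple start/iterate pattern. A minimal sketch (the function name and printk are illustrative; the real users are the stacktrace, perf, oprofile and dumpstack conversions in this merge):

	#include <linux/kernel.h>
	#include <linux/sched.h>
	#include <asm/unwind.h>

	/* Walk a task's kernel stack and print every return address the
	 * configured unwinder (frame pointer or guess) can recover. */
	static void example_print_return_addresses(struct task_struct *task,
						   struct pt_regs *regs)
	{
		struct unwind_state state;
		unsigned long addr;

		for (unwind_start(&state, task, regs, NULL);
		     !unwind_done(&state);
		     unwind_next_frame(&state)) {
			addr = unwind_get_return_address(&state);
			if (!addr)
				break;
			printk(KERN_DEFAULT "%pS\n", (void *)addr);
		}
	}

Passing a NULL first_frame makes unwind_start() begin at the task's current stack pointer, which the wrapper above defaults to via get_stack_pointer().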
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0503f5bfb18d..45257cf84370 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,6 +125,12 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
125obj-$(CONFIG_PERF_EVENTS) += perf_regs.o 125obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
126obj-$(CONFIG_TRACING) += tracepoint.o 126obj-$(CONFIG_TRACING) += tracepoint.o
127 127
128ifdef CONFIG_FRAME_POINTER
129obj-y += unwind_frame.o
130else
131obj-y += unwind_guess.o
132endif
133
128### 134###
129# 64 bit specific files 135# 64 bit specific files
130ifeq ($(CONFIG_X86_64),y) 136ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index adb3eaf8fe2a..48587335ede8 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void)
99 saved_magic = 0x12345678; 99 saved_magic = 0x12345678;
100#else /* CONFIG_64BIT */ 100#else /* CONFIG_64BIT */
101#ifdef CONFIG_SMP 101#ifdef CONFIG_SMP
102 stack_start = (unsigned long)temp_stack + sizeof(temp_stack); 102 initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
103 early_gdt_descr.address = 103 early_gdt_descr.address =
104 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 104 (unsigned long)get_cpu_gdt_table(smp_processor_id());
105 initial_gs = per_cpu_offset(smp_processor_id()); 105 initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5b2ae106bd4a..8862da76ef6f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -25,7 +25,7 @@
25static struct apic apic_physflat; 25static struct apic apic_physflat;
26static struct apic apic_flat; 26static struct apic apic_flat;
27 27
28struct apic __read_mostly *apic = &apic_flat; 28struct apic *apic __ro_after_init = &apic_flat;
29EXPORT_SYMBOL_GPL(apic); 29EXPORT_SYMBOL_GPL(apic);
30 30
31static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 31static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -154,7 +154,7 @@ static int flat_probe(void)
154 return 1; 154 return 1;
155} 155}
156 156
157static struct apic apic_flat = { 157static struct apic apic_flat __ro_after_init = {
158 .name = "flat", 158 .name = "flat",
159 .probe = flat_probe, 159 .probe = flat_probe,
160 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 160 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
@@ -248,7 +248,7 @@ static int physflat_probe(void)
248 return 0; 248 return 0;
249} 249}
250 250
251static struct apic apic_physflat = { 251static struct apic apic_physflat __ro_after_init = {
252 252
253 .name = "physical flat", 253 .name = "physical flat",
254 .probe = physflat_probe, 254 .probe = physflat_probe,
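Several conversions in this area swap __read_mostly or __refdata for __ro_after_init, which keeps an object writable while the kernel boots (so __init probe code can still pick or patch it) and has it mapped read-only once mark_rodata_ro() runs. A minimal sketch of the pattern, with made-up names:

	#include <linux/cache.h>	/* __ro_after_init */
	#include <linux/init.h>

	struct example_driver_ops {
		const char *name;
		int (*probe)(void);
	};

	/* Writable only during boot; a write after init has finished faults. */
	static struct example_driver_ops chosen_ops __ro_after_init = {
		.name = "default",
	};

	static int __init example_select_ops(void)
	{
		chosen_ops.name = "probed";	/* still legal: rodata not yet sealed */
		return 0;
	}
	early_initcall(example_select_ops);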
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index c05688b2deff..b109e4389c92 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v)
108 WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); 108 WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
109} 109}
110 110
111struct apic apic_noop = { 111struct apic apic_noop __ro_after_init = {
112 .name = "noop", 112 .name = "noop",
113 .probe = noop_probe, 113 .probe = noop_probe,
114 .acpi_madt_oem_check = NULL, 114 .acpi_madt_oem_check = NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 06dbaa458bfe..56012010332c 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -142,7 +142,7 @@ static int probe_bigsmp(void)
142 return dmi_bigsmp; 142 return dmi_bigsmp;
143} 143}
144 144
145static struct apic apic_bigsmp = { 145static struct apic apic_bigsmp __ro_after_init = {
146 146
147 .name = "bigsmp", 147 .name = "bigsmp",
148 .probe = probe_bigsmp, 148 .probe = probe_bigsmp,
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index ade25320df96..015bbf30e3e3 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
269 hpet_msi_write(irq_data_get_irq_handler_data(data), msg); 269 hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
270} 270}
271 271
272static struct irq_chip hpet_msi_controller = { 272static struct irq_chip hpet_msi_controller __ro_after_init = {
273 .name = "HPET-MSI", 273 .name = "HPET-MSI",
274 .irq_unmask = hpet_msi_unmask, 274 .irq_unmask = hpet_msi_unmask,
275 .irq_mask = hpet_msi_mask, 275 .irq_mask = hpet_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 563096267ca2..c48264e202fd 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -72,7 +72,7 @@ static int probe_default(void)
72 return 1; 72 return 1;
73} 73}
74 74
75static struct apic apic_default = { 75static struct apic apic_default __ro_after_init = {
76 76
77 .name = "default", 77 .name = "default",
78 .probe = probe_default, 78 .probe = probe_default,
@@ -126,7 +126,7 @@ static struct apic apic_default = {
126 126
127apic_driver(apic_default); 127apic_driver(apic_default);
128 128
129struct apic *apic = &apic_default; 129struct apic *apic __ro_after_init = &apic_default;
130EXPORT_SYMBOL_GPL(apic); 130EXPORT_SYMBOL_GPL(apic);
131 131
132static int cmdline_apic __initdata; 132static int cmdline_apic __initdata;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 54f35d988025..200af5ae9662 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
227 cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); 227 cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
228} 228}
229 229
230static struct apic apic_x2apic_cluster = { 230static struct apic apic_x2apic_cluster __ro_after_init = {
231 231
232 .name = "cluster x2apic", 232 .name = "cluster x2apic",
233 .probe = x2apic_cluster_probe, 233 .probe = x2apic_cluster_probe,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 4f13f54f1b1f..ff111f05a314 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -98,7 +98,7 @@ static int x2apic_phys_probe(void)
98 return apic == &apic_x2apic_phys; 98 return apic == &apic_x2apic_phys;
99} 99}
100 100
101static struct apic apic_x2apic_phys = { 101static struct apic apic_x2apic_phys __ro_after_init = {
102 102
103 .name = "physical x2apic", 103 .name = "physical x2apic",
104 .probe = x2apic_phys_probe, 104 .probe = x2apic_phys_probe,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index cb0673c1e940..b9f6157d4271 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -560,7 +560,7 @@ static int uv_probe(void)
560 return apic == &apic_x2apic_uv_x; 560 return apic == &apic_x2apic_uv_x;
561} 561}
562 562
563static struct apic __refdata apic_x2apic_uv_x = { 563static struct apic apic_x2apic_uv_x __ro_after_init = {
564 564
565 .name = "UV large system", 565 .name = "UV large system",
566 .probe = uv_probe, 566 .probe = uv_probe,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 2bd5c6ff7ee7..c62e015b126c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,10 +29,13 @@
29 29
30void common(void) { 30void common(void) {
31 BLANK(); 31 BLANK();
32 OFFSET(TI_flags, thread_info, flags); 32 OFFSET(TASK_threadsp, task_struct, thread.sp);
33 OFFSET(TI_status, thread_info, status); 33#ifdef CONFIG_CC_STACKPROTECTOR
34 OFFSET(TASK_stack_canary, task_struct, stack_canary);
35#endif
34 36
35 BLANK(); 37 BLANK();
38 OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
36 OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); 39 OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
37 40
38 BLANK(); 41 BLANK();
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d217dc0..880aa093268d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
57 /* Size of SYSENTER_stack */ 57 /* Size of SYSENTER_stack */
58 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); 58 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
59 59
60#ifdef CONFIG_CC_STACKPROTECTOR
61 BLANK();
62 OFFSET(stack_canary_offset, stack_canary, canary);
63#endif
64
60#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) 65#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
61 BLANK(); 66 BLANK();
62 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); 67 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97d4e0b..210927ee2e74 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
56 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 56 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
57 BLANK(); 57 BLANK();
58 58
59#ifdef CONFIG_CC_STACKPROTECTOR
60 DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
61 BLANK();
62#endif
63
59 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); 64 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
60 DEFINE(NR_syscalls, sizeof(syscalls_64)); 65 DEFINE(NR_syscalls, sizeof(syscalls_64));
61 66
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bcc9ccc220c9..9bd910a7dd0a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1264,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg)
1264__setup("clearcpuid=", setup_disablecpuid); 1264__setup("clearcpuid=", setup_disablecpuid);
1265 1265
1266#ifdef CONFIG_X86_64 1266#ifdef CONFIG_X86_64
1267struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1267struct desc_ptr idt_descr __ro_after_init = {
1268struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, 1268 .size = NR_VECTORS * 16 - 1,
1269 (unsigned long) debug_idt_table }; 1269 .address = (unsigned long) idt_table,
1270};
1271const struct desc_ptr debug_idt_descr = {
1272 .size = NR_VECTORS * 16 - 1,
1273 .address = (unsigned long) debug_idt_table,
1274};
1270 1275
1271DEFINE_PER_CPU_FIRST(union irq_stack_union, 1276DEFINE_PER_CPU_FIRST(union irq_stack_union,
1272 irq_stack_union) __aligned(PAGE_SIZE) __visible; 1277 irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1280,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
1280EXPORT_PER_CPU_SYMBOL(current_task); 1285EXPORT_PER_CPU_SYMBOL(current_task);
1281 1286
1282DEFINE_PER_CPU(char *, irq_stack_ptr) = 1287DEFINE_PER_CPU(char *, irq_stack_ptr) =
1283 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 1288 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
1284 1289
1285DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; 1290DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1286 1291
@@ -1304,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1304/* May not be marked __init: used by software suspend */ 1309/* May not be marked __init: used by software suspend */
1305void syscall_init(void) 1310void syscall_init(void)
1306{ 1311{
1307 /*
1308 * LSTAR and STAR live in a bit strange symbiosis.
1309 * They both write to the same internal register. STAR allows to
1310 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
1311 */
1312 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); 1312 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1313 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); 1313 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
1314 1314
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 28f1b54b7fad..24e87e74990d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex);
72u64 size_or_mask, size_and_mask; 72u64 size_or_mask, size_and_mask;
73static bool mtrr_aps_delayed_init; 73static bool mtrr_aps_delayed_init;
74 74
75static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; 75static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
76 76
77const struct mtrr_ops *mtrr_if; 77const struct mtrr_ops *mtrr_if;
78 78
79static void set_mtrr(unsigned int reg, unsigned long base, 79static void set_mtrr(unsigned int reg, unsigned long base,
80 unsigned long size, mtrr_type type); 80 unsigned long size, mtrr_type type);
81 81
82void set_mtrr_ops(const struct mtrr_ops *ops) 82void __init set_mtrr_ops(const struct mtrr_ops *ops)
83{ 83{
84 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 84 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
85 mtrr_ops[ops->vendor] = ops; 85 mtrr_ops[ops->vendor] = ops;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 6c7ced07d16d..ad8bd763efa5 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index,
54bool get_mtrr_state(void); 54bool get_mtrr_state(void);
55void mtrr_bp_pat_init(void); 55void mtrr_bp_pat_init(void);
56 56
57extern void set_mtrr_ops(const struct mtrr_ops *ops); 57extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
58 58
59extern u64 size_or_mask, size_and_mask; 59extern u64 size_or_mask, size_and_mask;
60extern const struct mtrr_ops *mtrr_if; 60extern const struct mtrr_ops *mtrr_if;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 92e8f0a7159c..9b7cf5c28f5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,7 +17,7 @@
17#include <linux/sysfs.h> 17#include <linux/sysfs.h>
18 18
19#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
20 20#include <asm/unwind.h>
21 21
22int panic_on_unrecovered_nmi; 22int panic_on_unrecovered_nmi;
23int panic_on_io_nmi; 23int panic_on_io_nmi;
@@ -25,11 +25,29 @@ unsigned int code_bytes = 64;
25int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; 25int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
26static int die_counter; 26static int die_counter;
27 27
28bool in_task_stack(unsigned long *stack, struct task_struct *task,
29 struct stack_info *info)
30{
31 unsigned long *begin = task_stack_page(task);
32 unsigned long *end = task_stack_page(task) + THREAD_SIZE;
33
34 if (stack < begin || stack >= end)
35 return false;
36
37 info->type = STACK_TYPE_TASK;
38 info->begin = begin;
39 info->end = end;
40 info->next_sp = NULL;
41
42 return true;
43}
44
28static void printk_stack_address(unsigned long address, int reliable, 45static void printk_stack_address(unsigned long address, int reliable,
29 void *data) 46 char *log_lvl)
30{ 47{
48 touch_nmi_watchdog();
31 printk("%s [<%p>] %s%pB\n", 49 printk("%s [<%p>] %s%pB\n",
32 (char *)data, (void *)address, reliable ? "" : "? ", 50 log_lvl, (void *)address, reliable ? "" : "? ",
33 (void *)address); 51 (void *)address);
34} 52}
35 53
@@ -38,176 +56,120 @@ void printk_address(unsigned long address)
38 pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address); 56 pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
39} 57}
40 58
41#ifdef CONFIG_FUNCTION_GRAPH_TRACER 59void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
42static void 60 unsigned long *stack, char *log_lvl)
43print_ftrace_graph_addr(unsigned long addr, void *data,
44 const struct stacktrace_ops *ops,
45 struct task_struct *task, int *graph)
46{ 61{
47 unsigned long ret_addr; 62 struct unwind_state state;
48 int index; 63 struct stack_info stack_info = {0};
49 64 unsigned long visit_mask = 0;
50 if (addr != (unsigned long)return_to_handler) 65 int graph_idx = 0;
51 return;
52
53 index = task->curr_ret_stack;
54
55 if (!task->ret_stack || index < *graph)
56 return;
57
58 index -= *graph;
59 ret_addr = task->ret_stack[index].ret;
60
61 ops->address(data, ret_addr, 1);
62 66
63 (*graph)++; 67 printk("%sCall Trace:\n", log_lvl);
64}
65#else
66static inline void
67print_ftrace_graph_addr(unsigned long addr, void *data,
68 const struct stacktrace_ops *ops,
69 struct task_struct *task, int *graph)
70{ }
71#endif
72
73/*
74 * x86-64 can have up to three kernel stacks:
75 * process stack
76 * interrupt stack
77 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
78 */
79
80static inline int valid_stack_ptr(struct task_struct *task,
81 void *p, unsigned int size, void *end)
82{
83 void *t = task_stack_page(task);
84 if (end) {
85 if (p < end && p >= (end-THREAD_SIZE))
86 return 1;
87 else
88 return 0;
89 }
90 return p >= t && p < t + THREAD_SIZE - size;
91}
92 68
93unsigned long 69 unwind_start(&state, task, regs, stack);
94print_context_stack(struct task_struct *task,
95 unsigned long *stack, unsigned long bp,
96 const struct stacktrace_ops *ops, void *data,
97 unsigned long *end, int *graph)
98{
99 struct stack_frame *frame = (struct stack_frame *)bp;
100 70
101 /* 71 /*
102 * If we overflowed the stack into a guard page, jump back to the 72 * Iterate through the stacks, starting with the current stack pointer.
103 * bottom of the usable stack. 73 * Each stack has a pointer to the next one.
74 *
75 * x86-64 can have several stacks:
76 * - task stack
77 * - interrupt stack
78 * - HW exception stacks (double fault, nmi, debug, mce)
79 *
80 * x86-32 can have up to three stacks:
81 * - task stack
82 * - softirq stack
83 * - hardirq stack
104 */ 84 */
105 if ((unsigned long)task_stack_page(task) - (unsigned long)stack < 85 for (; stack; stack = stack_info.next_sp) {
106 PAGE_SIZE) 86 const char *str_begin, *str_end;
107 stack = (unsigned long *)task_stack_page(task);
108
109 while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
110 unsigned long addr;
111
112 addr = *stack;
113 if (__kernel_text_address(addr)) {
114 if ((unsigned long) stack == bp + sizeof(long)) {
115 ops->address(data, addr, 1);
116 frame = frame->next_frame;
117 bp = (unsigned long) frame;
118 } else {
119 ops->address(data, addr, 0);
120 }
121 print_ftrace_graph_addr(addr, data, ops, task, graph);
122 }
123 stack++;
124 }
125 return bp;
126}
127EXPORT_SYMBOL_GPL(print_context_stack);
128
129unsigned long
130print_context_stack_bp(struct task_struct *task,
131 unsigned long *stack, unsigned long bp,
132 const struct stacktrace_ops *ops, void *data,
133 unsigned long *end, int *graph)
134{
135 struct stack_frame *frame = (struct stack_frame *)bp;
136 unsigned long *ret_addr = &frame->return_address;
137 87
138 while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { 88 /*
139 unsigned long addr = *ret_addr; 89 * If we overflowed the task stack into a guard page, jump back
90 * to the bottom of the usable stack.
91 */
92 if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
93 stack = task_stack_page(task);
140 94
141 if (!__kernel_text_address(addr)) 95 if (get_stack_info(stack, task, &stack_info, &visit_mask))
142 break; 96 break;
143 97
144 if (ops->address(data, addr, 1)) 98 stack_type_str(stack_info.type, &str_begin, &str_end);
145 break; 99 if (str_begin)
146 frame = frame->next_frame; 100 printk("%s <%s> ", log_lvl, str_begin);
147 ret_addr = &frame->return_address; 101
148 print_ftrace_graph_addr(addr, data, ops, task, graph); 102 /*
149 } 103 * Scan the stack, printing any text addresses we find. At the
150 104 * same time, follow proper stack frames with the unwinder.
151 return (unsigned long)frame; 105 *
152} 106 * Addresses found during the scan which are not reported by
153EXPORT_SYMBOL_GPL(print_context_stack_bp); 107 * the unwinder are considered to be additional clues which are
154 108 * sometimes useful for debugging and are prefixed with '?'.
155static int print_trace_stack(void *data, char *name) 109 * This also serves as a failsafe option in case the unwinder
156{ 110 * goes off in the weeds.
157 printk("%s <%s> ", (char *)data, name); 111 */
158 return 0; 112 for (; stack < stack_info.end; stack++) {
159} 113 unsigned long real_addr;
160 114 int reliable = 0;
161/* 115 unsigned long addr = *stack;
162 * Print one address/symbol entries per line. 116 unsigned long *ret_addr_p =
163 */ 117 unwind_get_return_address_ptr(&state);
164static int print_trace_address(void *data, unsigned long addr, int reliable) 118
165{ 119 if (!__kernel_text_address(addr))
166 touch_nmi_watchdog(); 120 continue;
167 printk_stack_address(addr, reliable, data); 121
168 return 0; 122 if (stack == ret_addr_p)
169} 123 reliable = 1;
170 124
171static const struct stacktrace_ops print_trace_ops = { 125 /*
172 .stack = print_trace_stack, 126 * When function graph tracing is enabled for a
173 .address = print_trace_address, 127 * function, its return address on the stack is
174 .walk_stack = print_context_stack, 128 * replaced with the address of an ftrace handler
175}; 129 * (return_to_handler). In that case, before printing
176 130 * the "real" address, we want to print the handler
177void 131 * address as an "unreliable" hint that function graph
178show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 132 * tracing was involved.
179 unsigned long *stack, unsigned long bp, char *log_lvl) 133 */
180{ 134 real_addr = ftrace_graph_ret_addr(task, &graph_idx,
181 printk("%sCall Trace:\n", log_lvl); 135 addr, stack);
182 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 136 if (real_addr != addr)
183} 137 printk_stack_address(addr, 0, log_lvl);
138 printk_stack_address(real_addr, reliable, log_lvl);
139
140 if (!reliable)
141 continue;
142
143 /*
144 * Get the next frame from the unwinder. No need to
145 * check for an error: if anything goes wrong, the rest
146 * of the addresses will just be printed as unreliable.
147 */
148 unwind_next_frame(&state);
149 }
184 150
185void show_trace(struct task_struct *task, struct pt_regs *regs, 151 if (str_end)
186 unsigned long *stack, unsigned long bp) 152 printk("%s <%s> ", log_lvl, str_end);
187{ 153 }
188 show_trace_log_lvl(task, regs, stack, bp, "");
189} 154}
190 155
191void show_stack(struct task_struct *task, unsigned long *sp) 156void show_stack(struct task_struct *task, unsigned long *sp)
192{ 157{
193 unsigned long bp = 0; 158 task = task ? : current;
194 unsigned long stack;
195 159
196 /* 160 /*
197 * Stack frames below this one aren't interesting. Don't show them 161 * Stack frames below this one aren't interesting. Don't show them
198 * if we're printing for %current. 162 * if we're printing for %current.
199 */ 163 */
200 if (!sp && (!task || task == current)) { 164 if (!sp && task == current)
201 sp = &stack; 165 sp = get_stack_pointer(current, NULL);
202 bp = stack_frame(current, NULL);
203 }
204 166
205 show_stack_log_lvl(task, NULL, sp, bp, ""); 167 show_stack_log_lvl(task, NULL, sp, "");
206} 168}
207 169
208void show_stack_regs(struct pt_regs *regs) 170void show_stack_regs(struct pt_regs *regs)
209{ 171{
210 show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); 172 show_stack_log_lvl(current, regs, NULL, "");
211} 173}
212 174
213static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; 175static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
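The rewritten show_trace_log_lvl() above combines two mechanisms: get_stack_info() classifies the stack the current pointer lives on and reports where that stack stored the previous stack's pointer (next_sp), while the unwinder marks which of the scanned addresses are reliable. Stripped of the printing, the outer loop amounts to this sketch (the function name is illustrative):

	#include <linux/sched.h>
	#include <asm/stacktrace.h>

	static void example_walk_linked_stacks(struct task_struct *task,
					       unsigned long *stack)
	{
		struct stack_info info = {0};
		unsigned long visit_mask = 0;

		/* Hop from the current stack to the one it interrupted, and
		 * so on, until get_stack_info() reports an unknown or
		 * already-visited stack. */
		for (; stack; stack = info.next_sp) {
			if (get_stack_info(stack, task, &info, &visit_mask))
				break;

			/* scan the addresses in [info.begin, info.end) here */
		}
	}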
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 09675712eba8..06eb322b5f9f 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,93 +16,121 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19static void *is_irq_stack(void *p, void *irq) 19void stack_type_str(enum stack_type type, const char **begin, const char **end)
20{ 20{
21 if (p < irq || p >= (irq + THREAD_SIZE)) 21 switch (type) {
22 return NULL; 22 case STACK_TYPE_IRQ:
23 return irq + THREAD_SIZE; 23 case STACK_TYPE_SOFTIRQ:
24 *begin = "IRQ";
25 *end = "EOI";
26 break;
27 default:
28 *begin = NULL;
29 *end = NULL;
30 }
24} 31}
25 32
26 33static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
27static void *is_hardirq_stack(unsigned long *stack, int cpu)
28{ 34{
29 void *irq = per_cpu(hardirq_stack, cpu); 35 unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
36 unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
30 37
31 return is_irq_stack(stack, irq); 38 /*
32} 39 * This is a software stack, so 'end' can be a valid stack pointer.
40 * It just means the stack is empty.
41 */
42 if (stack < begin || stack > end)
43 return false;
33 44
34static void *is_softirq_stack(unsigned long *stack, int cpu) 45 info->type = STACK_TYPE_IRQ;
35{ 46 info->begin = begin;
36 void *irq = per_cpu(softirq_stack, cpu); 47 info->end = end;
37 48
38 return is_irq_stack(stack, irq); 49 /*
50 * See irq_32.c -- the next stack pointer is stored at the beginning of
51 * the stack.
52 */
53 info->next_sp = (unsigned long *)*begin;
54
55 return true;
39} 56}
40 57
41void dump_trace(struct task_struct *task, struct pt_regs *regs, 58static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
42 unsigned long *stack, unsigned long bp,
43 const struct stacktrace_ops *ops, void *data)
44{ 59{
45 const unsigned cpu = get_cpu(); 60 unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
46 int graph = 0; 61 unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
47 u32 *prev_esp;
48 62
49 if (!task) 63 /*
50 task = current; 64 * This is a software stack, so 'end' can be a valid stack pointer.
65 * It just means the stack is empty.
66 */
67 if (stack < begin || stack > end)
68 return false;
51 69
52 if (!stack) { 70 info->type = STACK_TYPE_SOFTIRQ;
53 unsigned long dummy; 71 info->begin = begin;
72 info->end = end;
54 73
55 stack = &dummy; 74 /*
56 if (task != current) 75 * The next stack pointer is stored at the beginning of the stack.
57 stack = (unsigned long *)task->thread.sp; 76 * See irq_32.c.
58 } 77 */
78 info->next_sp = (unsigned long *)*begin;
59 79
60 if (!bp) 80 return true;
61 bp = stack_frame(task, regs); 81}
62 82
63 for (;;) { 83int get_stack_info(unsigned long *stack, struct task_struct *task,
64 void *end_stack; 84 struct stack_info *info, unsigned long *visit_mask)
85{
86 if (!stack)
87 goto unknown;
65 88
66 end_stack = is_hardirq_stack(stack, cpu); 89 task = task ? : current;
67 if (!end_stack)
68 end_stack = is_softirq_stack(stack, cpu);
69 90
70 bp = ops->walk_stack(task, stack, bp, ops, data, 91 if (in_task_stack(stack, task, info))
71 end_stack, &graph); 92 goto recursion_check;
72 93
73 /* Stop if not on irq stack */ 94 if (task != current)
74 if (!end_stack) 95 goto unknown;
75 break;
76 96
77 /* The previous esp is saved on the bottom of the stack */ 97 if (in_hardirq_stack(stack, info))
78 prev_esp = (u32 *)(end_stack - THREAD_SIZE); 98 goto recursion_check;
79 stack = (unsigned long *)*prev_esp;
80 if (!stack)
81 break;
82 99
83 if (ops->stack(data, "IRQ") < 0) 100 if (in_softirq_stack(stack, info))
84 break; 101 goto recursion_check;
85 touch_nmi_watchdog(); 102
103 goto unknown;
104
105recursion_check:
106 /*
107 * Make sure we don't iterate through any given stack more than once.
108 * If it comes up a second time then there's something wrong going on:
109 * just break out and report an unknown stack type.
110 */
111 if (visit_mask) {
112 if (*visit_mask & (1UL << info->type))
113 goto unknown;
114 *visit_mask |= 1UL << info->type;
86 } 115 }
87 put_cpu(); 116
117 return 0;
118
119unknown:
120 info->type = STACK_TYPE_UNKNOWN;
121 return -EINVAL;
88} 122}
89EXPORT_SYMBOL(dump_trace);
90 123
91void 124void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
92show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 125 unsigned long *sp, char *log_lvl)
93 unsigned long *sp, unsigned long bp, char *log_lvl)
94{ 126{
95 unsigned long *stack; 127 unsigned long *stack;
96 int i; 128 int i;
97 129
98 if (sp == NULL) { 130 if (!try_get_task_stack(task))
99 if (regs) 131 return;
100 sp = (unsigned long *)regs->sp; 132
101 else if (task) 133 sp = sp ? : get_stack_pointer(task, regs);
102 sp = (unsigned long *)task->thread.sp;
103 else
104 sp = (unsigned long *)&sp;
105 }
106 134
107 stack = sp; 135 stack = sp;
108 for (i = 0; i < kstack_depth_to_print; i++) { 136 for (i = 0; i < kstack_depth_to_print; i++) {
@@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
117 touch_nmi_watchdog(); 145 touch_nmi_watchdog();
118 } 146 }
119 pr_cont("\n"); 147 pr_cont("\n");
120 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 148 show_trace_log_lvl(task, regs, sp, log_lvl);
149
150 put_task_stack(task);
121} 151}
122 152
123 153
@@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs)
139 u8 *ip; 169 u8 *ip;
140 170
141 pr_emerg("Stack:\n"); 171 pr_emerg("Stack:\n");
142 show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); 172 show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
143 173
144 pr_emerg("Code:"); 174 pr_emerg("Code:");
145 175
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 9ee4520ce83c..36cf1a498227 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,261 +16,145 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19static char *exception_stack_names[N_EXCEPTION_STACKS] = {
20 [ DOUBLEFAULT_STACK-1 ] = "#DF",
21 [ NMI_STACK-1 ] = "NMI",
22 [ DEBUG_STACK-1 ] = "#DB",
23 [ MCE_STACK-1 ] = "#MC",
24};
19 25
20#define N_EXCEPTION_STACKS_END \ 26static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
21 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) 27 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
22 28 [DEBUG_STACK - 1] = DEBUG_STKSZ
23static char x86_stack_ids[][8] = {
24 [ DEBUG_STACK-1 ] = "#DB",
25 [ NMI_STACK-1 ] = "NMI",
26 [ DOUBLEFAULT_STACK-1 ] = "#DF",
27 [ MCE_STACK-1 ] = "#MC",
28#if DEBUG_STKSZ > EXCEPTION_STKSZ
29 [ N_EXCEPTION_STACKS ...
30 N_EXCEPTION_STACKS_END ] = "#DB[?]"
31#endif
32}; 29};
33 30
34static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 31void stack_type_str(enum stack_type type, const char **begin, const char **end)
35 unsigned *usedp, char **idp)
36{ 32{
37 unsigned k; 33 BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
38 34
39 /* 35 switch (type) {
40 * Iterate over all exception stacks, and figure out whether 36 case STACK_TYPE_IRQ:
41 * 'stack' is in one of them: 37 *begin = "IRQ";
42 */ 38 *end = "EOI";
43 for (k = 0; k < N_EXCEPTION_STACKS; k++) { 39 break;
44 unsigned long end = per_cpu(orig_ist, cpu).ist[k]; 40 case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
45 /* 41 *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
46 * Is 'stack' above this exception frame's end? 42 *end = "EOE";
47 * If yes then skip to the next frame. 43 break;
48 */ 44 default:
49 if (stack >= end) 45 *begin = NULL;
50 continue; 46 *end = NULL;
51 /*
52 * Is 'stack' above this exception frame's start address?
53 * If yes then we found the right frame.
54 */
55 if (stack >= end - EXCEPTION_STKSZ) {
56 /*
57 * Make sure we only iterate through an exception
58 * stack once. If it comes up for the second time
59 * then there's something wrong going on - just
60 * break out and return NULL:
61 */
62 if (*usedp & (1U << k))
63 break;
64 *usedp |= 1U << k;
65 *idp = x86_stack_ids[k];
66 return (unsigned long *)end;
67 }
68 /*
69 * If this is a debug stack, and if it has a larger size than
70 * the usual exception stacks, then 'stack' might still
71 * be within the lower portion of the debug stack:
72 */
73#if DEBUG_STKSZ > EXCEPTION_STKSZ
74 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
75 unsigned j = N_EXCEPTION_STACKS - 1;
76
77 /*
78 * Black magic. A large debug stack is composed of
79 * multiple exception stack entries, which we
80 * iterate through now. Dont look:
81 */
82 do {
83 ++j;
84 end -= EXCEPTION_STKSZ;
85 x86_stack_ids[j][4] = '1' +
86 (j - N_EXCEPTION_STACKS);
87 } while (stack < end - EXCEPTION_STKSZ);
88 if (*usedp & (1U << j))
89 break;
90 *usedp |= 1U << j;
91 *idp = x86_stack_ids[j];
92 return (unsigned long *)end;
93 }
94#endif
95 } 47 }
96 return NULL;
97} 48}
98 49
99static inline int 50static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
100in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
101 unsigned long *irq_stack_end)
102{ 51{
103 return (stack >= irq_stack && stack < irq_stack_end); 52 unsigned long *begin, *end;
104} 53 struct pt_regs *regs;
105 54 unsigned k;
106static const unsigned long irq_stack_size =
107 (IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
108
109enum stack_type {
110 STACK_IS_UNKNOWN,
111 STACK_IS_NORMAL,
112 STACK_IS_EXCEPTION,
113 STACK_IS_IRQ,
114};
115
116static enum stack_type
117analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
118 unsigned long **stack_end, unsigned long *irq_stack,
119 unsigned *used, char **id)
120{
121 unsigned long addr;
122 55
123 addr = ((unsigned long)stack & (~(THREAD_SIZE - 1))); 56 BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
124 if ((unsigned long)task_stack_page(task) == addr)
125 return STACK_IS_NORMAL;
126 57
127 *stack_end = in_exception_stack(cpu, (unsigned long)stack, 58 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
128 used, id); 59 end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
129 if (*stack_end) 60 begin = end - (exception_stack_sizes[k] / sizeof(long));
130 return STACK_IS_EXCEPTION; 61 regs = (struct pt_regs *)end - 1;
131 62
132 if (!irq_stack) 63 if (stack < begin || stack >= end)
133 return STACK_IS_NORMAL; 64 continue;
134 65
135 *stack_end = irq_stack; 66 info->type = STACK_TYPE_EXCEPTION + k;
136 irq_stack = irq_stack - irq_stack_size; 67 info->begin = begin;
68 info->end = end;
69 info->next_sp = (unsigned long *)regs->sp;
137 70
138 if (in_irq_stack(stack, irq_stack, *stack_end)) 71 return true;
139 return STACK_IS_IRQ; 72 }
140 73
141 return STACK_IS_UNKNOWN; 74 return false;
142} 75}
143 76
144/* 77static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
145 * x86-64 can have up to three kernel stacks:
146 * process stack
147 * interrupt stack
148 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
149 */
150
151void dump_trace(struct task_struct *task, struct pt_regs *regs,
152 unsigned long *stack, unsigned long bp,
153 const struct stacktrace_ops *ops, void *data)
154{ 78{
155 const unsigned cpu = get_cpu(); 79 unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
156 unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); 80 unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
157 unsigned long dummy;
158 unsigned used = 0;
159 int graph = 0;
160 int done = 0;
161
162 if (!task)
163 task = current;
164
165 if (!stack) {
166 if (regs)
167 stack = (unsigned long *)regs->sp;
168 else if (task != current)
169 stack = (unsigned long *)task->thread.sp;
170 else
171 stack = &dummy;
172 }
173 81
174 if (!bp)
175 bp = stack_frame(task, regs);
176 /* 82 /*
177 * Print function call entries in all stacks, starting at the 83 * This is a software stack, so 'end' can be a valid stack pointer.
178 * current stack address. If the stacks consist of nested 84 * It just means the stack is empty.
179 * exceptions
180 */ 85 */
181 while (!done) { 86 if (stack < begin || stack > end)
182 unsigned long *stack_end; 87 return false;
183 enum stack_type stype;
184 char *id;
185 88
186 stype = analyze_stack(cpu, task, stack, &stack_end, 89 info->type = STACK_TYPE_IRQ;
187 irq_stack, &used, &id); 90 info->begin = begin;
91 info->end = end;
188 92
189 /* Default finish unless specified to continue */ 93 /*
190 done = 1; 94 * The next stack pointer is the first thing pushed by the entry code
95 * after switching to the irq stack.
96 */
97 info->next_sp = (unsigned long *)*(end - 1);
191 98
192 switch (stype) { 99 return true;
100}
193 101
194 /* Break out early if we are on the thread stack */ 102int get_stack_info(unsigned long *stack, struct task_struct *task,
195 case STACK_IS_NORMAL: 103 struct stack_info *info, unsigned long *visit_mask)
196 break; 104{
105 if (!stack)
106 goto unknown;
197 107
198 case STACK_IS_EXCEPTION: 108 task = task ? : current;
199 109
200 if (ops->stack(data, id) < 0) 110 if (in_task_stack(stack, task, info))
201 break; 111 goto recursion_check;
202 112
203 bp = ops->walk_stack(task, stack, bp, ops, 113 if (task != current)
204 data, stack_end, &graph); 114 goto unknown;
205 ops->stack(data, "<EOE>");
206 /*
207 * We link to the next stack via the
208 * second-to-last pointer (index -2 to end) in the
209 * exception stack:
210 */
211 stack = (unsigned long *) stack_end[-2];
212 done = 0;
213 break;
214 115
215 case STACK_IS_IRQ: 116 if (in_exception_stack(stack, info))
117 goto recursion_check;
216 118
217 if (ops->stack(data, "IRQ") < 0) 119 if (in_irq_stack(stack, info))
218 break; 120 goto recursion_check;
219 bp = ops->walk_stack(task, stack, bp,
220 ops, data, stack_end, &graph);
221 /*
222 * We link to the next stack (which would be
223 * the process stack normally) the last
224 * pointer (index -1 to end) in the IRQ stack:
225 */
226 stack = (unsigned long *) (stack_end[-1]);
227 irq_stack = NULL;
228 ops->stack(data, "EOI");
229 done = 0;
230 break;
231 121
232 case STACK_IS_UNKNOWN: 122 goto unknown;
233 ops->stack(data, "UNK");
234 break;
235 }
236 }
237 123
124recursion_check:
238 /* 125 /*
239 * This handles the process stack: 126 * Make sure we don't iterate through any given stack more than once.
127 * If it comes up a second time then there's something wrong going on:
128 * just break out and report an unknown stack type.
240 */ 129 */
241 bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); 130 if (visit_mask) {
242 put_cpu(); 131 if (*visit_mask & (1UL << info->type))
132 goto unknown;
133 *visit_mask |= 1UL << info->type;
134 }
135
136 return 0;
137
138unknown:
139 info->type = STACK_TYPE_UNKNOWN;
140 return -EINVAL;
243} 141}
244EXPORT_SYMBOL(dump_trace);
245 142
246void 143void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
247show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 144 unsigned long *sp, char *log_lvl)
248 unsigned long *sp, unsigned long bp, char *log_lvl)
249{ 145{
250 unsigned long *irq_stack_end; 146 unsigned long *irq_stack_end;
251 unsigned long *irq_stack; 147 unsigned long *irq_stack;
252 unsigned long *stack; 148 unsigned long *stack;
253 int cpu;
254 int i; 149 int i;
255 150
256 preempt_disable(); 151 if (!try_get_task_stack(task))
257 cpu = smp_processor_id(); 152 return;
258 153
259 irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); 154 irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
260 irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); 155 irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
261 156
262 /* 157 sp = sp ? : get_stack_pointer(task, regs);
263 * Debugging aid: "show_stack(NULL, NULL);" prints the
264 * back trace for this cpu:
265 */
266 if (sp == NULL) {
267 if (regs)
268 sp = (unsigned long *)regs->sp;
269 else if (task)
270 sp = (unsigned long *)task->thread.sp;
271 else
272 sp = (unsigned long *)&sp;
273 }
274 158
275 stack = sp; 159 stack = sp;
276 for (i = 0; i < kstack_depth_to_print; i++) { 160 for (i = 0; i < kstack_depth_to_print; i++) {
@@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
299 stack++; 183 stack++;
300 touch_nmi_watchdog(); 184 touch_nmi_watchdog();
301 } 185 }
302 preempt_enable();
303 186
304 pr_cont("\n"); 187 pr_cont("\n");
305 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 188 show_trace_log_lvl(task, regs, sp, log_lvl);
189
190 put_task_stack(task);
306} 191}
307 192
308void show_regs(struct pt_regs *regs) 193void show_regs(struct pt_regs *regs)
309{ 194{
310 int i; 195 int i;
311 unsigned long sp;
312 196
313 sp = regs->sp;
314 show_regs_print_info(KERN_DEFAULT); 197 show_regs_print_info(KERN_DEFAULT);
315 __show_regs(regs, 1); 198 __show_regs(regs, 1);
316 199
@@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs)
325 u8 *ip; 208 u8 *ip;
326 209
327 printk(KERN_DEFAULT "Stack:\n"); 210 printk(KERN_DEFAULT "Stack:\n");
328 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 211 show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
329 0, KERN_DEFAULT);
330 212
331 printk(KERN_DEFAULT "Code: "); 213 printk(KERN_DEFAULT "Code: ");
332 214
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 93982aebb398..2f2b8c7ccb85 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void)
317 on_boot_cpu = 0; 317 on_boot_cpu = 0;
318 318
319 WARN_ON_FPU(current->thread.fpu.fpstate_active); 319 WARN_ON_FPU(current->thread.fpu.fpstate_active);
320 current_thread_info()->status = 0;
321 320
322 if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) 321 if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
323 eagerfpu = ENABLE; 322 eagerfpu = ENABLE;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d036cfb4495d..8639bb2ae058 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
1029 } 1029 }
1030 1030
1031 if (ftrace_push_return_trace(old, self_addr, &trace.depth, 1031 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
1032 frame_pointer) == -EBUSY) { 1032 frame_pointer, parent) == -EBUSY) {
1033 *parent = old; 1033 *parent = old;
1034 return; 1034 return;
1035 } 1035 }
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6f8902b0d151..5f401262f12d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
94 */ 94 */
95__HEAD 95__HEAD
96ENTRY(startup_32) 96ENTRY(startup_32)
97 movl pa(stack_start),%ecx 97 movl pa(initial_stack),%ecx
98 98
99 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 99 /* test KEEP_SEGMENTS flag to see if the bootloader is asking
100 us to not reload segments */ 100 us to not reload segments */
@@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4
286 * start_secondary(). 286 * start_secondary().
287 */ 287 */
288ENTRY(start_cpu0) 288ENTRY(start_cpu0)
289 movl stack_start, %ecx 289 movl initial_stack, %ecx
290 movl %ecx, %esp 290 movl %ecx, %esp
291 jmp *(initial_code) 291 jmp *(initial_code)
292ENDPROC(start_cpu0) 292ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@ ENTRY(startup_32_smp)
307 movl %eax,%es 307 movl %eax,%es
308 movl %eax,%fs 308 movl %eax,%fs
309 movl %eax,%gs 309 movl %eax,%gs
310 movl pa(stack_start),%ecx 310 movl pa(initial_stack),%ecx
311 movl %eax,%ss 311 movl %eax,%ss
312 leal -__PAGE_OFFSET(%ecx),%esp 312 leal -__PAGE_OFFSET(%ecx),%esp
313 313
@@ -703,7 +703,7 @@ ENTRY(initial_page_table)
703 703
704.data 704.data
705.balign 4 705.balign 4
706ENTRY(stack_start) 706ENTRY(initial_stack)
707 .long init_thread_union+THREAD_SIZE 707 .long init_thread_union+THREAD_SIZE
708 708
709__INITRODATA 709__INITRODATA
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9f8efc9f0075..c98a559c346e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,7 +66,7 @@ startup_64:
66 */ 66 */
67 67
68 /* 68 /*
69 * Setup stack for verify_cpu(). "-8" because stack_start is defined 69 * Setup stack for verify_cpu(). "-8" because initial_stack is defined
70 * this way, see below. Our best guess is a NULL ptr for stack 70 * this way, see below. Our best guess is a NULL ptr for stack
71 * termination heuristics and we don't want to break anything which 71 * termination heuristics and we don't want to break anything which
72 * might depend on it (kgdb, ...). 72 * might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@ ENTRY(secondary_startup_64)
226 movq %rax, %cr0 226 movq %rax, %cr0
227 227
228 /* Setup a boot time stack */ 228 /* Setup a boot time stack */
229 movq stack_start(%rip), %rsp 229 movq initial_stack(%rip), %rsp
230 230
231 /* zero EFLAGS after setting rsp */ 231 /* zero EFLAGS after setting rsp */
232 pushq $0 232 pushq $0
@@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64)
310 * start_secondary(). 310 * start_secondary().
311 */ 311 */
312ENTRY(start_cpu0) 312ENTRY(start_cpu0)
313 movq stack_start(%rip),%rsp 313 movq initial_stack(%rip),%rsp
314 movq initial_code(%rip),%rax 314 movq initial_code(%rip),%rax
315 pushq $0 # fake return address to stop unwinder 315 pushq $0 # fake return address to stop unwinder
316 pushq $__KERNEL_CS # set correct cs 316 pushq $__KERNEL_CS # set correct cs
@@ -319,17 +319,15 @@ ENTRY(start_cpu0)
319ENDPROC(start_cpu0) 319ENDPROC(start_cpu0)
320#endif 320#endif
321 321
322 /* SMP bootup changes these two */ 322 /* Both SMP bootup and ACPI suspend change these variables */
323 __REFDATA 323 __REFDATA
324 .balign 8 324 .balign 8
325 GLOBAL(initial_code) 325 GLOBAL(initial_code)
326 .quad x86_64_start_kernel 326 .quad x86_64_start_kernel
327 GLOBAL(initial_gs) 327 GLOBAL(initial_gs)
328 .quad INIT_PER_CPU_VAR(irq_stack_union) 328 .quad INIT_PER_CPU_VAR(irq_stack_union)
329 329 GLOBAL(initial_stack)
330 GLOBAL(stack_start)
331 .quad init_thread_union+THREAD_SIZE-8 330 .quad init_thread_union+THREAD_SIZE-8
332 .word 0
333 __FINITDATA 331 __FINITDATA
334 332
335bad_address: 333bad_address:
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4a7903714065..9ebd0b0e73d9 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
40 if (user_mode(regs)) 40 if (user_mode(regs))
41 return; 41 return;
42 42
43 if (regs->sp >= curbase + sizeof(struct thread_info) + 43 if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
44 sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
45 regs->sp <= curbase + THREAD_SIZE) 44 regs->sp <= curbase + THREAD_SIZE)
46 return; 45 return;
47 46
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 04cde527d728..8e36f249646e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
50#include <asm/apicdef.h> 50#include <asm/apicdef.h>
51#include <asm/apic.h> 51#include <asm/apic.h>
52#include <asm/nmi.h> 52#include <asm/nmi.h>
53#include <asm/switch_to.h>
53 54
54struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = 55struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
55{ 56{
@@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
166 gdb_regs[GDB_DX] = 0; 167 gdb_regs[GDB_DX] = 0;
167 gdb_regs[GDB_SI] = 0; 168 gdb_regs[GDB_SI] = 0;
168 gdb_regs[GDB_DI] = 0; 169 gdb_regs[GDB_DI] = 0;
169 gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; 170 gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp;
170#ifdef CONFIG_X86_32 171#ifdef CONFIG_X86_32
171 gdb_regs[GDB_DS] = __KERNEL_DS; 172 gdb_regs[GDB_DS] = __KERNEL_DS;
172 gdb_regs[GDB_ES] = __KERNEL_DS; 173 gdb_regs[GDB_ES] = __KERNEL_DS;
173 gdb_regs[GDB_PS] = 0; 174 gdb_regs[GDB_PS] = 0;
174 gdb_regs[GDB_CS] = __KERNEL_CS; 175 gdb_regs[GDB_CS] = __KERNEL_CS;
175 gdb_regs[GDB_PC] = p->thread.ip;
176 gdb_regs[GDB_SS] = __KERNEL_DS; 176 gdb_regs[GDB_SS] = __KERNEL_DS;
177 gdb_regs[GDB_FS] = 0xFFFF; 177 gdb_regs[GDB_FS] = 0xFFFF;
178 gdb_regs[GDB_GS] = 0xFFFF; 178 gdb_regs[GDB_GS] = 0xFFFF;
179#else 179#else
180 gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); 180 gdb_regs32[GDB_PS] = 0;
181 gdb_regs32[GDB_CS] = __KERNEL_CS; 181 gdb_regs32[GDB_CS] = __KERNEL_CS;
182 gdb_regs32[GDB_SS] = __KERNEL_DS; 182 gdb_regs32[GDB_SS] = __KERNEL_DS;
183 gdb_regs[GDB_PC] = 0;
184 gdb_regs[GDB_R8] = 0; 183 gdb_regs[GDB_R8] = 0;
185 gdb_regs[GDB_R9] = 0; 184 gdb_regs[GDB_R9] = 0;
186 gdb_regs[GDB_R10] = 0; 185 gdb_regs[GDB_R10] = 0;
@@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
190 gdb_regs[GDB_R14] = 0; 189 gdb_regs[GDB_R14] = 0;
191 gdb_regs[GDB_R15] = 0; 190 gdb_regs[GDB_R15] = 0;
192#endif 191#endif
192 gdb_regs[GDB_PC] = 0;
193 gdb_regs[GDB_SP] = p->thread.sp; 193 gdb_regs[GDB_SP] = p->thread.sp;
194} 194}
195 195
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index c2bedaea11f7..4afc67f5facc 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -184,7 +184,7 @@ out:
184 184
185static struct kobj_attribute type_attr = __ATTR_RO(type); 185static struct kobj_attribute type_attr = __ATTR_RO(type);
186 186
187static struct bin_attribute data_attr = { 187static struct bin_attribute data_attr __ro_after_init = {
188 .attr = { 188 .attr = {
189 .name = "data", 189 .name = "data",
190 .mode = S_IRUGO, 190 .mode = S_IRUGO,
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 3692249a70f1..60b9949f1e65 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,7 +29,7 @@
29#include <asm/x86_init.h> 29#include <asm/x86_init.h>
30#include <asm/reboot.h> 30#include <asm/reboot.h>
31 31
32static int kvmclock = 1; 32static int kvmclock __ro_after_init = 1;
33static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; 33static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
34static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; 34static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
35static cycle_t kvm_sched_clock_offset; 35static cycle_t kvm_sched_clock_offset;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1acfd76e3e26..bbf3d5933eaa 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
332 .read_cr0 = native_read_cr0, 332 .read_cr0 = native_read_cr0,
333 .write_cr0 = native_write_cr0, 333 .write_cr0 = native_write_cr0,
334 .read_cr4 = native_read_cr4, 334 .read_cr4 = native_read_cr4,
335 .read_cr4_safe = native_read_cr4_safe,
336 .write_cr4 = native_write_cr4, 335 .write_cr4 = native_write_cr4,
337#ifdef CONFIG_X86_64 336#ifdef CONFIG_X86_64
338 .read_cr8 = native_read_cr8, 337 .read_cr8 = native_read_cr8,
@@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt);
389#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) 388#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
390#endif 389#endif
391 390
392struct pv_mmu_ops pv_mmu_ops = { 391struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
393 392
394 .read_cr2 = native_read_cr2, 393 .read_cr2 = native_read_cr2,
395 .write_cr2 = native_write_cr2, 394 .write_cr2 = native_write_cr2,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0ea2ce4..4002b475171c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/vm86.h>
+#include <asm/switch_to.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -513,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	struct inactive_task_frame *frame =
+		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+	return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
+/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
@@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  */
 unsigned long get_wchan(struct task_struct *p)
 {
-	unsigned long start, bottom, top, sp, fp, ip;
+	unsigned long start, bottom, top, sp, fp, ip, ret = 0;
 	int count = 0;
 
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
 
+	if (!try_get_task_stack(p))
+		return 0;
+
 	start = (unsigned long)task_stack_page(p);
 	if (!start)
-		return 0;
+		goto out;
 
 	/*
 	 * Layout of the stack page:
@@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p)
 	 * PADDING
 	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
 	 * stack
-	 * ----------- bottom = start + sizeof(thread_info)
-	 * thread_info
-	 * ----------- start
+	 * ----------- bottom = start
 	 *
 	 * The tasks stack pointer points at the location where the
 	 * framepointer is stored. The data on the stack is:
@@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p)
 	 */
 	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
 	top -= 2 * sizeof(unsigned long);
-	bottom = start + sizeof(struct thread_info);
+	bottom = start;
 
 	sp = READ_ONCE(p->thread.sp);
 	if (sp < bottom || sp > top)
-		return 0;
+		goto out;
 
-	fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+	fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
 	do {
 		if (fp < bottom || fp > top)
-			return 0;
+			goto out;
 		ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
-		if (!in_sched_functions(ip))
-			return ip;
+		if (!in_sched_functions(ip)) {
+			ret = ip;
+			goto out;
+		}
 		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
 	} while (count++ < 16 && p->state != TASK_RUNNING);
-	return 0;
+
+out:
+	put_task_stack(p);
+	return ret;
 }
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29c38c7..bd7be8efdc4c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,17 +55,6 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
-
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((unsigned long *)tsk->thread.sp)[3];
-}
-
 void __show_regs(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
-	cr4 = __read_cr4_safe();
+	cr4 = __read_cr4();
 	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
 		cr0, cr2, cr3, cr4);
 
@@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	unsigned long arg, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
+	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
+	struct inactive_task_frame *frame = &fork_frame->frame;
 	struct task_struct *tsk;
 	int err;
 
-	p->thread.sp = (unsigned long) childregs;
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.sp0 = (unsigned long) (childregs+1);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		p->thread.ip = (unsigned long) ret_from_kernel_thread;
-		task_user_gs(p) = __KERNEL_STACK_CANARY;
-		childregs->ds = __USER_DS;
-		childregs->es = __USER_DS;
-		childregs->fs = __KERNEL_PERCPU;
-		childregs->bx = sp; /* function */
-		childregs->bp = arg;
-		childregs->orig_ax = -1;
-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+		frame->bx = sp; /* function */
+		frame->di = arg;
 		p->thread.io_bitmap_ptr = NULL;
 		return 0;
 	}
+	frame->bx = 0;
 	*childregs = *current_pt_regs();
 	childregs->ax = 0;
 	if (sp)
 		childregs->sp = sp;
 
-	p->thread.ip = (unsigned long) ret_from_fork;
 	task_user_gs(p) = get_user_gs(current_pt_regs());
 
 	p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8f84bf..de9acaf2d371 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -50,8 +50,6 @@
 #include <asm/switch_to.h>
 #include <asm/xen/hypervisor.h>
 
-asmlinkage extern void ret_from_fork(void);
-
 __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
 /* Prints also some state that isn't saved in the pt_regs */
@@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 {
 	int err;
 	struct pt_regs *childregs;
+	struct fork_frame *fork_frame;
+	struct inactive_task_frame *frame;
 	struct task_struct *me = current;
 
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
-	p->thread.sp = (unsigned long) childregs;
-	set_tsk_thread_flag(p, TIF_FORK);
+	fork_frame = container_of(childregs, struct fork_frame, regs);
+	frame = &fork_frame->frame;
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
@@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->sp = (unsigned long)childregs;
-		childregs->ss = __KERNEL_DS;
-		childregs->bx = sp; /* function */
-		childregs->bp = arg;
-		childregs->orig_ax = -1;
-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+		frame->bx = sp; /* function */
+		frame->r12 = arg;
 		return 0;
 	}
+	frame->bx = 0;
 	*childregs = *current_pt_regs();
 
 	childregs->ax = 0;
@@ -511,7 +510,7 @@ void set_personality_ia32(bool x32)
 		current->personality &= ~READ_IMPLIES_EXEC;
 		/* in_compat_syscall() uses the presence of the x32
 		   syscall bit flag to determine compat status */
-		current_thread_info()->status &= ~TS_COMPAT;
+		current->thread.status &= ~TS_COMPAT;
 	} else {
 		set_thread_flag(TIF_IA32);
 		clear_thread_flag(TIF_X32);
@@ -519,7 +518,7 @@ void set_personality_ia32(bool x32)
 		current->mm->context.ia32_compat = TIF_IA32;
 		current->personality |= force_personality32;
 		/* Prepare the first "return" to user space */
-		current_thread_info()->status |= TS_COMPAT;
+		current->thread.status |= TS_COMPAT;
 	}
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a541ff..ce94c38cf4d6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
 		return sp;
 
 	prev_esp = (u32 *)(context);
-	if (prev_esp)
-		return (unsigned long)prev_esp;
+	if (*prev_esp)
+		return (unsigned long)*prev_esp;
 
 	return (unsigned long)regs;
 }
@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 		 */
 		regs->orig_ax = value;
 		if (syscall_get_nr(child, regs) >= 0)
-			task_thread_info(child)->status |= TS_I386_REGS_POKED;
+			child->thread.status |= TS_I386_REGS_POKED;
 		break;
 
 	case offsetof(struct user32, regs.eflags):
@@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 #ifdef CONFIG_X86_64
 
-static struct user_regset x86_64_regsets[] __read_mostly = {
+static struct user_regset x86_64_regsets[] __ro_after_init = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = {
 #endif /* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-static struct user_regset x86_32_regsets[] __read_mostly = {
+static struct user_regset x86_32_regsets[] __ro_after_init = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = {
  */
 u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
-void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
+void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
 {
 #ifdef CONFIG_X86_64
 	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 63bf27d972b7..e244c19a2451 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -705,7 +705,7 @@ static void native_machine_power_off(void)
 	tboot_shutdown(TB_SHUTDOWN_HALT);
 }
 
-struct machine_ops machine_ops = {
+struct machine_ops machine_ops __ro_after_init = {
 	.power_off = native_machine_power_off,
 	.shutdown = native_machine_shutdown,
 	.emergency_restart = native_machine_emergency_restart,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2c4bc85dfe90..eeb094ea794a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data);
 
 
 #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-__visible unsigned long mmu_cr4_features;
+__visible unsigned long mmu_cr4_features __ro_after_init;
 #else
-__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
+__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
 #endif
 
 /* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p)
 	 * auditing all the early-boot CR4 manipulation would be needed to
 	 * rule it out.
 	 */
-	mmu_cr4_features = __read_cr4_safe();
+	mmu_cr4_features = __read_cr4();
 
 	memblock_set_current_limit(get_max_mapped());
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7a40e068302d..2bbd27f89802 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
 DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
 	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
 };
 EXPORT_SYMBOL(__per_cpu_offset);
@@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
-			IRQ_STACK_SIZE - 64;
+			IRQ_STACK_SIZE;
 #endif
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 04cb3212db2d..da20ecb5397a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 	 * than the tracee.
 	 */
 #ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 54e2f1a968a4..7249dcf2cbcb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -943,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
 	per_cpu(cpu_current_top_of_stack, cpu) =
 		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
-	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 #endif
 }
@@ -970,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start = idle->thread.sp;
+	initial_stack = idle->thread.sp;
 
 	/*
 	 * Enable the espfix hack for this CPU
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4738f5e0f2ab..0653788026e2 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -8,80 +8,69 @@
 #include <linux/export.h>
 #include <linux/uaccess.h>
 #include <asm/stacktrace.h>
+#include <asm/unwind.h>
 
-static int save_stack_stack(void *data, char *name)
+static int save_stack_address(struct stack_trace *trace, unsigned long addr,
+			      bool nosched)
 {
-	return 0;
-}
-
-static int
-__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
-{
-	struct stack_trace *trace = data;
-#ifdef CONFIG_FRAME_POINTER
-	if (!reliable)
-		return 0;
-#endif
 	if (nosched && in_sched_functions(addr))
 		return 0;
+
 	if (trace->skip > 0) {
 		trace->skip--;
 		return 0;
 	}
-	if (trace->nr_entries < trace->max_entries) {
-		trace->entries[trace->nr_entries++] = addr;
-		return 0;
-	} else {
-		return -1; /* no more room, stop walking the stack */
-	}
-}
 
-static int save_stack_address(void *data, unsigned long addr, int reliable)
-{
-	return __save_stack_address(data, addr, reliable, false);
+	if (trace->nr_entries >= trace->max_entries)
+		return -1;
+
+	trace->entries[trace->nr_entries++] = addr;
+	return 0;
 }
 
-static int
-save_stack_address_nosched(void *data, unsigned long addr, int reliable)
+static void __save_stack_trace(struct stack_trace *trace,
+			       struct task_struct *task, struct pt_regs *regs,
+			       bool nosched)
 {
-	return __save_stack_address(data, addr, reliable, true);
-}
+	struct unwind_state state;
+	unsigned long addr;
 
-static const struct stacktrace_ops save_stack_ops = {
-	.stack = save_stack_stack,
-	.address = save_stack_address,
-	.walk_stack = print_context_stack,
-};
+	if (regs)
+		save_stack_address(trace, regs->ip, nosched);
 
-static const struct stacktrace_ops save_stack_ops_nosched = {
-	.stack = save_stack_stack,
-	.address = save_stack_address_nosched,
-	.walk_stack = print_context_stack,
-};
+	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
+	     unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || save_stack_address(trace, addr, nosched))
+			break;
+	}
+
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
 
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-	dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	__save_stack_trace(trace, current, NULL, false);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 {
-	dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	__save_stack_trace(trace, current, regs, false);
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	if (!try_get_task_stack(tsk))
+		return;
+
+	__save_stack_trace(trace, tsk, NULL, true);
+
+	put_task_stack(tsk);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b70ca12dd389..bd4e3d4d3625 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
 DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
 DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
 
+#ifdef CONFIG_VMAP_STACK
+__visible void __noreturn handle_stack_overflow(const char *message,
+						struct pt_regs *regs,
+						unsigned long fault_address)
+{
+	printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
+		 (void *)fault_address, current->stack,
+		 (char *)current->stack + THREAD_SIZE - 1);
+	die(message, regs, 0);
+
+	/* Be absolutely certain we don't return. */
+	panic(message);
+}
+#endif
+
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
+#ifdef CONFIG_VMAP_STACK
+	unsigned long cr2;
+#endif
 
 #ifdef CONFIG_X86_ESPFIX64
 	extern unsigned char native_irq_return_iret[];
@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_DF;
 
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * If we overflow the stack into a guard page, the CPU will fail
+	 * to deliver #PF and will send #DF instead. Similarly, if we
+	 * take any non-IST exception while too close to the bottom of
+	 * the stack, the processor will get a page fault while
+	 * delivering the exception and will generate a double fault.
+	 *
+	 * According to the SDM (footnote in 6.15 under "Interrupt 14 -
+	 * Page-Fault Exception (#PF):
+	 *
+	 * Processors update CR2 whenever a page fault is detected. If a
+	 * second page fault occurs while an earlier page fault is being
+	 * deliv- ered, the faulting linear address of the second fault will
+	 * overwrite the contents of CR2 (replacing the previous
+	 * address). These updates to CR2 occur even if the page fault
+	 * results in a double fault or occurs during the delivery of a
+	 * double fault.
+	 *
+	 * The logic below has a small possibility of incorrectly diagnosing
+	 * some errors as stack overflows. For example, if the IDT or GDT
+	 * gets corrupted such that #GP delivery fails due to a bad descriptor
+	 * causing #GP and we hit this condition while CR2 coincidentally
+	 * points to the stack guard page, we'll think we overflowed the
+	 * stack. Given that we're going to panic one way or another
+	 * if this happens, this isn't necessarily worth fixing.
+	 *
+	 * If necessary, we could improve the test by only diagnosing
+	 * a stack overflow if the saved RSP points within 47 bytes of
+	 * the bottom of the stack: if RSP == tsk_stack + 48 and we
+	 * take an exception, the stack is already aligned and there
+	 * will be enough room SS, RSP, RFLAGS, CS, RIP, and a
+	 * possible error code, so a stack overflow would *not* double
+	 * fault. With any less space left, exception delivery could
+	 * fail, and, as a practical matter, we've overflowed the
+	 * stack even if the actual trigger for the double fault was
+	 * something else.
+	 */
+	cr2 = read_cr2();
+	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
+		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
+#endif
+
 #ifdef CONFIG_DOUBLEFAULT
 	df_debug(regs, error_code);
 #endif
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
new file mode 100644
index 000000000000..a2456d4d286a
--- /dev/null
+++ b/arch/x86/kernel/unwind_frame.c
@@ -0,0 +1,93 @@
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+#define FRAME_HEADER_SIZE (sizeof(long) * 2)
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	unsigned long addr;
+	unsigned long *addr_p = unwind_get_return_address_ptr(state);
+
+	if (unwind_done(state))
+		return 0;
+
+	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+				     addr_p);
+
+	return __kernel_text_address(addr) ? addr : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool update_stack_state(struct unwind_state *state, void *addr,
+			       size_t len)
+{
+	struct stack_info *info = &state->stack_info;
+
+	/*
+	 * If addr isn't on the current stack, switch to the next one.
+	 *
+	 * We may have to traverse multiple stacks to deal with the possibility
+	 * that 'info->next_sp' could point to an empty stack and 'addr' could
+	 * be on a subsequent stack.
+	 */
+	while (!on_stack(info, addr, len))
+		if (get_stack_info(info->next_sp, state->task, info,
+				   &state->stack_mask))
+			return false;
+
+	return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	unsigned long *next_bp;
+
+	if (unwind_done(state))
+		return false;
+
+	next_bp = (unsigned long *)*state->bp;
+
+	/* make sure the next frame's data is accessible */
+	if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
+		return false;
+
+	/* move to the next frame */
+	state->bp = next_bp;
+	return true;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame)
+{
+	memset(state, 0, sizeof(*state));
+	state->task = task;
+
+	/* don't even attempt to start from user mode regs */
+	if (regs && user_mode(regs)) {
+		state->stack_info.type = STACK_TYPE_UNKNOWN;
+		return;
+	}
+
+	/* set up the starting stack frame */
+	state->bp = get_frame_pointer(task, regs);
+
+	/* initialize stack info and make sure the frame data is accessible */
+	get_stack_info(state->bp, state->task, &state->stack_info,
+		       &state->stack_mask);
+	update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at. Skip ahead until we reach it.
+	 */
+	while (!unwind_done(state) &&
+	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+	       state->bp < first_frame))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
new file mode 100644
index 000000000000..b5a834c93065
--- /dev/null
+++ b/arch/x86/kernel/unwind_guess.c
@@ -0,0 +1,43 @@
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	struct stack_info *info = &state->stack_info;
+
+	if (unwind_done(state))
+		return false;
+
+	do {
+		for (state->sp++; state->sp < info->end; state->sp++)
+			if (__kernel_text_address(*state->sp))
+				return true;
+
+		state->sp = info->next_sp;
+
+	} while (!get_stack_info(state->sp, state->task, info,
+				 &state->stack_mask));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame)
+{
+	memset(state, 0, sizeof(*state));
+
+	state->task = task;
+	state->sp = first_frame;
+
+	get_stack_info(first_frame, state->task, &state->stack_info,
+		       &state->stack_mask);
+
+	if (!__kernel_text_address(*first_frame))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 76c5e52436c4..0bd9f1287f39 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
 static void default_nmi_init(void) { };
 static int default_i8042_detect(void) { return 1; };
 
-struct x86_platform_ops x86_platform = {
+struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu = native_calibrate_cpu,
 	.calibrate_tsc = native_calibrate_tsc,
 	.get_wallclock = mach_get_cmos_time,
@@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = {
 EXPORT_SYMBOL_GPL(x86_platform);
 
 #if defined(CONFIG_PCI_MSI)
-struct x86_msi_ops x86_msi = {
+struct x86_msi_ops x86_msi __ro_after_init = {
 	.setup_msi_irqs = native_setup_msi_irqs,
 	.teardown_msi_irq = native_teardown_msi_irq,
 	.teardown_msi_irqs = default_teardown_msi_irqs,
@@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
 }
 #endif
 
-struct x86_io_apic_ops x86_io_apic_ops = {
+struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
 	.read = native_io_apic_read,
 	.disable = native_disable_io_apic,
 };
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af523d84d102..1e6b84b96ea6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4961,7 +4961,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
 	avic_handle_ldr_update(vcpu);
 }
 
-static struct kvm_x86_ops svm_x86_ops = {
+static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
 	.hardware_setup = svm_hardware_setup,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5cede40e2552..121fdf6e9ed0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11177,7 +11177,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 			~FEATURE_CONTROL_LMCE;
 }
 
-static struct kvm_x86_ops vmx_x86_ops = {
+static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
 	.hardware_setup = hardware_setup,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index dc8023060456..0b92fce3e6c0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		return;
 	}
 
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * Stack overflow? During boot, we can fault near the initial
+	 * stack in the direct map, but that's not an overflow -- check
+	 * that we're in vmalloc space to avoid this.
+	 */
+	if (is_vmalloc_addr((void *)address) &&
+	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
+	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
+		register void *__sp asm("rsp");
+		unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+		/*
+		 * We're likely to be running with very little stack space
+		 * left. It's plausible that we'd hit this condition but
+		 * double-fault even before we get this far, in which case
+		 * we're fine: the double-fault handler will deal with it.
+		 *
+		 * We don't want to make it all the way into the oops code
+		 * and then double-fault, though, because we're likely to
+		 * break the console driver and lose most of the stack dump.
+		 */
+		asm volatile ("movq %[stack], %%rsp\n\t"
+			      "call handle_stack_overflow\n\t"
+			      "1: jmp 1b"
+			      : "+r" (__sp)
+			      : "D" ("kernel stack overflow (page fault)"),
+				"S" (regs), "d" (address),
+				[stack] "rm" (stack));
+		unreachable();
+	}
+#endif
+
 	/*
 	 * 32-bit:
 	 *
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index bda8d5eef04d..ddd2661c4502 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -40,17 +40,26 @@
  * You need to add an if/def entry if you introduce a new memory region
  * compatible with KASLR. Your entry must be in logical order with memory
  * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
+ * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
  * ensure that this order is correct and won't be changed.
  */
 static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-static const unsigned long vaddr_end = VMEMMAP_START;
+
+#if defined(CONFIG_X86_ESPFIX64)
+static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
+#elif defined(CONFIG_EFI)
+static const unsigned long vaddr_end = EFI_VA_START;
+#else
+static const unsigned long vaddr_end = __START_KERNEL_map;
+#endif
 
 /* Default values */
 unsigned long page_offset_base = __PAGE_OFFSET_BASE;
 EXPORT_SYMBOL(page_offset_base);
 unsigned long vmalloc_base = __VMALLOC_BASE;
 EXPORT_SYMBOL(vmalloc_base);
+unsigned long vmemmap_base = __VMEMMAP_BASE;
+EXPORT_SYMBOL(vmemmap_base);
 
 /*
  * Memory regions randomized by KASLR (except modules that use a separate logic
@@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region {
 } kaslr_regions[] = {
 	{ &page_offset_base, 64/* Maximum */ },
 	{ &vmalloc_base, VMALLOC_SIZE_TB },
+	{ &vmemmap_base, 1 },
 };
 
 /* Get size in bytes used by the memory region */
@@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void)
 	struct rnd_state rand_state;
 	unsigned long remain_entropy;
 
+	/*
+	 * All these BUILD_BUG_ON checks ensures the memory layout is
+	 * consistent with the vaddr_start/vaddr_end variables.
+	 */
+	BUILD_BUG_ON(vaddr_start >= vaddr_end);
+	BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
+		     vaddr_end >= EFI_VA_START);
+	BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
+		      config_enabled(CONFIG_EFI)) &&
+		     vaddr_end >= __START_KERNEL_map);
+	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
+
 	if (!kaslr_memory_enabled())
 		return;
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4dbe65622810..a7655f6caf7d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	unsigned cpu = smp_processor_id();
 
 	if (likely(prev != next)) {
+		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+			/*
+			 * If our current stack is in vmalloc space and isn't
+			 * mapped in the new pgd, we'll double-fault. Forcibly
+			 * map it.
+			 */
+			unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+
+			pgd_t *pgd = next->pgd + stack_pgd_index;
+
+			if (unlikely(pgd_none(*pgd)))
+				set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+		}
+
 #ifdef CONFIG_SMP
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		this_cpu_write(cpu_tlbstate.active_mm, next);
 #endif
+
 		cpumask_set_cpu(cpu, mm_cpumask(next));
 
 		/*
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index cb31a4440e58..a2488b6e27d6 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,27 +16,7 @@
 
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
-
-static int backtrace_stack(void *data, char *name)
-{
-	/* Yes, we want all stacks */
-	return 0;
-}
-
-static int backtrace_address(void *data, unsigned long addr, int reliable)
-{
-	unsigned int *depth = data;
-
-	if ((*depth)--)
-		oprofile_add_trace(addr);
-	return 0;
-}
-
-static struct stacktrace_ops backtrace_ops = {
-	.stack = backtrace_stack,
-	.address = backtrace_address,
-	.walk_stack = print_context_stack,
-};
+#include <asm/unwind.h>
 
 #ifdef CONFIG_COMPAT
 static struct stack_frame_ia32 *
@@ -113,10 +93,29 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 	struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
 	if (!user_mode(regs)) {
-		unsigned long stack = kernel_stack_pointer(regs);
-		if (depth)
-			dump_trace(NULL, regs, (unsigned long *)stack, 0,
-				   &backtrace_ops, &depth);
+		struct unwind_state state;
+		unsigned long addr;
+
+		if (!depth)
+			return;
+
+		oprofile_add_trace(regs->ip);
+
+		if (!--depth)
+			return;
+
+		for (unwind_start(&state, current, regs, NULL);
+		     !unwind_done(&state); unwind_next_frame(&state)) {
+			addr = unwind_get_return_address(&state);
+			if (!addr)
+				break;
+
+			oprofile_add_trace(addr);
+
+			if (!--depth)
+				break;
+		}
+
 		return;
 	}
 
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 9770e55e768f..1d97cea3b3a4 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -120,9 +120,12 @@ static unsigned long __init bios32_service(unsigned long service)
 static struct {
 	unsigned long address;
 	unsigned short segment;
-} pci_indirect = { 0, __KERNEL_CS };
+} pci_indirect __ro_after_init = {
+	.address = 0,
+	.segment = __KERNEL_CS,
+};
 
-static int pci_bios_present;
+static int pci_bios_present __ro_after_init;
 
 static int __init check_pcibios(void)
 {
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index b12c26e2e309..53cace2ec0e2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -130,7 +130,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
 	ctxt->cr3 = read_cr3();
-	ctxt->cr4 = __read_cr4_safe();
+	ctxt->cr4 = __read_cr4();
 #ifdef CONFIG_X86_64
 	ctxt->cr8 = read_cr8();
 #endif
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index a7ef7b131e25..5766ead6fdb9 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -194,7 +194,7 @@ int peek_user(struct task_struct *child, long addr, long data)
 
 static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int err, n, cpu = task_cpu(child);
 	struct user_i387_struct fpregs;
 
 	err = save_i387_registers(userspace_pid[cpu],
@@ -211,7 +211,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
 
 static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int n, cpu = task_cpu(child);
 	struct user_i387_struct fpregs;
 
 	n = copy_from_user(&fpregs, buf, sizeof(fpregs));
@@ -224,7 +224,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
 
 static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
 {
-	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int err, n, cpu = task_cpu(child);
 	struct user_fxsr_struct fpregs;
 
 	err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
@@ -240,7 +240,7 @@ static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *c
 
 static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
 {
-	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int n, cpu = task_cpu(child);
 	struct user_fxsr_struct fpregs;
 
 	n = copy_from_user(&fpregs, buf, sizeof(fpregs));
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bc9aaba01a22..f1d2182e071f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1237,7 +1237,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.write_cr0 = xen_write_cr0,
 
 	.read_cr4 = native_read_cr4,
-	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = xen_write_cr4,
 
 #ifdef CONFIG_X86_64