author     Linus Torvalds <torvalds@linux-foundation.org>    2016-10-03 19:13:28 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2016-10-03 19:13:28 -0400
commit     1a4a2bc460721bc8f91e4c1294d39b38e5af132f (patch)
tree       fe646d05f6e17f05601e0a32cc796bec718ab6e7
parent     110a9e42b68719f584879c5c5c727bbae90d15f9 (diff)
parent     1ef55be16ed69538f89e0a6508be5e62fdc9851c (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
 "In this cycle this topic tree has become one of those 'super topics'
  that accumulated a lot of changes:

   - Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
     x86 - preceded by an array of changes. v4.8 saw preparatory changes
     in this area already - this is the rest of the work. Includes the
     thread stack caching performance optimization. (Andy Lutomirski)

   - switch_to() cleanups and all around enhancements. (Brian Gerst)

   - A large number of dumpstack infrastructure enhancements and an
     unwinder abstraction. The secret long term plan is safe(r) live
     patching plus maybe another attempt at debuginfo based unwinding -
     but all these current bits are standalone enhancements in a frame
     pointer based debug environment as well. (Josh Poimboeuf)

   - More __ro_after_init and const annotations. (Kees Cook)

   - Enable KASLR for the vmemmap memory region. (Thomas Garnier)"

[ The virtually mapped stack changes are pretty fundamental, and not
  x86-specific per se, even if they are only used on x86 right now. ]

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/asm: Get rid of __read_cr4_safe()
  thread_info: Use unsigned long for flags
  x86/alternatives: Add stack frame dependency to alternative_call_2()
  x86/dumpstack: Fix show_stack() task pointer regression
  x86/dumpstack: Remove dump_trace() and related callbacks
  x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
  oprofile/x86: Convert x86_backtrace() to use the new unwinder
  x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
  perf/x86: Convert perf_callchain_kernel() to use the new unwinder
  x86/unwind: Add new unwind interface and implementations
  x86/dumpstack: Remove NULL task pointer convention
  fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
  sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
  lib/syscall: Pin the task stack in collect_syscall()
  x86/process: Pin the target stack in get_wchan()
  x86/dumpstack: Pin the target stack when dumping it
  kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
  sched/core: Add try_get_task_stack() and put_task_stack()
  x86/entry/64: Fix a minor comment rebase error
  iommu/amd: Don't put completion-wait semaphore on stack
  ...
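
One of the commits above, "x86/unwind: Add new unwind interface and implementations", introduces the small frame-walking API that the dumpstack, perf, oprofile and stacktrace conversions in this series all switch to. As a rough orientation aid, a minimal sketch of how a caller drives that interface is shown here; it is modeled on the perf_callchain_kernel() hunk further down in this diff rather than taken verbatim from the series, and the walk_kernel_stack() helper name is purely illustrative.

	#include <linux/sched.h>
	#include <linux/printk.h>
	#include <asm/unwind.h>

	/* Illustrative only: print every return address on a task's kernel stack. */
	static void walk_kernel_stack(struct task_struct *task, struct pt_regs *regs)
	{
		struct unwind_state state;
		unsigned long addr;

		for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
		     unwind_next_frame(&state)) {
			addr = unwind_get_return_address(&state);
			if (!addr)
				break;		/* unreliable address or end of stack */
			pr_info("  %pS\n", (void *)addr);
		}
	}
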
-rw-r--r--  Documentation/trace/ftrace-design.txt | 11
-rw-r--r--  arch/Kconfig | 34
-rw-r--r--  arch/arm/kernel/ftrace.c | 2
-rw-r--r--  arch/arm64/kernel/entry-ftrace.S | 2
-rw-r--r--  arch/arm64/kernel/ftrace.c | 2
-rw-r--r--  arch/blackfin/kernel/ftrace-entry.S | 4
-rw-r--r--  arch/blackfin/kernel/ftrace.c | 2
-rw-r--r--  arch/ia64/include/asm/thread_info.h | 2
-rw-r--r--  arch/microblaze/kernel/ftrace.c | 2
-rw-r--r--  arch/mips/kernel/ftrace.c | 4
-rw-r--r--  arch/parisc/kernel/ftrace.c | 2
-rw-r--r--  arch/powerpc/kernel/ftrace.c | 3
-rw-r--r--  arch/s390/kernel/ftrace.c | 3
-rw-r--r--  arch/sh/kernel/ftrace.c | 2
-rw-r--r--  arch/sparc/Kconfig | 1
-rw-r--r--  arch/sparc/include/asm/ftrace.h | 4
-rw-r--r--  arch/sparc/kernel/ftrace.c | 2
-rw-r--r--  arch/tile/kernel/ftrace.c | 2
-rw-r--r--  arch/x86/Kconfig | 3
-rw-r--r--  arch/x86/entry/common.c | 24
-rw-r--r--  arch/x86/entry/entry_32.S | 68
-rw-r--r--  arch/x86/entry/entry_64.S | 151
-rw-r--r--  arch/x86/events/core.c | 36
-rw-r--r--  arch/x86/include/asm/alternative.h | 8
-rw-r--r--  arch/x86/include/asm/desc.h | 2
-rw-r--r--  arch/x86/include/asm/fpu/xstate.h | 3
-rw-r--r--  arch/x86/include/asm/ftrace.h | 3
-rw-r--r--  arch/x86/include/asm/kaslr.h | 1
-rw-r--r--  arch/x86/include/asm/kdebug.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 4
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 1
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/processor.h | 25
-rw-r--r--  arch/x86/include/asm/realmode.h | 2
-rw-r--r--  arch/x86/include/asm/smp.h | 3
-rw-r--r--  arch/x86/include/asm/special_insns.h | 22
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 120
-rw-r--r--  arch/x86/include/asm/switch_to.h | 164
-rw-r--r--  arch/x86/include/asm/syscall.h | 20
-rw-r--r--  arch/x86/include/asm/thread_info.h | 71
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 2
-rw-r--r--  arch/x86/include/asm/traps.h | 6
-rw-r--r--  arch/x86/include/asm/unwind.h | 73
-rw-r--r--  arch/x86/kernel/Makefile | 6
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 6
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 2
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/msi.c | 2
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 4
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 7
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 5
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 5
-rw-r--r--  arch/x86/kernel/cpu/common.c | 18
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 2
-rw-r--r--  arch/x86/kernel/dumpstack.c | 258
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 154
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 318
-rw-r--r--  arch/x86/kernel/fpu/init.c | 1
-rw-r--r--  arch/x86/kernel/ftrace.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 8
-rw-r--r--  arch/x86/kernel/head_64.S | 12
-rw-r--r--  arch/x86/kernel/irq_64.c | 3
-rw-r--r--  arch/x86/kernel/kgdb.c | 8
-rw-r--r--  arch/x86/kernel/ksysfs.c | 2
-rw-r--r--  arch/x86/kernel/kvmclock.c | 2
-rw-r--r--  arch/x86/kernel/paravirt.c | 3
-rw-r--r--  arch/x86/kernel/process.c | 42
-rw-r--r--  arch/x86/kernel/process_32.c | 33
-rw-r--r--  arch/x86/kernel/process_64.c | 25
-rw-r--r--  arch/x86/kernel/ptrace.c | 12
-rw-r--r--  arch/x86/kernel/reboot.c | 2
-rw-r--r--  arch/x86/kernel/setup.c | 6
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 4
-rw-r--r--  arch/x86/kernel/signal.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 3
-rw-r--r--  arch/x86/kernel/stacktrace.c | 79
-rw-r--r--  arch/x86/kernel/traps.c | 61
-rw-r--r--  arch/x86/kernel/unwind_frame.c | 93
-rw-r--r--  arch/x86/kernel/unwind_guess.c | 43
-rw-r--r--  arch/x86/kernel/x86_init.c | 6
-rw-r--r--  arch/x86/kvm/svm.c | 2
-rw-r--r--  arch/x86/kvm/vmx.c | 2
-rw-r--r--  arch/x86/mm/fault.c | 32
-rw-r--r--  arch/x86/mm/kaslr.c | 26
-rw-r--r--  arch/x86/mm/tlb.c | 15
-rw-r--r--  arch/x86/oprofile/backtrace.c | 49
-rw-r--r--  arch/x86/pci/pcbios.c | 7
-rw-r--r--  arch/x86/power/cpu.c | 2
-rw-r--r--  arch/x86/um/ptrace_32.c | 8
-rw-r--r--  arch/x86/xen/enlighten.c | 1
-rw-r--r--  drivers/iommu/amd_iommu.c | 51
-rw-r--r--  drivers/iommu/amd_iommu_types.h | 2
-rw-r--r--  fs/proc/base.c | 2
-rw-r--r--  include/linux/ftrace.h | 17
-rw-r--r--  include/linux/init_task.h | 11
-rw-r--r--  include/linux/sched.h | 81
-rw-r--r--  include/linux/thread_info.h | 15
-rw-r--r--  init/Kconfig | 10
-rw-r--r--  init/init_task.c | 7
-rw-r--r--  kernel/fork.c | 175
-rw-r--r--  kernel/kthread.c | 8
-rw-r--r--  kernel/sched/core.c | 5
-rw-r--r--  kernel/sched/sched.h | 4
-rw-r--r--  kernel/trace/Kconfig | 5
-rw-r--r--  kernel/trace/trace_functions_graph.c | 67
-rw-r--r--  lib/dma-debug.c | 36
-rw-r--r--  lib/syscall.c | 15
-rw-r--r--  tools/testing/selftests/x86/ptrace_syscall.c | 4
-rw-r--r--  tools/testing/selftests/x86/sigreturn.c | 16
114 files changed, 1722 insertions, 1108 deletions
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index dd5f916b351d..a273dd0bbaaa 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -203,6 +203,17 @@ along to ftrace_push_return_trace() instead of a stub value of 0.
203 203
204Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer. 204Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
205 205
206HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
207--------------------------------
208
209An arch may pass in a pointer to the return address on the stack. This
210prevents potential stack unwinding issues where the unwinder gets out of
211sync with ret_stack and the wrong addresses are reported by
212ftrace_graph_ret_addr().
213
214Adding support for it is easy: just define the macro in asm/ftrace.h and
215pass the return address pointer as the 'retp' argument to
216ftrace_push_return_trace().
206 217
207HAVE_FTRACE_NMI_ENTER 218HAVE_FTRACE_NMI_ENTER
208--------------------- 219---------------------
diff --git a/arch/Kconfig b/arch/Kconfig
index fd6e9712af81..180ea33164dc 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -696,4 +696,38 @@ config ARCH_NO_COHERENT_DMA_MMAP
696config CPU_NO_EFFICIENT_FFS 696config CPU_NO_EFFICIENT_FFS
697 def_bool n 697 def_bool n
698 698
699config HAVE_ARCH_VMAP_STACK
700 def_bool n
701 help
702 An arch should select this symbol if it can support kernel stacks
703 in vmalloc space. This means:
704
705 - vmalloc space must be large enough to hold many kernel stacks.
706 This may rule out many 32-bit architectures.
707
708 - Stacks in vmalloc space need to work reliably. For example, if
709 vmap page tables are created on demand, either this mechanism
710 needs to work while the stack points to a virtual address with
711 unpopulated page tables or arch code (switch_to() and switch_mm(),
712 most likely) needs to ensure that the stack's page table entries
713 are populated before running on a possibly unpopulated stack.
714
715 - If the stack overflows into a guard page, something reasonable
716 should happen. The definition of "reasonable" is flexible, but
717 instantly rebooting without logging anything would be unfriendly.
718
719config VMAP_STACK
720 default y
721 bool "Use a virtually-mapped stack"
722 depends on HAVE_ARCH_VMAP_STACK && !KASAN
723 ---help---
724 Enable this if you want the use virtually-mapped kernel stacks
725 with guard pages. This causes kernel stack overflows to be
726 caught immediately rather than causing difficult-to-diagnose
727 corruption.
728
729 This is presently incompatible with KASAN because KASAN expects
730 the stack to map directly to the KASAN shadow map using a formula
731 that is incorrect if the stack is in vmalloc space.
732
699source "kernel/gcov/Kconfig" 733source "kernel/gcov/Kconfig"
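
The "stack's page table entries must be populated" requirement in the help text above is handled on x86 by prepare_switch_to() (see the arch/x86/include/asm/switch_to.h hunk later in this diff): before switching, one byte of the next task's stack is read while still running on the old stack, so a missing top-level paging entry is faulted in by vmalloc_fault() instead of escalating into a double fault. A trimmed-down version of that hunk, shown here only to make the requirement concrete:

	static inline void prepare_switch_to(struct task_struct *prev,
					     struct task_struct *next)
	{
	#ifdef CONFIG_VMAP_STACK
		/*
		 * Touch the new stack while still on the old one, so any
		 * unpopulated vmalloc page tables for it are filled in now
		 * rather than at the first access after the stack switch.
		 */
		READ_ONCE(*(unsigned char *)next->thread.sp);
	#endif
	}
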
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 709ee1d6d4df..3f1759411d51 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -218,7 +218,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
218 } 218 }
219 219
220 err = ftrace_push_return_trace(old, self_addr, &trace.depth, 220 err = ftrace_push_return_trace(old, self_addr, &trace.depth,
221 frame_pointer); 221 frame_pointer, NULL);
222 if (err == -EBUSY) { 222 if (err == -EBUSY) {
223 *parent = old; 223 *parent = old;
224 return; 224 return;
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 0f03a8fe2314..aef02d2af3b5 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -219,7 +219,7 @@ ENDPROC(ftrace_graph_caller)
219 * 219 *
220 * Run ftrace_return_to_handler() before going back to parent. 220 * Run ftrace_return_to_handler() before going back to parent.
221 * @fp is checked against the value passed by ftrace_graph_caller() 221 * @fp is checked against the value passed by ftrace_graph_caller()
222 * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. 222 * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
223 */ 223 */
224ENTRY(return_to_handler) 224ENTRY(return_to_handler)
225 save_return_regs 225 save_return_regs
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index ebecf9aa33d1..40ad08ac569a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -138,7 +138,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
138 return; 138 return;
139 139
140 err = ftrace_push_return_trace(old, self_addr, &trace.depth, 140 err = ftrace_push_return_trace(old, self_addr, &trace.depth,
141 frame_pointer); 141 frame_pointer, NULL);
142 if (err == -EBUSY) 142 if (err == -EBUSY)
143 return; 143 return;
144 else 144 else
diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S
index 28d059540424..3b8bdcbb7da3 100644
--- a/arch/blackfin/kernel/ftrace-entry.S
+++ b/arch/blackfin/kernel/ftrace-entry.S
@@ -169,7 +169,7 @@ ENTRY(_ftrace_graph_caller)
169 r0 = sp; /* unsigned long *parent */ 169 r0 = sp; /* unsigned long *parent */
170 r1 = [sp]; /* unsigned long self_addr */ 170 r1 = [sp]; /* unsigned long self_addr */
171# endif 171# endif
172# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST 172# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
173 r2 = fp; /* unsigned long frame_pointer */ 173 r2 = fp; /* unsigned long frame_pointer */
174# endif 174# endif
175 r0 += 16; /* skip the 4 local regs on stack */ 175 r0 += 16; /* skip the 4 local regs on stack */
@@ -190,7 +190,7 @@ ENTRY(_return_to_handler)
190 [--sp] = r1; 190 [--sp] = r1;
191 191
192 /* get original return address */ 192 /* get original return address */
193# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST 193# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
194 r0 = fp; /* Blackfin is sane, so omit this */ 194 r0 = fp; /* Blackfin is sane, so omit this */
195# endif 195# endif
196 call _ftrace_return_to_handler; 196 call _ftrace_return_to_handler;
diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c
index 095de0fa044d..8dad7589b843 100644
--- a/arch/blackfin/kernel/ftrace.c
+++ b/arch/blackfin/kernel/ftrace.c
@@ -107,7 +107,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
107 return; 107 return;
108 108
109 if (ftrace_push_return_trace(*parent, self_addr, &trace.depth, 109 if (ftrace_push_return_trace(*parent, self_addr, &trace.depth,
110 frame_pointer) == -EBUSY) 110 frame_pointer, NULL) == -EBUSY)
111 return; 111 return;
112 112
113 trace.func = self_addr; 113 trace.func = self_addr;
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 29bd59790d6c..c7026429816b 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -56,7 +56,7 @@ struct thread_info {
56#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) 56#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0)
57#define task_thread_info(tsk) ((struct thread_info *) 0) 57#define task_thread_info(tsk) ((struct thread_info *) 0)
58#endif 58#endif
59#define free_thread_stack(ti) /* nothing */ 59#define free_thread_stack(tsk) /* nothing */
60#define task_stack_page(tsk) ((void *)(tsk)) 60#define task_stack_page(tsk) ((void *)(tsk))
61 61
62#define __HAVE_THREAD_FUNCTIONS 62#define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
index fc7b48a52cd5..d57563c58a26 100644
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -63,7 +63,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
63 return; 63 return;
64 } 64 }
65 65
66 err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0); 66 err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
67 if (err == -EBUSY) { 67 if (err == -EBUSY) {
68 *parent = old; 68 *parent = old;
69 return; 69 return;
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 937c54bc8ccc..30a3b75e88eb 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -382,8 +382,8 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
382 if (unlikely(faulted)) 382 if (unlikely(faulted))
383 goto out; 383 goto out;
384 384
385 if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp) 385 if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
386 == -EBUSY) { 386 NULL) == -EBUSY) {
387 *parent_ra_addr = old_parent_ra; 387 *parent_ra_addr = old_parent_ra;
388 return; 388 return;
389 } 389 }
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index a828a0adf52c..5a5506a35395 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -48,7 +48,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
48 return; 48 return;
49 49
50 if (ftrace_push_return_trace(old, self_addr, &trace.depth, 50 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
51 0 ) == -EBUSY) 51 0, NULL) == -EBUSY)
52 return; 52 return;
53 53
54 /* activate parisc_return_to_handler() as return point */ 54 /* activate parisc_return_to_handler() as return point */
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index cc52d9795f88..a95639b8d4ac 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -593,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
593 if (!ftrace_graph_entry(&trace)) 593 if (!ftrace_graph_entry(&trace))
594 goto out; 594 goto out;
595 595
596 if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) 596 if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
597 NULL) == -EBUSY)
597 goto out; 598 goto out;
598 599
599 parent = return_hooker; 600 parent = return_hooker;
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0f7bfeba6da6..60a8a4e207ed 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -209,7 +209,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
209 /* Only trace if the calling function expects to. */ 209 /* Only trace if the calling function expects to. */
210 if (!ftrace_graph_entry(&trace)) 210 if (!ftrace_graph_entry(&trace))
211 goto out; 211 goto out;
212 if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) 212 if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
213 NULL) == -EBUSY)
213 goto out; 214 goto out;
214 parent = (unsigned long) return_to_handler; 215 parent = (unsigned long) return_to_handler;
215out: 216out:
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 38993e09ef03..95eccd49672f 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -382,7 +382,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
382 return; 382 return;
383 } 383 }
384 384
385 err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0); 385 err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
386 if (err == -EBUSY) { 386 if (err == -EBUSY) {
387 __raw_writel(old, parent); 387 __raw_writel(old, parent);
388 return; 388 return;
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 59b09600dd32..f5d60f14a0bc 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -56,7 +56,6 @@ config SPARC64
56 def_bool 64BIT 56 def_bool 64BIT
57 select HAVE_FUNCTION_TRACER 57 select HAVE_FUNCTION_TRACER
58 select HAVE_FUNCTION_GRAPH_TRACER 58 select HAVE_FUNCTION_GRAPH_TRACER
59 select HAVE_FUNCTION_GRAPH_FP_TEST
60 select HAVE_KRETPROBES 59 select HAVE_KRETPROBES
61 select HAVE_KPROBES 60 select HAVE_KPROBES
62 select HAVE_RCU_TABLE_FREE if SMP 61 select HAVE_RCU_TABLE_FREE if SMP
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index 3192a8e42fd6..62755a339a59 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -9,6 +9,10 @@
9void _mcount(void); 9void _mcount(void);
10#endif 10#endif
11 11
12#endif /* CONFIG_MCOUNT */
13
14#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY)
15#define HAVE_FUNCTION_GRAPH_FP_TEST
12#endif 16#endif
13 17
14#ifdef CONFIG_DYNAMIC_FTRACE 18#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 0a2d2ddff543..6bcff698069b 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -131,7 +131,7 @@ unsigned long prepare_ftrace_return(unsigned long parent,
131 return parent + 8UL; 131 return parent + 8UL;
132 132
133 if (ftrace_push_return_trace(parent, self_addr, &trace.depth, 133 if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
134 frame_pointer) == -EBUSY) 134 frame_pointer, NULL) == -EBUSY)
135 return parent + 8UL; 135 return parent + 8UL;
136 136
137 trace.func = self_addr; 137 trace.func = self_addr;
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
index 4a572088b270..b827a418b155 100644
--- a/arch/tile/kernel/ftrace.c
+++ b/arch/tile/kernel/ftrace.c
@@ -184,7 +184,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
184 *parent = return_hooker; 184 *parent = return_hooker;
185 185
186 err = ftrace_push_return_trace(old, self_addr, &trace.depth, 186 err = ftrace_push_return_trace(old, self_addr, &trace.depth,
187 frame_pointer); 187 frame_pointer, NULL);
188 if (err == -EBUSY) { 188 if (err == -EBUSY) {
189 *parent = old; 189 *parent = old;
190 return; 190 return;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0cc8811af4e0..ac5944fa6da2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@ config X86
93 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 93 select HAVE_ARCH_TRANSPARENT_HUGEPAGE
94 select HAVE_ARCH_WITHIN_STACK_FRAMES 94 select HAVE_ARCH_WITHIN_STACK_FRAMES
95 select HAVE_EBPF_JIT if X86_64 95 select HAVE_EBPF_JIT if X86_64
96 select HAVE_ARCH_VMAP_STACK if X86_64
96 select HAVE_CC_STACKPROTECTOR 97 select HAVE_CC_STACKPROTECTOR
97 select HAVE_CMPXCHG_DOUBLE 98 select HAVE_CMPXCHG_DOUBLE
98 select HAVE_CMPXCHG_LOCAL 99 select HAVE_CMPXCHG_LOCAL
@@ -109,7 +110,6 @@ config X86
109 select HAVE_EXIT_THREAD 110 select HAVE_EXIT_THREAD
110 select HAVE_FENTRY if X86_64 111 select HAVE_FENTRY if X86_64
111 select HAVE_FTRACE_MCOUNT_RECORD 112 select HAVE_FTRACE_MCOUNT_RECORD
112 select HAVE_FUNCTION_GRAPH_FP_TEST
113 select HAVE_FUNCTION_GRAPH_TRACER 113 select HAVE_FUNCTION_GRAPH_TRACER
114 select HAVE_FUNCTION_TRACER 114 select HAVE_FUNCTION_TRACER
115 select HAVE_GCC_PLUGINS 115 select HAVE_GCC_PLUGINS
@@ -157,6 +157,7 @@ config X86
157 select SPARSE_IRQ 157 select SPARSE_IRQ
158 select SRCU 158 select SRCU
159 select SYSCTL_EXCEPTION_TRACE 159 select SYSCTL_EXCEPTION_TRACE
160 select THREAD_INFO_IN_TASK
160 select USER_STACKTRACE_SUPPORT 161 select USER_STACKTRACE_SUPPORT
161 select VIRT_TO_BUS 162 select VIRT_TO_BUS
162 select X86_DEV_DMA_OPS if X86_64 163 select X86_DEV_DMA_OPS if X86_64
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 1433f6b4607d..bdd9cc59d20f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -31,13 +31,6 @@
31#define CREATE_TRACE_POINTS 31#define CREATE_TRACE_POINTS
32#include <trace/events/syscalls.h> 32#include <trace/events/syscalls.h>
33 33
34static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
35{
36 unsigned long top_of_stack =
37 (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
38 return (struct thread_info *)(top_of_stack - THREAD_SIZE);
39}
40
41#ifdef CONFIG_CONTEXT_TRACKING 34#ifdef CONFIG_CONTEXT_TRACKING
42/* Called on entry from user mode with IRQs off. */ 35/* Called on entry from user mode with IRQs off. */
43__visible inline void enter_from_user_mode(void) 36__visible inline void enter_from_user_mode(void)
@@ -71,7 +64,7 @@ static long syscall_trace_enter(struct pt_regs *regs)
71{ 64{
72 u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; 65 u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
73 66
74 struct thread_info *ti = pt_regs_to_thread_info(regs); 67 struct thread_info *ti = current_thread_info();
75 unsigned long ret = 0; 68 unsigned long ret = 0;
76 bool emulated = false; 69 bool emulated = false;
77 u32 work; 70 u32 work;
@@ -173,18 +166,17 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
173 /* Disable IRQs and retry */ 166 /* Disable IRQs and retry */
174 local_irq_disable(); 167 local_irq_disable();
175 168
176 cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); 169 cached_flags = READ_ONCE(current_thread_info()->flags);
177 170
178 if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) 171 if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
179 break; 172 break;
180
181 } 173 }
182} 174}
183 175
184/* Called with IRQs disabled. */ 176/* Called with IRQs disabled. */
185__visible inline void prepare_exit_to_usermode(struct pt_regs *regs) 177__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
186{ 178{
187 struct thread_info *ti = pt_regs_to_thread_info(regs); 179 struct thread_info *ti = current_thread_info();
188 u32 cached_flags; 180 u32 cached_flags;
189 181
190 if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) 182 if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -209,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
209 * special case only applies after poking regs and before the 201 * special case only applies after poking regs and before the
210 * very next return to user mode. 202 * very next return to user mode.
211 */ 203 */
212 ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); 204 current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
213#endif 205#endif
214 206
215 user_enter_irqoff(); 207 user_enter_irqoff();
@@ -247,7 +239,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
247 */ 239 */
248__visible inline void syscall_return_slowpath(struct pt_regs *regs) 240__visible inline void syscall_return_slowpath(struct pt_regs *regs)
249{ 241{
250 struct thread_info *ti = pt_regs_to_thread_info(regs); 242 struct thread_info *ti = current_thread_info();
251 u32 cached_flags = READ_ONCE(ti->flags); 243 u32 cached_flags = READ_ONCE(ti->flags);
252 244
253 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 245 CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
@@ -270,7 +262,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
270#ifdef CONFIG_X86_64 262#ifdef CONFIG_X86_64
271__visible void do_syscall_64(struct pt_regs *regs) 263__visible void do_syscall_64(struct pt_regs *regs)
272{ 264{
273 struct thread_info *ti = pt_regs_to_thread_info(regs); 265 struct thread_info *ti = current_thread_info();
274 unsigned long nr = regs->orig_ax; 266 unsigned long nr = regs->orig_ax;
275 267
276 enter_from_user_mode(); 268 enter_from_user_mode();
@@ -303,11 +295,11 @@ __visible void do_syscall_64(struct pt_regs *regs)
303 */ 295 */
304static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) 296static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
305{ 297{
306 struct thread_info *ti = pt_regs_to_thread_info(regs); 298 struct thread_info *ti = current_thread_info();
307 unsigned int nr = (unsigned int)regs->orig_ax; 299 unsigned int nr = (unsigned int)regs->orig_ax;
308 300
309#ifdef CONFIG_IA32_EMULATION 301#ifdef CONFIG_IA32_EMULATION
310 ti->status |= TS_COMPAT; 302 current->thread.status |= TS_COMPAT;
311#endif 303#endif
312 304
313 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { 305 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 0b56666e6039..b75a8bcd2d23 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,34 +204,70 @@
204 POP_GS_EX 204 POP_GS_EX
205.endm 205.endm
206 206
207/*
208 * %eax: prev task
209 * %edx: next task
210 */
211ENTRY(__switch_to_asm)
212 /*
213 * Save callee-saved registers
214 * This must match the order in struct inactive_task_frame
215 */
216 pushl %ebp
217 pushl %ebx
218 pushl %edi
219 pushl %esi
220
221 /* switch stack */
222 movl %esp, TASK_threadsp(%eax)
223 movl TASK_threadsp(%edx), %esp
224
225#ifdef CONFIG_CC_STACKPROTECTOR
226 movl TASK_stack_canary(%edx), %ebx
227 movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
228#endif
229
230 /* restore callee-saved registers */
231 popl %esi
232 popl %edi
233 popl %ebx
234 popl %ebp
235
236 jmp __switch_to
237END(__switch_to_asm)
238
239/*
240 * A newly forked process directly context switches into this address.
241 *
242 * eax: prev task we switched from
243 * ebx: kernel thread func (NULL for user thread)
244 * edi: kernel thread arg
245 */
207ENTRY(ret_from_fork) 246ENTRY(ret_from_fork)
208 pushl %eax 247 pushl %eax
209 call schedule_tail 248 call schedule_tail
210 popl %eax 249 popl %eax
211 250
251 testl %ebx, %ebx
252 jnz 1f /* kernel threads are uncommon */
253
2542:
212 /* When we fork, we trace the syscall return in the child, too. */ 255 /* When we fork, we trace the syscall return in the child, too. */
213 movl %esp, %eax 256 movl %esp, %eax
214 call syscall_return_slowpath 257 call syscall_return_slowpath
215 jmp restore_all 258 jmp restore_all
216END(ret_from_fork)
217
218ENTRY(ret_from_kernel_thread)
219 pushl %eax
220 call schedule_tail
221 popl %eax
222 movl PT_EBP(%esp), %eax
223 call *PT_EBX(%esp)
224 movl $0, PT_EAX(%esp)
225 259
260 /* kernel thread */
2611: movl %edi, %eax
262 call *%ebx
226 /* 263 /*
227 * Kernel threads return to userspace as if returning from a syscall. 264 * A kernel thread is allowed to return here after successfully
228 * We should check whether anything actually uses this path and, if so, 265 * calling do_execve(). Exit to userspace to complete the execve()
229 * consider switching it over to ret_from_fork. 266 * syscall.
230 */ 267 */
231 movl %esp, %eax 268 movl $0, PT_EAX(%esp)
232 call syscall_return_slowpath 269 jmp 2b
233 jmp restore_all 270END(ret_from_fork)
234ENDPROC(ret_from_kernel_thread)
235 271
236/* 272/*
237 * Return to user mode is not as complex as all this looks, 273 * Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 02fff3ebfb87..fee1d95902b5 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
179 * If we need to do entry work or if we guess we'll need to do 179 * If we need to do entry work or if we guess we'll need to do
180 * exit work, go straight to the slow path. 180 * exit work, go straight to the slow path.
181 */ 181 */
182 testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 182 movq PER_CPU_VAR(current_task), %r11
183 testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
183 jnz entry_SYSCALL64_slow_path 184 jnz entry_SYSCALL64_slow_path
184 185
185entry_SYSCALL_64_fastpath: 186entry_SYSCALL_64_fastpath:
@@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath:
217 */ 218 */
218 DISABLE_INTERRUPTS(CLBR_NONE) 219 DISABLE_INTERRUPTS(CLBR_NONE)
219 TRACE_IRQS_OFF 220 TRACE_IRQS_OFF
220 testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) 221 movq PER_CPU_VAR(current_task), %r11
222 testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
221 jnz 1f 223 jnz 1f
222 224
223 LOCKDEP_SYS_EXIT 225 LOCKDEP_SYS_EXIT
@@ -351,8 +353,7 @@ ENTRY(stub_ptregs_64)
351 jmp entry_SYSCALL64_slow_path 353 jmp entry_SYSCALL64_slow_path
352 354
3531: 3551:
354 /* Called from C */ 356 jmp *%rax /* Called from C */
355 jmp *%rax /* called from C */
356END(stub_ptregs_64) 357END(stub_ptregs_64)
357 358
358.macro ptregs_stub func 359.macro ptregs_stub func
@@ -369,41 +370,73 @@ END(ptregs_\func)
369#include <asm/syscalls_64.h> 370#include <asm/syscalls_64.h>
370 371
371/* 372/*
373 * %rdi: prev task
374 * %rsi: next task
375 */
376ENTRY(__switch_to_asm)
377 /*
378 * Save callee-saved registers
379 * This must match the order in inactive_task_frame
380 */
381 pushq %rbp
382 pushq %rbx
383 pushq %r12
384 pushq %r13
385 pushq %r14
386 pushq %r15
387
388 /* switch stack */
389 movq %rsp, TASK_threadsp(%rdi)
390 movq TASK_threadsp(%rsi), %rsp
391
392#ifdef CONFIG_CC_STACKPROTECTOR
393 movq TASK_stack_canary(%rsi), %rbx
394 movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
395#endif
396
397 /* restore callee-saved registers */
398 popq %r15
399 popq %r14
400 popq %r13
401 popq %r12
402 popq %rbx
403 popq %rbp
404
405 jmp __switch_to
406END(__switch_to_asm)
407
408/*
372 * A newly forked process directly context switches into this address. 409 * A newly forked process directly context switches into this address.
373 * 410 *
374 * rdi: prev task we switched from 411 * rax: prev task we switched from
412 * rbx: kernel thread func (NULL for user thread)
413 * r12: kernel thread arg
375 */ 414 */
376ENTRY(ret_from_fork) 415ENTRY(ret_from_fork)
377 LOCK ; btr $TIF_FORK, TI_flags(%r8) 416 movq %rax, %rdi
378
379 call schedule_tail /* rdi: 'prev' task parameter */ 417 call schedule_tail /* rdi: 'prev' task parameter */
380 418
381 testb $3, CS(%rsp) /* from kernel_thread? */ 419 testq %rbx, %rbx /* from kernel_thread? */
382 jnz 1f 420 jnz 1f /* kernel threads are uncommon */
383 421
384 /* 4222:
385 * We came from kernel_thread. This code path is quite twisted, and
386 * someone should clean it up.
387 *
388 * copy_thread_tls stashes the function pointer in RBX and the
389 * parameter to be passed in RBP. The called function is permitted
390 * to call do_execve and thereby jump to user mode.
391 */
392 movq RBP(%rsp), %rdi
393 call *RBX(%rsp)
394 movl $0, RAX(%rsp)
395
396 /*
397 * Fall through as though we're exiting a syscall. This makes a
398 * twisted sort of sense if we just called do_execve.
399 */
400
4011:
402 movq %rsp, %rdi 423 movq %rsp, %rdi
403 call syscall_return_slowpath /* returns with IRQs disabled */ 424 call syscall_return_slowpath /* returns with IRQs disabled */
404 TRACE_IRQS_ON /* user mode is traced as IRQS on */ 425 TRACE_IRQS_ON /* user mode is traced as IRQS on */
405 SWAPGS 426 SWAPGS
406 jmp restore_regs_and_iret 427 jmp restore_regs_and_iret
428
4291:
430 /* kernel thread */
431 movq %r12, %rdi
432 call *%rbx
433 /*
434 * A kernel thread is allowed to return here after successfully
435 * calling do_execve(). Exit to userspace to complete the execve()
436 * syscall.
437 */
438 movq $0, RAX(%rsp)
439 jmp 2b
407END(ret_from_fork) 440END(ret_from_fork)
408 441
409/* 442/*
@@ -555,27 +588,69 @@ native_irq_return_iret:
555 588
556#ifdef CONFIG_X86_ESPFIX64 589#ifdef CONFIG_X86_ESPFIX64
557native_irq_return_ldt: 590native_irq_return_ldt:
558 pushq %rax 591 /*
559 pushq %rdi 592 * We are running with user GSBASE. All GPRs contain their user
593 * values. We have a percpu ESPFIX stack that is eight slots
594 * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom
595 * of the ESPFIX stack.
596 *
597 * We clobber RAX and RDI in this code. We stash RDI on the
598 * normal stack and RAX on the ESPFIX stack.
599 *
600 * The ESPFIX stack layout we set up looks like this:
601 *
602 * --- top of ESPFIX stack ---
603 * SS
604 * RSP
605 * RFLAGS
606 * CS
607 * RIP <-- RSP points here when we're done
608 * RAX <-- espfix_waddr points here
609 * --- bottom of ESPFIX stack ---
610 */
611
612 pushq %rdi /* Stash user RDI */
560 SWAPGS 613 SWAPGS
561 movq PER_CPU_VAR(espfix_waddr), %rdi 614 movq PER_CPU_VAR(espfix_waddr), %rdi
562 movq %rax, (0*8)(%rdi) /* RAX */ 615 movq %rax, (0*8)(%rdi) /* user RAX */
563 movq (2*8)(%rsp), %rax /* RIP */ 616 movq (1*8)(%rsp), %rax /* user RIP */
564 movq %rax, (1*8)(%rdi) 617 movq %rax, (1*8)(%rdi)
565 movq (3*8)(%rsp), %rax /* CS */ 618 movq (2*8)(%rsp), %rax /* user CS */
566 movq %rax, (2*8)(%rdi) 619 movq %rax, (2*8)(%rdi)
567 movq (4*8)(%rsp), %rax /* RFLAGS */ 620 movq (3*8)(%rsp), %rax /* user RFLAGS */
568 movq %rax, (3*8)(%rdi) 621 movq %rax, (3*8)(%rdi)
569 movq (6*8)(%rsp), %rax /* SS */ 622 movq (5*8)(%rsp), %rax /* user SS */
570 movq %rax, (5*8)(%rdi) 623 movq %rax, (5*8)(%rdi)
571 movq (5*8)(%rsp), %rax /* RSP */ 624 movq (4*8)(%rsp), %rax /* user RSP */
572 movq %rax, (4*8)(%rdi) 625 movq %rax, (4*8)(%rdi)
573 andl $0xffff0000, %eax 626 /* Now RAX == RSP. */
574 popq %rdi 627
628 andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
629 popq %rdi /* Restore user RDI */
630
631 /*
632 * espfix_stack[31:16] == 0. The page tables are set up such that
633 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
634 * espfix_waddr for any X. That is, there are 65536 RO aliases of
635 * the same page. Set up RSP so that RSP[31:16] contains the
636 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
637 * still points to an RO alias of the ESPFIX stack.
638 */
575 orq PER_CPU_VAR(espfix_stack), %rax 639 orq PER_CPU_VAR(espfix_stack), %rax
576 SWAPGS 640 SWAPGS
577 movq %rax, %rsp 641 movq %rax, %rsp
578 popq %rax 642
643 /*
644 * At this point, we cannot write to the stack any more, but we can
645 * still read.
646 */
647 popq %rax /* Restore user RAX */
648
649 /*
650 * RSP now points to an ordinary IRET frame, except that the page
651 * is read-only and RSP[31:16] are preloaded with the userspace
652 * values. We can now IRET back to userspace.
653 */
579 jmp native_irq_return_iret 654 jmp native_irq_return_iret
580#endif 655#endif
581END(common_interrupt) 656END(common_interrupt)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 18a1acf86c90..d31735f37ed7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -37,6 +37,7 @@
37#include <asm/timer.h> 37#include <asm/timer.h>
38#include <asm/desc.h> 38#include <asm/desc.h>
39#include <asm/ldt.h> 39#include <asm/ldt.h>
40#include <asm/unwind.h>
40 41
41#include "perf_event.h" 42#include "perf_event.h"
42 43
@@ -2267,39 +2268,26 @@ void arch_perf_update_userpage(struct perf_event *event,
2267 cyc2ns_read_end(data); 2268 cyc2ns_read_end(data);
2268} 2269}
2269 2270
2270/*
2271 * callchain support
2272 */
2273
2274static int backtrace_stack(void *data, char *name)
2275{
2276 return 0;
2277}
2278
2279static int backtrace_address(void *data, unsigned long addr, int reliable)
2280{
2281 struct perf_callchain_entry_ctx *entry = data;
2282
2283 return perf_callchain_store(entry, addr);
2284}
2285
2286static const struct stacktrace_ops backtrace_ops = {
2287 .stack = backtrace_stack,
2288 .address = backtrace_address,
2289 .walk_stack = print_context_stack_bp,
2290};
2291
2292void 2271void
2293perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) 2272perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
2294{ 2273{
2274 struct unwind_state state;
2275 unsigned long addr;
2276
2295 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { 2277 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
2296 /* TODO: We don't support guest os callchain now */ 2278 /* TODO: We don't support guest os callchain now */
2297 return; 2279 return;
2298 } 2280 }
2299 2281
2300 perf_callchain_store(entry, regs->ip); 2282 if (perf_callchain_store(entry, regs->ip))
2283 return;
2301 2284
2302 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); 2285 for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
2286 unwind_next_frame(&state)) {
2287 addr = unwind_get_return_address(&state);
2288 if (!addr || perf_callchain_store(entry, addr))
2289 return;
2290 }
2303} 2291}
2304 2292
2305static inline int 2293static inline int
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index e77a6443104f..1b020381ab38 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -217,10 +217,14 @@ static inline int alternatives_text_reserved(void *start, void *end)
217 */ 217 */
218#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ 218#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
219 output, input...) \ 219 output, input...) \
220{ \
221 register void *__sp asm(_ASM_SP); \
220 asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ 222 asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
221 "call %P[new2]", feature2) \ 223 "call %P[new2]", feature2) \
222 : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ 224 : output, "+r" (__sp) \
223 [new2] "i" (newfunc2), ## input) 225 : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
226 [new2] "i" (newfunc2), ## input); \
227}
224 228
225/* 229/*
226 * use this macro(s) if you need more than one output parameter 230 * use this macro(s) if you need more than one output parameter
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4e10d73cf018..12080d87da3b 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,7 +36,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
36 36
37extern struct desc_ptr idt_descr; 37extern struct desc_ptr idt_descr;
38extern gate_desc idt_table[]; 38extern gate_desc idt_table[];
39extern struct desc_ptr debug_idt_descr; 39extern const struct desc_ptr debug_idt_descr;
40extern gate_desc debug_idt_table[]; 40extern gate_desc debug_idt_table[];
41 41
42struct gdt_page { 42struct gdt_page {
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index ae55a43e09c0..d4957ac72b48 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -45,7 +45,8 @@
45extern u64 xfeatures_mask; 45extern u64 xfeatures_mask;
46extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; 46extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
47 47
48extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); 48extern void __init update_regset_xstate_info(unsigned int size,
49 u64 xstate_mask);
49 50
50void fpu__xstate_clear_all_cpu_caps(void); 51void fpu__xstate_clear_all_cpu_caps(void);
51void *get_xsave_addr(struct xregs_state *xsave, int xstate); 52void *get_xsave_addr(struct xregs_state *xsave, int xstate);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a4820d4df617..eccd0ac6bc38 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -6,6 +6,7 @@
6# define MCOUNT_ADDR ((unsigned long)(__fentry__)) 6# define MCOUNT_ADDR ((unsigned long)(__fentry__))
7#else 7#else
8# define MCOUNT_ADDR ((unsigned long)(mcount)) 8# define MCOUNT_ADDR ((unsigned long)(mcount))
9# define HAVE_FUNCTION_GRAPH_FP_TEST
9#endif 10#endif
10#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ 11#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
11 12
@@ -13,6 +14,8 @@
13#define ARCH_SUPPORTS_FTRACE_OPS 1 14#define ARCH_SUPPORTS_FTRACE_OPS 1
14#endif 15#endif
15 16
17#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
18
16#ifndef __ASSEMBLY__ 19#ifndef __ASSEMBLY__
17extern void mcount(void); 20extern void mcount(void);
18extern atomic_t modifying_ftrace_code; 21extern atomic_t modifying_ftrace_code;
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 2674ee3de748..1052a797d71d 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -6,6 +6,7 @@ unsigned long kaslr_get_random_long(const char *purpose);
6#ifdef CONFIG_RANDOMIZE_MEMORY 6#ifdef CONFIG_RANDOMIZE_MEMORY
7extern unsigned long page_offset_base; 7extern unsigned long page_offset_base;
8extern unsigned long vmalloc_base; 8extern unsigned long vmalloc_base;
9extern unsigned long vmemmap_base;
9 10
10void kernel_randomize_memory(void); 11void kernel_randomize_memory(void);
11#else 12#else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 1ef9d581b5d9..d31881188431 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,8 +24,6 @@ enum die_val {
24extern void printk_address(unsigned long address); 24extern void printk_address(unsigned long address);
25extern void die(const char *, struct pt_regs *,long); 25extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_trace(struct task_struct *t, struct pt_regs *regs,
28 unsigned long *sp, unsigned long bp);
29extern void show_stack_regs(struct pt_regs *regs); 27extern void show_stack_regs(struct pt_regs *regs);
30extern void __show_regs(struct pt_regs *regs, int all); 28extern void __show_regs(struct pt_regs *regs, int all);
31extern unsigned long oops_begin(void); 29extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4cd8db05301f..ce932812f142 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,10 +80,6 @@ static inline unsigned long __read_cr4(void)
80{ 80{
81 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); 81 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
82} 82}
83static inline unsigned long __read_cr4_safe(void)
84{
85 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
86}
87 83
88static inline void __write_cr4(unsigned long x) 84static inline void __write_cr4(unsigned long x)
89{ 85{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 60aac60ba25f..0f400c0e4979 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -108,7 +108,6 @@ struct pv_cpu_ops {
108 unsigned long (*read_cr0)(void); 108 unsigned long (*read_cr0)(void);
109 void (*write_cr0)(unsigned long); 109 void (*write_cr0)(unsigned long);
110 110
111 unsigned long (*read_cr4_safe)(void);
112 unsigned long (*read_cr4)(void); 111 unsigned long (*read_cr4)(void);
113 void (*write_cr4)(unsigned long); 112 void (*write_cr4)(unsigned long);
114 113
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6fdef9eef2d5..3a264200c62f 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -57,11 +57,13 @@ typedef struct { pteval_t pte; } pte_t;
57#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) 57#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
58#define VMALLOC_SIZE_TB _AC(32, UL) 58#define VMALLOC_SIZE_TB _AC(32, UL)
59#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) 59#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
60#define VMEMMAP_START _AC(0xffffea0000000000, UL) 60#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
61#ifdef CONFIG_RANDOMIZE_MEMORY 61#ifdef CONFIG_RANDOMIZE_MEMORY
62#define VMALLOC_START vmalloc_base 62#define VMALLOC_START vmalloc_base
63#define VMEMMAP_START vmemmap_base
63#else 64#else
64#define VMALLOC_START __VMALLOC_BASE 65#define VMALLOC_START __VMALLOC_BASE
66#define VMEMMAP_START __VMEMMAP_BASE
65#endif /* CONFIG_RANDOMIZE_MEMORY */ 67#endif /* CONFIG_RANDOMIZE_MEMORY */
66#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) 68#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
67#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) 69#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def9537a2d..984a7bf17f6a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -389,9 +389,9 @@ struct thread_struct {
389 unsigned short fsindex; 389 unsigned short fsindex;
390 unsigned short gsindex; 390 unsigned short gsindex;
391#endif 391#endif
392#ifdef CONFIG_X86_32 392
393 unsigned long ip; 393 u32 status; /* thread synchronous flags */
394#endif 394
395#ifdef CONFIG_X86_64 395#ifdef CONFIG_X86_64
396 unsigned long fsbase; 396 unsigned long fsbase;
397 unsigned long gsbase; 397 unsigned long gsbase;
@@ -438,6 +438,15 @@ struct thread_struct {
438}; 438};
439 439
440/* 440/*
441 * Thread-synchronous status.
442 *
443 * This is different from the flags in that nobody else
444 * ever touches our thread-synchronous status, so we don't
445 * have to worry about atomic accesses.
446 */
447#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
448
449/*
441 * Set IOPL bits in EFLAGS from given mask 450 * Set IOPL bits in EFLAGS from given mask
442 */ 451 */
443static inline void native_set_iopl_mask(unsigned mask) 452static inline void native_set_iopl_mask(unsigned mask)
@@ -724,8 +733,6 @@ static inline void spin_lock_prefetch(const void *x)
724 .addr_limit = KERNEL_DS, \ 733 .addr_limit = KERNEL_DS, \
725} 734}
726 735
727extern unsigned long thread_saved_pc(struct task_struct *tsk);
728
729/* 736/*
730 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. 737 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
731 * This is necessary to guarantee that the entire "struct pt_regs" 738 * This is necessary to guarantee that the entire "struct pt_regs"
@@ -776,17 +783,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
776 .addr_limit = KERNEL_DS, \ 783 .addr_limit = KERNEL_DS, \
777} 784}
778 785
779/*
780 * Return saved PC of a blocked thread.
781 * What is this good for? it will be always the scheduler or ret_from_fork.
782 */
783#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
784
785#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) 786#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
786extern unsigned long KSTK_ESP(struct task_struct *task); 787extern unsigned long KSTK_ESP(struct task_struct *task);
787 788
788#endif /* CONFIG_X86_64 */ 789#endif /* CONFIG_X86_64 */
789 790
791extern unsigned long thread_saved_pc(struct task_struct *tsk);
792
790extern void start_thread(struct pt_regs *regs, unsigned long new_ip, 793extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
791 unsigned long new_sp); 794 unsigned long new_sp);
792 795
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index b2988c0ed829..230e1903acf0 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -44,9 +44,9 @@ struct trampoline_header {
44extern struct real_mode_header *real_mode_header; 44extern struct real_mode_header *real_mode_header;
45extern unsigned char real_mode_blob_end[]; 45extern unsigned char real_mode_blob_end[];
46 46
47extern unsigned long init_rsp;
48extern unsigned long initial_code; 47extern unsigned long initial_code;
49extern unsigned long initial_gs; 48extern unsigned long initial_gs;
49extern unsigned long initial_stack;
50 50
51extern unsigned char real_mode_blob[]; 51extern unsigned char real_mode_blob[];
52extern unsigned char real_mode_relocs[]; 52extern unsigned char real_mode_relocs[];
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ebd0c164cd4e..19980b36f394 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -39,9 +39,6 @@ DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
39DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); 39DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
40#endif 40#endif
41 41
42/* Static state in head.S used to set up a CPU */
43extern unsigned long stack_start; /* Initial stack pointer address */
44
45struct task_struct; 42struct task_struct;
46 43
47struct smp_ops { 44struct smp_ops {
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 587d7914ea4b..19a2224f9e16 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -59,22 +59,19 @@ static inline void native_write_cr3(unsigned long val)
59static inline unsigned long native_read_cr4(void) 59static inline unsigned long native_read_cr4(void)
60{ 60{
61 unsigned long val; 61 unsigned long val;
62 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
63 return val;
64}
65
66static inline unsigned long native_read_cr4_safe(void)
67{
68 unsigned long val;
69 /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
70 * exists, so it will never fail. */
71#ifdef CONFIG_X86_32 62#ifdef CONFIG_X86_32
63 /*
64 * This could fault if CR4 does not exist. Non-existent CR4
65 * is functionally equivalent to CR4 == 0. Keep it simple and pretend
66 * that CR4 == 0 on CPUs that don't have CR4.
67 */
72 asm volatile("1: mov %%cr4, %0\n" 68 asm volatile("1: mov %%cr4, %0\n"
73 "2:\n" 69 "2:\n"
74 _ASM_EXTABLE(1b, 2b) 70 _ASM_EXTABLE(1b, 2b)
75 : "=r" (val), "=m" (__force_order) : "0" (0)); 71 : "=r" (val), "=m" (__force_order) : "0" (0));
76#else 72#else
77 val = native_read_cr4(); 73 /* CR4 always exists on x86_64. */
74 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
78#endif 75#endif
79 return val; 76 return val;
80} 77}
@@ -182,11 +179,6 @@ static inline unsigned long __read_cr4(void)
182 return native_read_cr4(); 179 return native_read_cr4();
183} 180}
184 181
185static inline unsigned long __read_cr4_safe(void)
186{
187 return native_read_cr4_safe();
188}
189
190static inline void __write_cr4(unsigned long x) 182static inline void __write_cr4(unsigned long x)
191{ 183{
192 native_write_cr4(x); 184 native_write_cr4(x);
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 0944218af9e2..37f2e0b377ad 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -8,86 +8,86 @@
8 8
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/ptrace.h> 10#include <linux/ptrace.h>
11#include <asm/switch_to.h>
12
13enum stack_type {
14 STACK_TYPE_UNKNOWN,
15 STACK_TYPE_TASK,
16 STACK_TYPE_IRQ,
17 STACK_TYPE_SOFTIRQ,
18 STACK_TYPE_EXCEPTION,
19 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
20};
11 21
12extern int kstack_depth_to_print; 22struct stack_info {
13 23 enum stack_type type;
14struct thread_info; 24 unsigned long *begin, *end, *next_sp;
15struct stacktrace_ops;
16
17typedef unsigned long (*walk_stack_t)(struct task_struct *task,
18 unsigned long *stack,
19 unsigned long bp,
20 const struct stacktrace_ops *ops,
21 void *data,
22 unsigned long *end,
23 int *graph);
24
25extern unsigned long
26print_context_stack(struct task_struct *task,
27 unsigned long *stack, unsigned long bp,
28 const struct stacktrace_ops *ops, void *data,
29 unsigned long *end, int *graph);
30
31extern unsigned long
32print_context_stack_bp(struct task_struct *task,
33 unsigned long *stack, unsigned long bp,
34 const struct stacktrace_ops *ops, void *data,
35 unsigned long *end, int *graph);
36
37/* Generic stack tracer with callbacks */
38
39struct stacktrace_ops {
40 int (*address)(void *data, unsigned long address, int reliable);
41 /* On negative return stop dumping */
42 int (*stack)(void *data, char *name);
43 walk_stack_t walk_stack;
44}; 25};
45 26
46void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 27bool in_task_stack(unsigned long *stack, struct task_struct *task,
47 unsigned long *stack, unsigned long bp, 28 struct stack_info *info);
48 const struct stacktrace_ops *ops, void *data); 29
30int get_stack_info(unsigned long *stack, struct task_struct *task,
31 struct stack_info *info, unsigned long *visit_mask);
32
33void stack_type_str(enum stack_type type, const char **begin,
34 const char **end);
35
36static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
37{
38 void *begin = info->begin;
39 void *end = info->end;
40
41 return (info->type != STACK_TYPE_UNKNOWN &&
42 addr >= begin && addr < end &&
43 addr + len > begin && addr + len <= end);
44}
45
46extern int kstack_depth_to_print;
49 47
50#ifdef CONFIG_X86_32 48#ifdef CONFIG_X86_32
51#define STACKSLOTS_PER_LINE 8 49#define STACKSLOTS_PER_LINE 8
52#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
53#else 50#else
54#define STACKSLOTS_PER_LINE 4 51#define STACKSLOTS_PER_LINE 4
55#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
56#endif 52#endif
57 53
58#ifdef CONFIG_FRAME_POINTER 54#ifdef CONFIG_FRAME_POINTER
59static inline unsigned long 55static inline unsigned long *
60stack_frame(struct task_struct *task, struct pt_regs *regs) 56get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
61{ 57{
62 unsigned long bp;
63
64 if (regs) 58 if (regs)
65 return regs->bp; 59 return (unsigned long *)regs->bp;
66 60
67 if (task == current) { 61 if (task == current)
68 /* Grab bp right from our regs */ 62 return __builtin_frame_address(0);
69 get_bp(bp);
70 return bp;
71 }
72 63
73 /* bp is the last reg pushed by switch_to */ 64 return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
74 return *(unsigned long *)task->thread.sp;
75} 65}
76#else 66#else
77static inline unsigned long 67static inline unsigned long *
78stack_frame(struct task_struct *task, struct pt_regs *regs) 68get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
79{ 69{
80 return 0; 70 return NULL;
71}
72#endif /* CONFIG_FRAME_POINTER */
73
74static inline unsigned long *
75get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
76{
77 if (regs)
78 return (unsigned long *)kernel_stack_pointer(regs);
79
80 if (task == current)
81 return __builtin_frame_address(0);
82
83 return (unsigned long *)task->thread.sp;
81} 84}
82#endif
83 85
84extern void 86void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
85show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 87 unsigned long *stack, char *log_lvl);
86 unsigned long *stack, unsigned long bp, char *log_lvl);
87 88
88extern void 89void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
89show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 90 unsigned long *sp, char *log_lvl);
90 unsigned long *sp, unsigned long bp, char *log_lvl);
91 91
92extern unsigned int code_bytes; 92extern unsigned int code_bytes;
93 93
@@ -106,7 +106,7 @@ static inline unsigned long caller_frame_pointer(void)
106{ 106{
107 struct stack_frame *frame; 107 struct stack_frame *frame;
108 108
109 get_bp(frame); 109 frame = __builtin_frame_address(0);
110 110
111#ifdef CONFIG_FRAME_POINTER 111#ifdef CONFIG_FRAME_POINTER
112 frame = frame->next_frame; 112 frame = frame->next_frame;
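
For orientation only (not part of the patch): the new stack_info / get_stack_info() / on_stack() interface declared above can be exercised roughly as follows. The helper name describe_addr() and the pr_info() message are illustrative assumptions, not kernel code, and the usual headers are assumed to be included.

    /* Sketch (hypothetical helper): classify a stack address with the new API. */
    static void describe_addr(unsigned long *addr)
    {
            struct stack_info info = {0};
            unsigned long visit_mask = 0;

            if (get_stack_info(addr, current, &info, &visit_mask))
                    return;                 /* STACK_TYPE_UNKNOWN */

            /* A full word must lie inside [begin, end) to be usable. */
            if (on_stack(&info, addr, sizeof(long)))
                    pr_info("%p is on stack type %d [%p..%p)\n",
                            addr, info.type, info.begin, info.end);
    }
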
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1b03a1..5cb436acd463 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,130 +2,66 @@
2#define _ASM_X86_SWITCH_TO_H 2#define _ASM_X86_SWITCH_TO_H
3 3
4struct task_struct; /* one of the stranger aspects of C forward declarations */ 4struct task_struct; /* one of the stranger aspects of C forward declarations */
5
6struct task_struct *__switch_to_asm(struct task_struct *prev,
7 struct task_struct *next);
8
5__visible struct task_struct *__switch_to(struct task_struct *prev, 9__visible struct task_struct *__switch_to(struct task_struct *prev,
6 struct task_struct *next); 10 struct task_struct *next);
7struct tss_struct; 11struct tss_struct;
8void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 12void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
9 struct tss_struct *tss); 13 struct tss_struct *tss);
10 14
11#ifdef CONFIG_X86_32 15/* This runs on the previous thread's stack. */
16static inline void prepare_switch_to(struct task_struct *prev,
17 struct task_struct *next)
18{
19#ifdef CONFIG_VMAP_STACK
20 /*
21 * If we switch to a stack that has a top-level paging entry
 22 * that is not present in the current mm, the resulting #PF
23 * will be promoted to a double-fault and we'll panic. Probe
24 * the new stack now so that vmalloc_fault can fix up the page
25 * tables if needed. This can only happen if we use a stack
26 * in vmap space.
27 *
28 * We assume that the stack is aligned so that it never spans
29 * more than one top-level paging entry.
30 *
31 * To minimize cache pollution, just follow the stack pointer.
32 */
33 READ_ONCE(*(unsigned char *)next->thread.sp);
34#endif
35}
36
37asmlinkage void ret_from_fork(void);
38
39/* data that is pointed to by thread.sp */
40struct inactive_task_frame {
41#ifdef CONFIG_X86_64
42 unsigned long r15;
43 unsigned long r14;
44 unsigned long r13;
45 unsigned long r12;
46#else
47 unsigned long si;
48 unsigned long di;
49#endif
50 unsigned long bx;
51 unsigned long bp;
52 unsigned long ret_addr;
53};
12 54
13#ifdef CONFIG_CC_STACKPROTECTOR 55struct fork_frame {
14#define __switch_canary \ 56 struct inactive_task_frame frame;
15 "movl %P[task_canary](%[next]), %%ebx\n\t" \ 57 struct pt_regs regs;
16 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" 58};
17#define __switch_canary_oparam \
18 , [stack_canary] "=m" (stack_canary.canary)
19#define __switch_canary_iparam \
20 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
21#else /* CC_STACKPROTECTOR */
22#define __switch_canary
23#define __switch_canary_oparam
24#define __switch_canary_iparam
25#endif /* CC_STACKPROTECTOR */
26 59
27/*
28 * Saving eflags is important. It switches not only IOPL between tasks,
29 * it also protects other tasks from NT leaking through sysenter etc.
30 */
31#define switch_to(prev, next, last) \ 60#define switch_to(prev, next, last) \
32do { \ 61do { \
33 /* \ 62 prepare_switch_to(prev, next); \
34 * Context-switching clobbers all registers, so we clobber \
35 * them explicitly, via unused output variables. \
36 * (EAX and EBP is not listed because EBP is saved/restored \
37 * explicitly for wchan access and EAX is the return value of \
38 * __switch_to()) \
39 */ \
40 unsigned long ebx, ecx, edx, esi, edi; \
41 \
42 asm volatile("pushl %%ebp\n\t" /* save EBP */ \
43 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
44 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
45 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
46 "pushl %[next_ip]\n\t" /* restore EIP */ \
47 __switch_canary \
48 "jmp __switch_to\n" /* regparm call */ \
49 "1:\t" \
50 "popl %%ebp\n\t" /* restore EBP */ \
51 \
52 /* output parameters */ \
53 : [prev_sp] "=m" (prev->thread.sp), \
54 [prev_ip] "=m" (prev->thread.ip), \
55 "=a" (last), \
56 \
57 /* clobbered output registers: */ \
58 "=b" (ebx), "=c" (ecx), "=d" (edx), \
59 "=S" (esi), "=D" (edi) \
60 \
61 __switch_canary_oparam \
62 \
63 /* input parameters: */ \
64 : [next_sp] "m" (next->thread.sp), \
65 [next_ip] "m" (next->thread.ip), \
66 \
67 /* regparm parameters for __switch_to(): */ \
68 [prev] "a" (prev), \
69 [next] "d" (next) \
70 \ 63 \
71 __switch_canary_iparam \ 64 ((last) = __switch_to_asm((prev), (next))); \
72 \
73 : /* reloaded segment registers */ \
74 "memory"); \
75} while (0) 65} while (0)
76 66
77#else /* CONFIG_X86_32 */
78
79/* frame pointer must be last for get_wchan */
80#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
81#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
82
83#define __EXTRA_CLOBBER \
84 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
85 "r12", "r13", "r14", "r15", "flags"
86
87#ifdef CONFIG_CC_STACKPROTECTOR
88#define __switch_canary \
89 "movq %P[task_canary](%%rsi),%%r8\n\t" \
90 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
91#define __switch_canary_oparam \
92 , [gs_canary] "=m" (irq_stack_union.stack_canary)
93#define __switch_canary_iparam \
94 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
95#else /* CC_STACKPROTECTOR */
96#define __switch_canary
97#define __switch_canary_oparam
98#define __switch_canary_iparam
99#endif /* CC_STACKPROTECTOR */
100
101/*
102 * There is no need to save or restore flags, because flags are always
103 * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL
104 * has no effect.
105 */
106#define switch_to(prev, next, last) \
107 asm volatile(SAVE_CONTEXT \
108 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
109 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
110 "call __switch_to\n\t" \
111 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
112 __switch_canary \
113 "movq %P[thread_info](%%rsi),%%r8\n\t" \
114 "movq %%rax,%%rdi\n\t" \
115 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
116 "jnz ret_from_fork\n\t" \
117 RESTORE_CONTEXT \
118 : "=a" (last) \
119 __switch_canary_oparam \
120 : [next] "S" (next), [prev] "D" (prev), \
121 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
122 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
123 [_tif_fork] "i" (_TIF_FORK), \
124 [thread_info] "i" (offsetof(struct task_struct, stack)), \
125 [current_task] "m" (current_task) \
126 __switch_canary_iparam \
127 : "memory", "cc" __EXTRA_CLOBBER)
128
129#endif /* CONFIG_X86_32 */
130
131#endif /* _ASM_X86_SWITCH_TO_H */ 67#endif /* _ASM_X86_SWITCH_TO_H */
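
For orientation (not part of the patch): with the inline-asm switch_to() gone, a sleeping task's saved state lives in the inactive_task_frame that __switch_to_asm leaves at task->thread.sp, which is exactly how get_frame_pointer() in stacktrace.h reads the frame pointer. A hedged sketch, assuming the task is known not to be running; the helper name is made up:

    /* Sketch (hypothetical helper): peek at a non-running task's switch frame. */
    static void peek_switch_frame(struct task_struct *task)
    {
            struct inactive_task_frame *frame =
                    (struct inactive_task_frame *)task->thread.sp;

            pr_info("saved bp=%lx resume ip=%lx\n",
                    frame->bp, frame->ret_addr);
    }
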
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4e23dd15c661..e3c95e8e61c5 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
60 * TS_COMPAT is set for 32-bit syscall entries and then 60 * TS_COMPAT is set for 32-bit syscall entries and then
61 * remains set until we return to user mode. 61 * remains set until we return to user mode.
62 */ 62 */
63 if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED)) 63 if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
64 /* 64 /*
65 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO 65 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
66 * and will match correctly in comparisons. 66 * and will match correctly in comparisons.
@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
116 unsigned long *args) 116 unsigned long *args)
117{ 117{
118# ifdef CONFIG_IA32_EMULATION 118# ifdef CONFIG_IA32_EMULATION
119 if (task_thread_info(task)->status & TS_COMPAT) 119 if (task->thread.status & TS_COMPAT)
120 switch (i) { 120 switch (i) {
121 case 0: 121 case 0:
122 if (!n--) break; 122 if (!n--) break;
@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
177 const unsigned long *args) 177 const unsigned long *args)
178{ 178{
179# ifdef CONFIG_IA32_EMULATION 179# ifdef CONFIG_IA32_EMULATION
180 if (task_thread_info(task)->status & TS_COMPAT) 180 if (task->thread.status & TS_COMPAT)
181 switch (i) { 181 switch (i) {
182 case 0: 182 case 0:
183 if (!n--) break; 183 if (!n--) break;
@@ -234,18 +234,8 @@ static inline void syscall_set_arguments(struct task_struct *task,
234 234
235static inline int syscall_get_arch(void) 235static inline int syscall_get_arch(void)
236{ 236{
237#ifdef CONFIG_IA32_EMULATION 237 /* x32 tasks should be considered AUDIT_ARCH_X86_64. */
238 /* 238 return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
239 * TS_COMPAT is set for 32-bit syscall entry and then
240 * remains set until we return to user mode.
241 *
242 * x32 tasks should be considered AUDIT_ARCH_X86_64.
243 */
244 if (task_thread_info(current)->status & TS_COMPAT)
245 return AUDIT_ARCH_I386;
246#endif
247 /* Both x32 and x86_64 are considered "64-bit". */
248 return AUDIT_ARCH_X86_64;
249} 239}
250#endif /* CONFIG_X86_32 */ 240#endif /* CONFIG_X86_32 */
251 241
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8b7c8d8e0852..2aaca53c0974 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -52,21 +52,6 @@ struct task_struct;
52#include <asm/cpufeature.h> 52#include <asm/cpufeature.h>
53#include <linux/atomic.h> 53#include <linux/atomic.h>
54 54
55struct thread_info {
56 struct task_struct *task; /* main task structure */
57 __u32 flags; /* low level flags */
58 __u32 status; /* thread synchronous flags */
59 __u32 cpu; /* current CPU */
60};
61
62#define INIT_THREAD_INFO(tsk) \
63{ \
64 .task = &tsk, \
65 .flags = 0, \
66 .cpu = 0, \
67}
68
69#define init_thread_info (init_thread_union.thread_info)
70#define init_stack (init_thread_union.stack) 55#define init_stack (init_thread_union.stack)
71 56
72#else /* !__ASSEMBLY__ */ 57#else /* !__ASSEMBLY__ */
@@ -95,7 +80,6 @@ struct thread_info {
95#define TIF_UPROBE 12 /* breakpointed or singlestepping */ 80#define TIF_UPROBE 12 /* breakpointed or singlestepping */
96#define TIF_NOTSC 16 /* TSC is not accessible in userland */ 81#define TIF_NOTSC 16 /* TSC is not accessible in userland */
97#define TIF_IA32 17 /* IA32 compatibility process */ 82#define TIF_IA32 17 /* IA32 compatibility process */
98#define TIF_FORK 18 /* ret_from_fork */
99#define TIF_NOHZ 19 /* in adaptive nohz mode */ 83#define TIF_NOHZ 19 /* in adaptive nohz mode */
100#define TIF_MEMDIE 20 /* is terminating due to OOM killer */ 84#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
101#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ 85#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
@@ -119,7 +103,6 @@ struct thread_info {
119#define _TIF_UPROBE (1 << TIF_UPROBE) 103#define _TIF_UPROBE (1 << TIF_UPROBE)
120#define _TIF_NOTSC (1 << TIF_NOTSC) 104#define _TIF_NOTSC (1 << TIF_NOTSC)
121#define _TIF_IA32 (1 << TIF_IA32) 105#define _TIF_IA32 (1 << TIF_IA32)
122#define _TIF_FORK (1 << TIF_FORK)
123#define _TIF_NOHZ (1 << TIF_NOHZ) 106#define _TIF_NOHZ (1 << TIF_NOHZ)
124#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) 107#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
125#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 108#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
@@ -160,11 +143,6 @@ struct thread_info {
160 */ 143 */
161#ifndef __ASSEMBLY__ 144#ifndef __ASSEMBLY__
162 145
163static inline struct thread_info *current_thread_info(void)
164{
165 return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
166}
167
168static inline unsigned long current_stack_pointer(void) 146static inline unsigned long current_stack_pointer(void)
169{ 147{
170 unsigned long sp; 148 unsigned long sp;
@@ -226,60 +204,19 @@ static inline int arch_within_stack_frames(const void * const stack,
226# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) 204# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
227#endif 205#endif
228 206
229/*
230 * ASM operand which evaluates to a 'thread_info' address of
231 * the current task, if it is known that "reg" is exactly "off"
232 * bytes below the top of the stack currently.
233 *
234 * ( The kernel stack's size is known at build time, it is usually
235 * 2 or 4 pages, and the bottom of the kernel stack contains
236 * the thread_info structure. So to access the thread_info very
237 * quickly from assembly code we can calculate down from the
238 * top of the kernel stack to the bottom, using constant,
239 * build-time calculations only. )
240 *
241 * For example, to fetch the current thread_info->flags value into %eax
242 * on x86-64 defconfig kernels, in syscall entry code where RSP is
243 * currently at exactly SIZEOF_PTREGS bytes away from the top of the
244 * stack:
245 *
246 * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
247 *
248 * will translate to:
249 *
250 * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
251 *
252 * which is below the current RSP by almost 16K.
253 */
254#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
255
256#endif 207#endif
257 208
258/*
259 * Thread-synchronous status.
260 *
261 * This is different from the flags in that nobody else
262 * ever touches our thread-synchronous status, so we don't
263 * have to worry about atomic accesses.
264 */
265#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
266#ifdef CONFIG_COMPAT 209#ifdef CONFIG_COMPAT
267#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ 210#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
268#endif 211#endif
269
270#ifndef __ASSEMBLY__ 212#ifndef __ASSEMBLY__
271 213
272static inline bool in_ia32_syscall(void)
273{
274#ifdef CONFIG_X86_32 214#ifdef CONFIG_X86_32
275 return true; 215#define in_ia32_syscall() true
276#endif 216#else
277#ifdef CONFIG_IA32_EMULATION 217#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
278 if (current_thread_info()->status & TS_COMPAT) 218 current->thread.status & TS_COMPAT)
279 return true;
280#endif 219#endif
281 return false;
282}
283 220
284/* 221/*
285 * Force syscall return via IRET by making it look as if there was 222 * Force syscall return via IRET by making it look as if there was
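
Since the status field moves from thread_info to thread_struct in this series, per-task compat checks change shape; the syscall.h hunk above shows the real conversions. A minimal sketch of the old vs. new form (the helper name is hypothetical):

    /* Old:  if (task_thread_info(task)->status & TS_COMPAT) ...  */
    /* New:  if (task->thread.status & TS_COMPAT) ...             */
    static long fixup_compat_retval(struct task_struct *task, long val)
    {
            if (task->thread.status & TS_COMPAT)
                    return (long)(int)val;  /* (int)-EFOO becomes (long)-EFOO */
            return val;
    }
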
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index dee8a70382ba..6fa85944af83 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
81/* Initialize cr4 shadow for this CPU. */ 81/* Initialize cr4 shadow for this CPU. */
82static inline void cr4_init_shadow(void) 82static inline void cr4_init_shadow(void)
83{ 83{
84 this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe()); 84 this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
85} 85}
86 86
87/* Set in this cpu's CR4. */ 87/* Set in this cpu's CR4. */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c3496619740a..01fd0a7f48cd 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -117,6 +117,12 @@ extern void ist_exit(struct pt_regs *regs);
117extern void ist_begin_non_atomic(struct pt_regs *regs); 117extern void ist_begin_non_atomic(struct pt_regs *regs);
118extern void ist_end_non_atomic(void); 118extern void ist_end_non_atomic(void);
119 119
120#ifdef CONFIG_VMAP_STACK
121void __noreturn handle_stack_overflow(const char *message,
122 struct pt_regs *regs,
123 unsigned long fault_address);
124#endif
125
120/* Interrupts/Exceptions */ 126/* Interrupts/Exceptions */
121enum { 127enum {
122 X86_TRAP_DE = 0, /* 0, Divide-by-zero */ 128 X86_TRAP_DE = 0, /* 0, Divide-by-zero */
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
new file mode 100644
index 000000000000..c4b6d1cafa46
--- /dev/null
+++ b/arch/x86/include/asm/unwind.h
@@ -0,0 +1,73 @@
1#ifndef _ASM_X86_UNWIND_H
2#define _ASM_X86_UNWIND_H
3
4#include <linux/sched.h>
5#include <linux/ftrace.h>
6#include <asm/ptrace.h>
7#include <asm/stacktrace.h>
8
9struct unwind_state {
10 struct stack_info stack_info;
11 unsigned long stack_mask;
12 struct task_struct *task;
13 int graph_idx;
14#ifdef CONFIG_FRAME_POINTER
15 unsigned long *bp;
16#else
17 unsigned long *sp;
18#endif
19};
20
21void __unwind_start(struct unwind_state *state, struct task_struct *task,
22 struct pt_regs *regs, unsigned long *first_frame);
23
24bool unwind_next_frame(struct unwind_state *state);
25
26static inline bool unwind_done(struct unwind_state *state)
27{
28 return state->stack_info.type == STACK_TYPE_UNKNOWN;
29}
30
31static inline
32void unwind_start(struct unwind_state *state, struct task_struct *task,
33 struct pt_regs *regs, unsigned long *first_frame)
34{
35 first_frame = first_frame ? : get_stack_pointer(task, regs);
36
37 __unwind_start(state, task, regs, first_frame);
38}
39
40#ifdef CONFIG_FRAME_POINTER
41
42static inline
43unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
44{
45 if (unwind_done(state))
46 return NULL;
47
48 return state->bp + 1;
49}
50
51unsigned long unwind_get_return_address(struct unwind_state *state);
52
53#else /* !CONFIG_FRAME_POINTER */
54
55static inline
56unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
57{
58 return NULL;
59}
60
61static inline
62unsigned long unwind_get_return_address(struct unwind_state *state)
63{
64 if (unwind_done(state))
65 return 0;
66
67 return ftrace_graph_ret_addr(state->task, &state->graph_idx,
68 *state->sp, state->sp);
69}
70
71#endif /* CONFIG_FRAME_POINTER */
72
73#endif /* _ASM_X86_UNWIND_H */
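
A hedged usage sketch of the new unwind interface (the function name dump_return_addresses() is made up for illustration); it follows the same start/done/next loop the converted callers in this series use:

    static void dump_return_addresses(struct task_struct *task,
                                      struct pt_regs *regs)
    {
            struct unwind_state state;
            unsigned long addr;

            /* NULL first_frame: start from the task's current stack pointer. */
            for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
                 unwind_next_frame(&state)) {
                    addr = unwind_get_return_address(&state);
                    if (!addr)
                            break;
                    printk("  [<%p>] %pB\n", (void *)addr, (void *)addr);
            }
    }
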
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0503f5bfb18d..45257cf84370 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,6 +125,12 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
125obj-$(CONFIG_PERF_EVENTS) += perf_regs.o 125obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
126obj-$(CONFIG_TRACING) += tracepoint.o 126obj-$(CONFIG_TRACING) += tracepoint.o
127 127
128ifdef CONFIG_FRAME_POINTER
129obj-y += unwind_frame.o
130else
131obj-y += unwind_guess.o
132endif
133
128### 134###
129# 64 bit specific files 135# 64 bit specific files
130ifeq ($(CONFIG_X86_64),y) 136ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index adb3eaf8fe2a..48587335ede8 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void)
99 saved_magic = 0x12345678; 99 saved_magic = 0x12345678;
100#else /* CONFIG_64BIT */ 100#else /* CONFIG_64BIT */
101#ifdef CONFIG_SMP 101#ifdef CONFIG_SMP
102 stack_start = (unsigned long)temp_stack + sizeof(temp_stack); 102 initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
103 early_gdt_descr.address = 103 early_gdt_descr.address =
104 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 104 (unsigned long)get_cpu_gdt_table(smp_processor_id());
105 initial_gs = per_cpu_offset(smp_processor_id()); 105 initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5b2ae106bd4a..8862da76ef6f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -25,7 +25,7 @@
25static struct apic apic_physflat; 25static struct apic apic_physflat;
26static struct apic apic_flat; 26static struct apic apic_flat;
27 27
28struct apic __read_mostly *apic = &apic_flat; 28struct apic *apic __ro_after_init = &apic_flat;
29EXPORT_SYMBOL_GPL(apic); 29EXPORT_SYMBOL_GPL(apic);
30 30
31static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 31static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -154,7 +154,7 @@ static int flat_probe(void)
154 return 1; 154 return 1;
155} 155}
156 156
157static struct apic apic_flat = { 157static struct apic apic_flat __ro_after_init = {
158 .name = "flat", 158 .name = "flat",
159 .probe = flat_probe, 159 .probe = flat_probe,
160 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 160 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
@@ -248,7 +248,7 @@ static int physflat_probe(void)
248 return 0; 248 return 0;
249} 249}
250 250
251static struct apic apic_physflat = { 251static struct apic apic_physflat __ro_after_init = {
252 252
253 .name = "physical flat", 253 .name = "physical flat",
254 .probe = physflat_probe, 254 .probe = physflat_probe,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index c05688b2deff..b109e4389c92 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v)
108 WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); 108 WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
109} 109}
110 110
111struct apic apic_noop = { 111struct apic apic_noop __ro_after_init = {
112 .name = "noop", 112 .name = "noop",
113 .probe = noop_probe, 113 .probe = noop_probe,
114 .acpi_madt_oem_check = NULL, 114 .acpi_madt_oem_check = NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 06dbaa458bfe..56012010332c 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -142,7 +142,7 @@ static int probe_bigsmp(void)
142 return dmi_bigsmp; 142 return dmi_bigsmp;
143} 143}
144 144
145static struct apic apic_bigsmp = { 145static struct apic apic_bigsmp __ro_after_init = {
146 146
147 .name = "bigsmp", 147 .name = "bigsmp",
148 .probe = probe_bigsmp, 148 .probe = probe_bigsmp,
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index ade25320df96..015bbf30e3e3 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
269 hpet_msi_write(irq_data_get_irq_handler_data(data), msg); 269 hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
270} 270}
271 271
272static struct irq_chip hpet_msi_controller = { 272static struct irq_chip hpet_msi_controller __ro_after_init = {
273 .name = "HPET-MSI", 273 .name = "HPET-MSI",
274 .irq_unmask = hpet_msi_unmask, 274 .irq_unmask = hpet_msi_unmask,
275 .irq_mask = hpet_msi_mask, 275 .irq_mask = hpet_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 563096267ca2..c48264e202fd 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -72,7 +72,7 @@ static int probe_default(void)
72 return 1; 72 return 1;
73} 73}
74 74
75static struct apic apic_default = { 75static struct apic apic_default __ro_after_init = {
76 76
77 .name = "default", 77 .name = "default",
78 .probe = probe_default, 78 .probe = probe_default,
@@ -126,7 +126,7 @@ static struct apic apic_default = {
126 126
127apic_driver(apic_default); 127apic_driver(apic_default);
128 128
129struct apic *apic = &apic_default; 129struct apic *apic __ro_after_init = &apic_default;
130EXPORT_SYMBOL_GPL(apic); 130EXPORT_SYMBOL_GPL(apic);
131 131
132static int cmdline_apic __initdata; 132static int cmdline_apic __initdata;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 54f35d988025..200af5ae9662 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
227 cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); 227 cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
228} 228}
229 229
230static struct apic apic_x2apic_cluster = { 230static struct apic apic_x2apic_cluster __ro_after_init = {
231 231
232 .name = "cluster x2apic", 232 .name = "cluster x2apic",
233 .probe = x2apic_cluster_probe, 233 .probe = x2apic_cluster_probe,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 4f13f54f1b1f..ff111f05a314 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -98,7 +98,7 @@ static int x2apic_phys_probe(void)
98 return apic == &apic_x2apic_phys; 98 return apic == &apic_x2apic_phys;
99} 99}
100 100
101static struct apic apic_x2apic_phys = { 101static struct apic apic_x2apic_phys __ro_after_init = {
102 102
103 .name = "physical x2apic", 103 .name = "physical x2apic",
104 .probe = x2apic_phys_probe, 104 .probe = x2apic_phys_probe,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index cb0673c1e940..b9f6157d4271 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -560,7 +560,7 @@ static int uv_probe(void)
560 return apic == &apic_x2apic_uv_x; 560 return apic == &apic_x2apic_uv_x;
561} 561}
562 562
563static struct apic __refdata apic_x2apic_uv_x = { 563static struct apic apic_x2apic_uv_x __ro_after_init = {
564 564
565 .name = "UV large system", 565 .name = "UV large system",
566 .probe = uv_probe, 566 .probe = uv_probe,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 2bd5c6ff7ee7..c62e015b126c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,10 +29,13 @@
29 29
30void common(void) { 30void common(void) {
31 BLANK(); 31 BLANK();
32 OFFSET(TI_flags, thread_info, flags); 32 OFFSET(TASK_threadsp, task_struct, thread.sp);
33 OFFSET(TI_status, thread_info, status); 33#ifdef CONFIG_CC_STACKPROTECTOR
34 OFFSET(TASK_stack_canary, task_struct, stack_canary);
35#endif
34 36
35 BLANK(); 37 BLANK();
38 OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
36 OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); 39 OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
37 40
38 BLANK(); 41 BLANK();
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d217dc0..880aa093268d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
57 /* Size of SYSENTER_stack */ 57 /* Size of SYSENTER_stack */
58 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); 58 DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
59 59
60#ifdef CONFIG_CC_STACKPROTECTOR
61 BLANK();
62 OFFSET(stack_canary_offset, stack_canary, canary);
63#endif
64
60#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) 65#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
61 BLANK(); 66 BLANK();
62 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); 67 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97d4e0b..210927ee2e74 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
56 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 56 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
57 BLANK(); 57 BLANK();
58 58
59#ifdef CONFIG_CC_STACKPROTECTOR
60 DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
61 BLANK();
62#endif
63
59 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); 64 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
60 DEFINE(NR_syscalls, sizeof(syscalls_64)); 65 DEFINE(NR_syscalls, sizeof(syscalls_64));
61 66
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bcc9ccc220c9..9bd910a7dd0a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1264,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg)
1264__setup("clearcpuid=", setup_disablecpuid); 1264__setup("clearcpuid=", setup_disablecpuid);
1265 1265
1266#ifdef CONFIG_X86_64 1266#ifdef CONFIG_X86_64
1267struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1267struct desc_ptr idt_descr __ro_after_init = {
1268struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, 1268 .size = NR_VECTORS * 16 - 1,
1269 (unsigned long) debug_idt_table }; 1269 .address = (unsigned long) idt_table,
1270};
1271const struct desc_ptr debug_idt_descr = {
1272 .size = NR_VECTORS * 16 - 1,
1273 .address = (unsigned long) debug_idt_table,
1274};
1270 1275
1271DEFINE_PER_CPU_FIRST(union irq_stack_union, 1276DEFINE_PER_CPU_FIRST(union irq_stack_union,
1272 irq_stack_union) __aligned(PAGE_SIZE) __visible; 1277 irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1280,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
1280EXPORT_PER_CPU_SYMBOL(current_task); 1285EXPORT_PER_CPU_SYMBOL(current_task);
1281 1286
1282DEFINE_PER_CPU(char *, irq_stack_ptr) = 1287DEFINE_PER_CPU(char *, irq_stack_ptr) =
1283 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 1288 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
1284 1289
1285DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; 1290DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1286 1291
@@ -1304,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1304/* May not be marked __init: used by software suspend */ 1309/* May not be marked __init: used by software suspend */
1305void syscall_init(void) 1310void syscall_init(void)
1306{ 1311{
1307 /*
1308 * LSTAR and STAR live in a bit strange symbiosis.
1309 * They both write to the same internal register. STAR allows to
1310 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
1311 */
1312 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); 1312 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1313 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); 1313 wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
1314 1314
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 28f1b54b7fad..24e87e74990d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex);
72u64 size_or_mask, size_and_mask; 72u64 size_or_mask, size_and_mask;
73static bool mtrr_aps_delayed_init; 73static bool mtrr_aps_delayed_init;
74 74
75static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; 75static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
76 76
77const struct mtrr_ops *mtrr_if; 77const struct mtrr_ops *mtrr_if;
78 78
79static void set_mtrr(unsigned int reg, unsigned long base, 79static void set_mtrr(unsigned int reg, unsigned long base,
80 unsigned long size, mtrr_type type); 80 unsigned long size, mtrr_type type);
81 81
82void set_mtrr_ops(const struct mtrr_ops *ops) 82void __init set_mtrr_ops(const struct mtrr_ops *ops)
83{ 83{
84 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 84 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
85 mtrr_ops[ops->vendor] = ops; 85 mtrr_ops[ops->vendor] = ops;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 6c7ced07d16d..ad8bd763efa5 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index,
54bool get_mtrr_state(void); 54bool get_mtrr_state(void);
55void mtrr_bp_pat_init(void); 55void mtrr_bp_pat_init(void);
56 56
57extern void set_mtrr_ops(const struct mtrr_ops *ops); 57extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
58 58
59extern u64 size_or_mask, size_and_mask; 59extern u64 size_or_mask, size_and_mask;
60extern const struct mtrr_ops *mtrr_if; 60extern const struct mtrr_ops *mtrr_if;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 92e8f0a7159c..9b7cf5c28f5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,7 +17,7 @@
17#include <linux/sysfs.h> 17#include <linux/sysfs.h>
18 18
19#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
20 20#include <asm/unwind.h>
21 21
22int panic_on_unrecovered_nmi; 22int panic_on_unrecovered_nmi;
23int panic_on_io_nmi; 23int panic_on_io_nmi;
@@ -25,11 +25,29 @@ unsigned int code_bytes = 64;
25int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; 25int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
26static int die_counter; 26static int die_counter;
27 27
28bool in_task_stack(unsigned long *stack, struct task_struct *task,
29 struct stack_info *info)
30{
31 unsigned long *begin = task_stack_page(task);
32 unsigned long *end = task_stack_page(task) + THREAD_SIZE;
33
34 if (stack < begin || stack >= end)
35 return false;
36
37 info->type = STACK_TYPE_TASK;
38 info->begin = begin;
39 info->end = end;
40 info->next_sp = NULL;
41
42 return true;
43}
44
28static void printk_stack_address(unsigned long address, int reliable, 45static void printk_stack_address(unsigned long address, int reliable,
29 void *data) 46 char *log_lvl)
30{ 47{
48 touch_nmi_watchdog();
31 printk("%s [<%p>] %s%pB\n", 49 printk("%s [<%p>] %s%pB\n",
32 (char *)data, (void *)address, reliable ? "" : "? ", 50 log_lvl, (void *)address, reliable ? "" : "? ",
33 (void *)address); 51 (void *)address);
34} 52}
35 53
@@ -38,176 +56,120 @@ void printk_address(unsigned long address)
38 pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address); 56 pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
39} 57}
40 58
41#ifdef CONFIG_FUNCTION_GRAPH_TRACER 59void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
42static void 60 unsigned long *stack, char *log_lvl)
43print_ftrace_graph_addr(unsigned long addr, void *data,
44 const struct stacktrace_ops *ops,
45 struct task_struct *task, int *graph)
46{ 61{
47 unsigned long ret_addr; 62 struct unwind_state state;
48 int index; 63 struct stack_info stack_info = {0};
49 64 unsigned long visit_mask = 0;
50 if (addr != (unsigned long)return_to_handler) 65 int graph_idx = 0;
51 return;
52
53 index = task->curr_ret_stack;
54
55 if (!task->ret_stack || index < *graph)
56 return;
57
58 index -= *graph;
59 ret_addr = task->ret_stack[index].ret;
60
61 ops->address(data, ret_addr, 1);
62 66
63 (*graph)++; 67 printk("%sCall Trace:\n", log_lvl);
64}
65#else
66static inline void
67print_ftrace_graph_addr(unsigned long addr, void *data,
68 const struct stacktrace_ops *ops,
69 struct task_struct *task, int *graph)
70{ }
71#endif
72
73/*
74 * x86-64 can have up to three kernel stacks:
75 * process stack
76 * interrupt stack
77 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
78 */
79
80static inline int valid_stack_ptr(struct task_struct *task,
81 void *p, unsigned int size, void *end)
82{
83 void *t = task_stack_page(task);
84 if (end) {
85 if (p < end && p >= (end-THREAD_SIZE))
86 return 1;
87 else
88 return 0;
89 }
90 return p >= t && p < t + THREAD_SIZE - size;
91}
92 68
93unsigned long 69 unwind_start(&state, task, regs, stack);
94print_context_stack(struct task_struct *task,
95 unsigned long *stack, unsigned long bp,
96 const struct stacktrace_ops *ops, void *data,
97 unsigned long *end, int *graph)
98{
99 struct stack_frame *frame = (struct stack_frame *)bp;
100 70
101 /* 71 /*
102 * If we overflowed the stack into a guard page, jump back to the 72 * Iterate through the stacks, starting with the current stack pointer.
103 * bottom of the usable stack. 73 * Each stack has a pointer to the next one.
74 *
75 * x86-64 can have several stacks:
76 * - task stack
77 * - interrupt stack
78 * - HW exception stacks (double fault, nmi, debug, mce)
79 *
80 * x86-32 can have up to three stacks:
81 * - task stack
82 * - softirq stack
83 * - hardirq stack
104 */ 84 */
105 if ((unsigned long)task_stack_page(task) - (unsigned long)stack < 85 for (; stack; stack = stack_info.next_sp) {
106 PAGE_SIZE) 86 const char *str_begin, *str_end;
107 stack = (unsigned long *)task_stack_page(task);
108
109 while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
110 unsigned long addr;
111
112 addr = *stack;
113 if (__kernel_text_address(addr)) {
114 if ((unsigned long) stack == bp + sizeof(long)) {
115 ops->address(data, addr, 1);
116 frame = frame->next_frame;
117 bp = (unsigned long) frame;
118 } else {
119 ops->address(data, addr, 0);
120 }
121 print_ftrace_graph_addr(addr, data, ops, task, graph);
122 }
123 stack++;
124 }
125 return bp;
126}
127EXPORT_SYMBOL_GPL(print_context_stack);
128
129unsigned long
130print_context_stack_bp(struct task_struct *task,
131 unsigned long *stack, unsigned long bp,
132 const struct stacktrace_ops *ops, void *data,
133 unsigned long *end, int *graph)
134{
135 struct stack_frame *frame = (struct stack_frame *)bp;
136 unsigned long *ret_addr = &frame->return_address;
137 87
138 while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { 88 /*
139 unsigned long addr = *ret_addr; 89 * If we overflowed the task stack into a guard page, jump back
90 * to the bottom of the usable stack.
91 */
92 if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
93 stack = task_stack_page(task);
140 94
141 if (!__kernel_text_address(addr)) 95 if (get_stack_info(stack, task, &stack_info, &visit_mask))
142 break; 96 break;
143 97
144 if (ops->address(data, addr, 1)) 98 stack_type_str(stack_info.type, &str_begin, &str_end);
145 break; 99 if (str_begin)
146 frame = frame->next_frame; 100 printk("%s <%s> ", log_lvl, str_begin);
147 ret_addr = &frame->return_address; 101
148 print_ftrace_graph_addr(addr, data, ops, task, graph); 102 /*
149 } 103 * Scan the stack, printing any text addresses we find. At the
150 104 * same time, follow proper stack frames with the unwinder.
151 return (unsigned long)frame; 105 *
152} 106 * Addresses found during the scan which are not reported by
153EXPORT_SYMBOL_GPL(print_context_stack_bp); 107 * the unwinder are considered to be additional clues which are
154 108 * sometimes useful for debugging and are prefixed with '?'.
155static int print_trace_stack(void *data, char *name) 109 * This also serves as a failsafe option in case the unwinder
156{ 110 * goes off in the weeds.
157 printk("%s <%s> ", (char *)data, name); 111 */
158 return 0; 112 for (; stack < stack_info.end; stack++) {
159} 113 unsigned long real_addr;
160 114 int reliable = 0;
161/* 115 unsigned long addr = *stack;
162 * Print one address/symbol entries per line. 116 unsigned long *ret_addr_p =
163 */ 117 unwind_get_return_address_ptr(&state);
164static int print_trace_address(void *data, unsigned long addr, int reliable) 118
165{ 119 if (!__kernel_text_address(addr))
166 touch_nmi_watchdog(); 120 continue;
167 printk_stack_address(addr, reliable, data); 121
168 return 0; 122 if (stack == ret_addr_p)
169} 123 reliable = 1;
170 124
171static const struct stacktrace_ops print_trace_ops = { 125 /*
172 .stack = print_trace_stack, 126 * When function graph tracing is enabled for a
173 .address = print_trace_address, 127 * function, its return address on the stack is
174 .walk_stack = print_context_stack, 128 * replaced with the address of an ftrace handler
175}; 129 * (return_to_handler). In that case, before printing
176 130 * the "real" address, we want to print the handler
177void 131 * address as an "unreliable" hint that function graph
178show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 132 * tracing was involved.
179 unsigned long *stack, unsigned long bp, char *log_lvl) 133 */
180{ 134 real_addr = ftrace_graph_ret_addr(task, &graph_idx,
181 printk("%sCall Trace:\n", log_lvl); 135 addr, stack);
182 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 136 if (real_addr != addr)
183} 137 printk_stack_address(addr, 0, log_lvl);
138 printk_stack_address(real_addr, reliable, log_lvl);
139
140 if (!reliable)
141 continue;
142
143 /*
144 * Get the next frame from the unwinder. No need to
145 * check for an error: if anything goes wrong, the rest
146 * of the addresses will just be printed as unreliable.
147 */
148 unwind_next_frame(&state);
149 }
184 150
185void show_trace(struct task_struct *task, struct pt_regs *regs, 151 if (str_end)
186 unsigned long *stack, unsigned long bp) 152 printk("%s <%s> ", log_lvl, str_end);
187{ 153 }
188 show_trace_log_lvl(task, regs, stack, bp, "");
189} 154}
190 155
191void show_stack(struct task_struct *task, unsigned long *sp) 156void show_stack(struct task_struct *task, unsigned long *sp)
192{ 157{
193 unsigned long bp = 0; 158 task = task ? : current;
194 unsigned long stack;
195 159
196 /* 160 /*
197 * Stack frames below this one aren't interesting. Don't show them 161 * Stack frames below this one aren't interesting. Don't show them
198 * if we're printing for %current. 162 * if we're printing for %current.
199 */ 163 */
200 if (!sp && (!task || task == current)) { 164 if (!sp && task == current)
201 sp = &stack; 165 sp = get_stack_pointer(current, NULL);
202 bp = stack_frame(current, NULL);
203 }
204 166
205 show_stack_log_lvl(task, NULL, sp, bp, ""); 167 show_stack_log_lvl(task, NULL, sp, "");
206} 168}
207 169
208void show_stack_regs(struct pt_regs *regs) 170void show_stack_regs(struct pt_regs *regs)
209{ 171{
210 show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); 172 show_stack_log_lvl(current, regs, NULL, "");
211} 173}
212 174
213static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; 175static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
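
Assuming the usual debugging entry points (not shown in this hunk) are otherwise unchanged, the NULL-friendly defaults above keep the classic one-liner working: the task falls back to current and the stack pointer is taken live. A sketch:

    /* Dump the current task's backtrace, e.g. sprinkled in for debugging. */
    show_stack(NULL, NULL);

    /* Same, with an explicit log level, as the 32-bit show_regs() does. */
    show_stack_log_lvl(current, NULL, NULL, KERN_EMERG);
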
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 09675712eba8..06eb322b5f9f 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,93 +16,121 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19static void *is_irq_stack(void *p, void *irq) 19void stack_type_str(enum stack_type type, const char **begin, const char **end)
20{ 20{
21 if (p < irq || p >= (irq + THREAD_SIZE)) 21 switch (type) {
22 return NULL; 22 case STACK_TYPE_IRQ:
23 return irq + THREAD_SIZE; 23 case STACK_TYPE_SOFTIRQ:
24 *begin = "IRQ";
25 *end = "EOI";
26 break;
27 default:
28 *begin = NULL;
29 *end = NULL;
30 }
24} 31}
25 32
26 33static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
27static void *is_hardirq_stack(unsigned long *stack, int cpu)
28{ 34{
29 void *irq = per_cpu(hardirq_stack, cpu); 35 unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
36 unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
30 37
31 return is_irq_stack(stack, irq); 38 /*
32} 39 * This is a software stack, so 'end' can be a valid stack pointer.
40 * It just means the stack is empty.
41 */
42 if (stack < begin || stack > end)
43 return false;
33 44
34static void *is_softirq_stack(unsigned long *stack, int cpu) 45 info->type = STACK_TYPE_IRQ;
35{ 46 info->begin = begin;
36 void *irq = per_cpu(softirq_stack, cpu); 47 info->end = end;
37 48
38 return is_irq_stack(stack, irq); 49 /*
50 * See irq_32.c -- the next stack pointer is stored at the beginning of
51 * the stack.
52 */
53 info->next_sp = (unsigned long *)*begin;
54
55 return true;
39} 56}
40 57
41void dump_trace(struct task_struct *task, struct pt_regs *regs, 58static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
42 unsigned long *stack, unsigned long bp,
43 const struct stacktrace_ops *ops, void *data)
44{ 59{
45 const unsigned cpu = get_cpu(); 60 unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
46 int graph = 0; 61 unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
47 u32 *prev_esp;
48 62
49 if (!task) 63 /*
50 task = current; 64 * This is a software stack, so 'end' can be a valid stack pointer.
65 * It just means the stack is empty.
66 */
67 if (stack < begin || stack > end)
68 return false;
51 69
52 if (!stack) { 70 info->type = STACK_TYPE_SOFTIRQ;
53 unsigned long dummy; 71 info->begin = begin;
72 info->end = end;
54 73
55 stack = &dummy; 74 /*
56 if (task != current) 75 * The next stack pointer is stored at the beginning of the stack.
57 stack = (unsigned long *)task->thread.sp; 76 * See irq_32.c.
58 } 77 */
78 info->next_sp = (unsigned long *)*begin;
59 79
60 if (!bp) 80 return true;
61 bp = stack_frame(task, regs); 81}
62 82
63 for (;;) { 83int get_stack_info(unsigned long *stack, struct task_struct *task,
64 void *end_stack; 84 struct stack_info *info, unsigned long *visit_mask)
85{
86 if (!stack)
87 goto unknown;
65 88
66 end_stack = is_hardirq_stack(stack, cpu); 89 task = task ? : current;
67 if (!end_stack)
68 end_stack = is_softirq_stack(stack, cpu);
69 90
70 bp = ops->walk_stack(task, stack, bp, ops, data, 91 if (in_task_stack(stack, task, info))
71 end_stack, &graph); 92 goto recursion_check;
72 93
73 /* Stop if not on irq stack */ 94 if (task != current)
74 if (!end_stack) 95 goto unknown;
75 break;
76 96
77 /* The previous esp is saved on the bottom of the stack */ 97 if (in_hardirq_stack(stack, info))
78 prev_esp = (u32 *)(end_stack - THREAD_SIZE); 98 goto recursion_check;
79 stack = (unsigned long *)*prev_esp;
80 if (!stack)
81 break;
82 99
83 if (ops->stack(data, "IRQ") < 0) 100 if (in_softirq_stack(stack, info))
84 break; 101 goto recursion_check;
85 touch_nmi_watchdog(); 102
103 goto unknown;
104
105recursion_check:
106 /*
107 * Make sure we don't iterate through any given stack more than once.
108 * If it comes up a second time then there's something wrong going on:
109 * just break out and report an unknown stack type.
110 */
111 if (visit_mask) {
112 if (*visit_mask & (1UL << info->type))
113 goto unknown;
114 *visit_mask |= 1UL << info->type;
86 } 115 }
87 put_cpu(); 116
117 return 0;
118
119unknown:
120 info->type = STACK_TYPE_UNKNOWN;
121 return -EINVAL;
88} 122}
89EXPORT_SYMBOL(dump_trace);
90 123
91void 124void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
92show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 125 unsigned long *sp, char *log_lvl)
93 unsigned long *sp, unsigned long bp, char *log_lvl)
94{ 126{
95 unsigned long *stack; 127 unsigned long *stack;
96 int i; 128 int i;
97 129
98 if (sp == NULL) { 130 if (!try_get_task_stack(task))
99 if (regs) 131 return;
100 sp = (unsigned long *)regs->sp; 132
101 else if (task) 133 sp = sp ? : get_stack_pointer(task, regs);
102 sp = (unsigned long *)task->thread.sp;
103 else
104 sp = (unsigned long *)&sp;
105 }
106 134
107 stack = sp; 135 stack = sp;
108 for (i = 0; i < kstack_depth_to_print; i++) { 136 for (i = 0; i < kstack_depth_to_print; i++) {
@@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
117 touch_nmi_watchdog(); 145 touch_nmi_watchdog();
118 } 146 }
119 pr_cont("\n"); 147 pr_cont("\n");
120 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 148 show_trace_log_lvl(task, regs, sp, log_lvl);
149
150 put_task_stack(task);
121} 151}
122 152
123 153
@@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs)
139 u8 *ip; 169 u8 *ip;
140 170
141 pr_emerg("Stack:\n"); 171 pr_emerg("Stack:\n");
142 show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); 172 show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
143 173
144 pr_emerg("Code:"); 174 pr_emerg("Code:");
145 175
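
A hedged sketch of the stack-hopping idiom that get_stack_info() enables (the helper count_stacks() is hypothetical); show_trace_log_lvl() above does the real thing, with visit_mask preventing any stack from being visited twice:

    static void count_stacks(void)
    {
            struct stack_info info = {0};
            unsigned long visit_mask = 0;
            unsigned long *sp = get_stack_pointer(current, NULL);
            int n = 0;

            for (; sp; sp = info.next_sp) {
                    if (get_stack_info(sp, current, &info, &visit_mask))
                            break;          /* unknown stack type: stop */
                    n++;
            }

            pr_info("traversed %d stack(s)\n", n);
    }
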
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 9ee4520ce83c..36cf1a498227 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,261 +16,145 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19static char *exception_stack_names[N_EXCEPTION_STACKS] = {
20 [ DOUBLEFAULT_STACK-1 ] = "#DF",
21 [ NMI_STACK-1 ] = "NMI",
22 [ DEBUG_STACK-1 ] = "#DB",
23 [ MCE_STACK-1 ] = "#MC",
24};
19 25
20#define N_EXCEPTION_STACKS_END \ 26static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
21 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) 27 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
22 28 [DEBUG_STACK - 1] = DEBUG_STKSZ
23static char x86_stack_ids[][8] = {
24 [ DEBUG_STACK-1 ] = "#DB",
25 [ NMI_STACK-1 ] = "NMI",
26 [ DOUBLEFAULT_STACK-1 ] = "#DF",
27 [ MCE_STACK-1 ] = "#MC",
28#if DEBUG_STKSZ > EXCEPTION_STKSZ
29 [ N_EXCEPTION_STACKS ...
30 N_EXCEPTION_STACKS_END ] = "#DB[?]"
31#endif
32}; 29};
33 30
34static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 31void stack_type_str(enum stack_type type, const char **begin, const char **end)
35 unsigned *usedp, char **idp)
36{ 32{
37 unsigned k; 33 BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
38 34
39 /* 35 switch (type) {
40 * Iterate over all exception stacks, and figure out whether 36 case STACK_TYPE_IRQ:
41 * 'stack' is in one of them: 37 *begin = "IRQ";
42 */ 38 *end = "EOI";
43 for (k = 0; k < N_EXCEPTION_STACKS; k++) { 39 break;
44 unsigned long end = per_cpu(orig_ist, cpu).ist[k]; 40 case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
45 /* 41 *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
46 * Is 'stack' above this exception frame's end? 42 *end = "EOE";
47 * If yes then skip to the next frame. 43 break;
48 */ 44 default:
49 if (stack >= end) 45 *begin = NULL;
50 continue; 46 *end = NULL;
51 /*
52 * Is 'stack' above this exception frame's start address?
53 * If yes then we found the right frame.
54 */
55 if (stack >= end - EXCEPTION_STKSZ) {
56 /*
57 * Make sure we only iterate through an exception
58 * stack once. If it comes up for the second time
59 * then there's something wrong going on - just
60 * break out and return NULL:
61 */
62 if (*usedp & (1U << k))
63 break;
64 *usedp |= 1U << k;
65 *idp = x86_stack_ids[k];
66 return (unsigned long *)end;
67 }
68 /*
69 * If this is a debug stack, and if it has a larger size than
70 * the usual exception stacks, then 'stack' might still
71 * be within the lower portion of the debug stack:
72 */
73#if DEBUG_STKSZ > EXCEPTION_STKSZ
74 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
75 unsigned j = N_EXCEPTION_STACKS - 1;
76
77 /*
78 * Black magic. A large debug stack is composed of
79 * multiple exception stack entries, which we
 80 * iterate through now. Don't look:
81 */
82 do {
83 ++j;
84 end -= EXCEPTION_STKSZ;
85 x86_stack_ids[j][4] = '1' +
86 (j - N_EXCEPTION_STACKS);
87 } while (stack < end - EXCEPTION_STKSZ);
88 if (*usedp & (1U << j))
89 break;
90 *usedp |= 1U << j;
91 *idp = x86_stack_ids[j];
92 return (unsigned long *)end;
93 }
94#endif
95 } 47 }
96 return NULL;
97} 48}
98 49
99static inline int 50static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
100in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
101 unsigned long *irq_stack_end)
102{ 51{
103 return (stack >= irq_stack && stack < irq_stack_end); 52 unsigned long *begin, *end;
104} 53 struct pt_regs *regs;
105 54 unsigned k;
106static const unsigned long irq_stack_size =
107 (IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
108
109enum stack_type {
110 STACK_IS_UNKNOWN,
111 STACK_IS_NORMAL,
112 STACK_IS_EXCEPTION,
113 STACK_IS_IRQ,
114};
115
116static enum stack_type
117analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
118 unsigned long **stack_end, unsigned long *irq_stack,
119 unsigned *used, char **id)
120{
121 unsigned long addr;
122 55
123 addr = ((unsigned long)stack & (~(THREAD_SIZE - 1))); 56 BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
124 if ((unsigned long)task_stack_page(task) == addr)
125 return STACK_IS_NORMAL;
126 57
127 *stack_end = in_exception_stack(cpu, (unsigned long)stack, 58 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
128 used, id); 59 end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
129 if (*stack_end) 60 begin = end - (exception_stack_sizes[k] / sizeof(long));
130 return STACK_IS_EXCEPTION; 61 regs = (struct pt_regs *)end - 1;
131 62
132 if (!irq_stack) 63 if (stack < begin || stack >= end)
133 return STACK_IS_NORMAL; 64 continue;
134 65
135 *stack_end = irq_stack; 66 info->type = STACK_TYPE_EXCEPTION + k;
136 irq_stack = irq_stack - irq_stack_size; 67 info->begin = begin;
68 info->end = end;
69 info->next_sp = (unsigned long *)regs->sp;
137 70
138 if (in_irq_stack(stack, irq_stack, *stack_end)) 71 return true;
139 return STACK_IS_IRQ; 72 }
140 73
141 return STACK_IS_UNKNOWN; 74 return false;
142} 75}
143 76
144/* 77static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
145 * x86-64 can have up to three kernel stacks:
146 * process stack
147 * interrupt stack
148 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
149 */
150
151void dump_trace(struct task_struct *task, struct pt_regs *regs,
152 unsigned long *stack, unsigned long bp,
153 const struct stacktrace_ops *ops, void *data)
154{ 78{
155 const unsigned cpu = get_cpu(); 79 unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
156 unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); 80 unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
157 unsigned long dummy;
158 unsigned used = 0;
159 int graph = 0;
160 int done = 0;
161
162 if (!task)
163 task = current;
164
165 if (!stack) {
166 if (regs)
167 stack = (unsigned long *)regs->sp;
168 else if (task != current)
169 stack = (unsigned long *)task->thread.sp;
170 else
171 stack = &dummy;
172 }
173 81
174 if (!bp)
175 bp = stack_frame(task, regs);
176 /* 82 /*
177 * Print function call entries in all stacks, starting at the 83 * This is a software stack, so 'end' can be a valid stack pointer.
178 * current stack address. If the stacks consist of nested 84 * It just means the stack is empty.
179 * exceptions
180 */ 85 */
181 while (!done) { 86 if (stack < begin || stack > end)
182 unsigned long *stack_end; 87 return false;
183 enum stack_type stype;
184 char *id;
185 88
186 stype = analyze_stack(cpu, task, stack, &stack_end, 89 info->type = STACK_TYPE_IRQ;
187 irq_stack, &used, &id); 90 info->begin = begin;
91 info->end = end;
188 92
189 /* Default finish unless specified to continue */ 93 /*
190 done = 1; 94 * The next stack pointer is the first thing pushed by the entry code
95 * after switching to the irq stack.
96 */
97 info->next_sp = (unsigned long *)*(end - 1);
191 98
192 switch (stype) { 99 return true;
100}
193 101
194 /* Break out early if we are on the thread stack */ 102int get_stack_info(unsigned long *stack, struct task_struct *task,
195 case STACK_IS_NORMAL: 103 struct stack_info *info, unsigned long *visit_mask)
196 break; 104{
105 if (!stack)
106 goto unknown;
197 107
198 case STACK_IS_EXCEPTION: 108 task = task ? : current;
199 109
200 if (ops->stack(data, id) < 0) 110 if (in_task_stack(stack, task, info))
201 break; 111 goto recursion_check;
202 112
203 bp = ops->walk_stack(task, stack, bp, ops, 113 if (task != current)
204 data, stack_end, &graph); 114 goto unknown;
205 ops->stack(data, "<EOE>");
206 /*
207 * We link to the next stack via the
208 * second-to-last pointer (index -2 to end) in the
209 * exception stack:
210 */
211 stack = (unsigned long *) stack_end[-2];
212 done = 0;
213 break;
214 115
215 case STACK_IS_IRQ: 116 if (in_exception_stack(stack, info))
117 goto recursion_check;
216 118
217 if (ops->stack(data, "IRQ") < 0) 119 if (in_irq_stack(stack, info))
218 break; 120 goto recursion_check;
219 bp = ops->walk_stack(task, stack, bp,
220 ops, data, stack_end, &graph);
221 /*
222 * We link to the next stack (which would be
223 * the process stack normally) the last
224 * pointer (index -1 to end) in the IRQ stack:
225 */
226 stack = (unsigned long *) (stack_end[-1]);
227 irq_stack = NULL;
228 ops->stack(data, "EOI");
229 done = 0;
230 break;
231 121
232 case STACK_IS_UNKNOWN: 122 goto unknown;
233 ops->stack(data, "UNK");
234 break;
235 }
236 }
237 123
124recursion_check:
238 /* 125 /*
239 * This handles the process stack: 126 * Make sure we don't iterate through any given stack more than once.
127 * If it comes up a second time then there's something wrong going on:
128 * just break out and report an unknown stack type.
240 */ 129 */
241 bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); 130 if (visit_mask) {
242 put_cpu(); 131 if (*visit_mask & (1UL << info->type))
132 goto unknown;
133 *visit_mask |= 1UL << info->type;
134 }
135
136 return 0;
137
138unknown:
139 info->type = STACK_TYPE_UNKNOWN;
140 return -EINVAL;
243} 141}
244EXPORT_SYMBOL(dump_trace);
245 142
246void 143void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
247show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 144 unsigned long *sp, char *log_lvl)
248 unsigned long *sp, unsigned long bp, char *log_lvl)
249{ 145{
250 unsigned long *irq_stack_end; 146 unsigned long *irq_stack_end;
251 unsigned long *irq_stack; 147 unsigned long *irq_stack;
252 unsigned long *stack; 148 unsigned long *stack;
253 int cpu;
254 int i; 149 int i;
255 150
256 preempt_disable(); 151 if (!try_get_task_stack(task))
257 cpu = smp_processor_id(); 152 return;
258 153
259 irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); 154 irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
260 irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); 155 irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
261 156
262 /* 157 sp = sp ? : get_stack_pointer(task, regs);
263 * Debugging aid: "show_stack(NULL, NULL);" prints the
264 * back trace for this cpu:
265 */
266 if (sp == NULL) {
267 if (regs)
268 sp = (unsigned long *)regs->sp;
269 else if (task)
270 sp = (unsigned long *)task->thread.sp;
271 else
272 sp = (unsigned long *)&sp;
273 }
274 158
275 stack = sp; 159 stack = sp;
276 for (i = 0; i < kstack_depth_to_print; i++) { 160 for (i = 0; i < kstack_depth_to_print; i++) {
@@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
299 stack++; 183 stack++;
300 touch_nmi_watchdog(); 184 touch_nmi_watchdog();
301 } 185 }
302 preempt_enable();
303 186
304 pr_cont("\n"); 187 pr_cont("\n");
305 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 188 show_trace_log_lvl(task, regs, sp, log_lvl);
189
190 put_task_stack(task);
306} 191}
307 192
308void show_regs(struct pt_regs *regs) 193void show_regs(struct pt_regs *regs)
309{ 194{
310 int i; 195 int i;
311 unsigned long sp;
312 196
313 sp = regs->sp;
314 show_regs_print_info(KERN_DEFAULT); 197 show_regs_print_info(KERN_DEFAULT);
315 __show_regs(regs, 1); 198 __show_regs(regs, 1);
316 199
@@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs)
325 u8 *ip; 208 u8 *ip;
326 209
327 printk(KERN_DEFAULT "Stack:\n"); 210 printk(KERN_DEFAULT "Stack:\n");
328 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 211 show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
329 0, KERN_DEFAULT);
330 212
331 printk(KERN_DEFAULT "Code: "); 213 printk(KERN_DEFAULT "Code: ");
332 214
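
The dumpstack_64.c rework above replaces the old dump_trace() callback walk with get_stack_info(), whose recursion_check path records every stack type it has entered in a visit_mask bitmap and refuses to enter the same one twice. The stand-alone C sketch below models that "visit each stack at most once" guard with simplified stand-in types; it is illustrative only and does not use the kernel's real data structures.

#include <stdio.h>

/* Simplified stand-ins for the kernel's stack types (three kinds assumed). */
enum stack_type { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_EXCEPTION };

/* Return 0 the first time a stack type is seen, -1 when it comes up again. */
static int check_and_mark_visited(unsigned long *visit_mask, enum stack_type type)
{
        if (*visit_mask & (1UL << type))
                return -1;              /* already walked this stack: stop */
        *visit_mask |= 1UL << type;
        return 0;
}

int main(void)
{
        unsigned long visit_mask = 0;
        /* A walk that legitimately crosses task -> IRQ, then buggily loops back. */
        enum stack_type walk[] = { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_TASK };
        int i;

        for (i = 0; i < 3; i++) {
                if (check_and_mark_visited(&visit_mask, walk[i])) {
                        printf("stack type %d seen twice: reporting unknown stack\n", walk[i]);
                        break;
                }
                printf("walking stack type %d\n", walk[i]);
        }
        return 0;
}

This mirrors the *visit_mask & (1UL << info->type) test in the recursion_check hunk above.
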
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 93982aebb398..2f2b8c7ccb85 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void)
317 on_boot_cpu = 0; 317 on_boot_cpu = 0;
318 318
319 WARN_ON_FPU(current->thread.fpu.fpstate_active); 319 WARN_ON_FPU(current->thread.fpu.fpstate_active);
320 current_thread_info()->status = 0;
321 320
322 if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) 321 if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
323 eagerfpu = ENABLE; 322 eagerfpu = ENABLE;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d036cfb4495d..8639bb2ae058 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
1029 } 1029 }
1030 1030
1031 if (ftrace_push_return_trace(old, self_addr, &trace.depth, 1031 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
1032 frame_pointer) == -EBUSY) { 1032 frame_pointer, parent) == -EBUSY) {
1033 *parent = old; 1033 *parent = old;
1034 return; 1034 return;
1035 } 1035 }
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6f8902b0d151..5f401262f12d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
94 */ 94 */
95__HEAD 95__HEAD
96ENTRY(startup_32) 96ENTRY(startup_32)
97 movl pa(stack_start),%ecx 97 movl pa(initial_stack),%ecx
98 98
99 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 99 /* test KEEP_SEGMENTS flag to see if the bootloader is asking
100 us to not reload segments */ 100 us to not reload segments */
@@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4
286 * start_secondary(). 286 * start_secondary().
287 */ 287 */
288ENTRY(start_cpu0) 288ENTRY(start_cpu0)
289 movl stack_start, %ecx 289 movl initial_stack, %ecx
290 movl %ecx, %esp 290 movl %ecx, %esp
291 jmp *(initial_code) 291 jmp *(initial_code)
292ENDPROC(start_cpu0) 292ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@ ENTRY(startup_32_smp)
307 movl %eax,%es 307 movl %eax,%es
308 movl %eax,%fs 308 movl %eax,%fs
309 movl %eax,%gs 309 movl %eax,%gs
310 movl pa(stack_start),%ecx 310 movl pa(initial_stack),%ecx
311 movl %eax,%ss 311 movl %eax,%ss
312 leal -__PAGE_OFFSET(%ecx),%esp 312 leal -__PAGE_OFFSET(%ecx),%esp
313 313
@@ -703,7 +703,7 @@ ENTRY(initial_page_table)
703 703
704.data 704.data
705.balign 4 705.balign 4
706ENTRY(stack_start) 706ENTRY(initial_stack)
707 .long init_thread_union+THREAD_SIZE 707 .long init_thread_union+THREAD_SIZE
708 708
709__INITRODATA 709__INITRODATA
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9f8efc9f0075..c98a559c346e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,7 +66,7 @@ startup_64:
66 */ 66 */
67 67
68 /* 68 /*
69 * Setup stack for verify_cpu(). "-8" because stack_start is defined 69 * Setup stack for verify_cpu(). "-8" because initial_stack is defined
70 * this way, see below. Our best guess is a NULL ptr for stack 70 * this way, see below. Our best guess is a NULL ptr for stack
71 * termination heuristics and we don't want to break anything which 71 * termination heuristics and we don't want to break anything which
72 * might depend on it (kgdb, ...). 72 * might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@ ENTRY(secondary_startup_64)
226 movq %rax, %cr0 226 movq %rax, %cr0
227 227
228 /* Setup a boot time stack */ 228 /* Setup a boot time stack */
229 movq stack_start(%rip), %rsp 229 movq initial_stack(%rip), %rsp
230 230
231 /* zero EFLAGS after setting rsp */ 231 /* zero EFLAGS after setting rsp */
232 pushq $0 232 pushq $0
@@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64)
310 * start_secondary(). 310 * start_secondary().
311 */ 311 */
312ENTRY(start_cpu0) 312ENTRY(start_cpu0)
313 movq stack_start(%rip),%rsp 313 movq initial_stack(%rip),%rsp
314 movq initial_code(%rip),%rax 314 movq initial_code(%rip),%rax
315 pushq $0 # fake return address to stop unwinder 315 pushq $0 # fake return address to stop unwinder
316 pushq $__KERNEL_CS # set correct cs 316 pushq $__KERNEL_CS # set correct cs
@@ -319,17 +319,15 @@ ENTRY(start_cpu0)
319ENDPROC(start_cpu0) 319ENDPROC(start_cpu0)
320#endif 320#endif
321 321
322 /* SMP bootup changes these two */ 322 /* Both SMP bootup and ACPI suspend change these variables */
323 __REFDATA 323 __REFDATA
324 .balign 8 324 .balign 8
325 GLOBAL(initial_code) 325 GLOBAL(initial_code)
326 .quad x86_64_start_kernel 326 .quad x86_64_start_kernel
327 GLOBAL(initial_gs) 327 GLOBAL(initial_gs)
328 .quad INIT_PER_CPU_VAR(irq_stack_union) 328 .quad INIT_PER_CPU_VAR(irq_stack_union)
329 329 GLOBAL(initial_stack)
330 GLOBAL(stack_start)
331 .quad init_thread_union+THREAD_SIZE-8 330 .quad init_thread_union+THREAD_SIZE-8
332 .word 0
333 __FINITDATA 331 __FINITDATA
334 332
335bad_address: 333bad_address:
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4a7903714065..9ebd0b0e73d9 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
40 if (user_mode(regs)) 40 if (user_mode(regs))
41 return; 41 return;
42 42
43 if (regs->sp >= curbase + sizeof(struct thread_info) + 43 if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
44 sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
45 regs->sp <= curbase + THREAD_SIZE) 44 regs->sp <= curbase + THREAD_SIZE)
46 return; 45 return;
47 46
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 04cde527d728..8e36f249646e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
50#include <asm/apicdef.h> 50#include <asm/apicdef.h>
51#include <asm/apic.h> 51#include <asm/apic.h>
52#include <asm/nmi.h> 52#include <asm/nmi.h>
53#include <asm/switch_to.h>
53 54
54struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = 55struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
55{ 56{
@@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
166 gdb_regs[GDB_DX] = 0; 167 gdb_regs[GDB_DX] = 0;
167 gdb_regs[GDB_SI] = 0; 168 gdb_regs[GDB_SI] = 0;
168 gdb_regs[GDB_DI] = 0; 169 gdb_regs[GDB_DI] = 0;
169 gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; 170 gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp;
170#ifdef CONFIG_X86_32 171#ifdef CONFIG_X86_32
171 gdb_regs[GDB_DS] = __KERNEL_DS; 172 gdb_regs[GDB_DS] = __KERNEL_DS;
172 gdb_regs[GDB_ES] = __KERNEL_DS; 173 gdb_regs[GDB_ES] = __KERNEL_DS;
173 gdb_regs[GDB_PS] = 0; 174 gdb_regs[GDB_PS] = 0;
174 gdb_regs[GDB_CS] = __KERNEL_CS; 175 gdb_regs[GDB_CS] = __KERNEL_CS;
175 gdb_regs[GDB_PC] = p->thread.ip;
176 gdb_regs[GDB_SS] = __KERNEL_DS; 176 gdb_regs[GDB_SS] = __KERNEL_DS;
177 gdb_regs[GDB_FS] = 0xFFFF; 177 gdb_regs[GDB_FS] = 0xFFFF;
178 gdb_regs[GDB_GS] = 0xFFFF; 178 gdb_regs[GDB_GS] = 0xFFFF;
179#else 179#else
180 gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); 180 gdb_regs32[GDB_PS] = 0;
181 gdb_regs32[GDB_CS] = __KERNEL_CS; 181 gdb_regs32[GDB_CS] = __KERNEL_CS;
182 gdb_regs32[GDB_SS] = __KERNEL_DS; 182 gdb_regs32[GDB_SS] = __KERNEL_DS;
183 gdb_regs[GDB_PC] = 0;
184 gdb_regs[GDB_R8] = 0; 183 gdb_regs[GDB_R8] = 0;
185 gdb_regs[GDB_R9] = 0; 184 gdb_regs[GDB_R9] = 0;
186 gdb_regs[GDB_R10] = 0; 185 gdb_regs[GDB_R10] = 0;
@@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
190 gdb_regs[GDB_R14] = 0; 189 gdb_regs[GDB_R14] = 0;
191 gdb_regs[GDB_R15] = 0; 190 gdb_regs[GDB_R15] = 0;
192#endif 191#endif
192 gdb_regs[GDB_PC] = 0;
193 gdb_regs[GDB_SP] = p->thread.sp; 193 gdb_regs[GDB_SP] = p->thread.sp;
194} 194}
195 195
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index c2bedaea11f7..4afc67f5facc 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -184,7 +184,7 @@ out:
184 184
185static struct kobj_attribute type_attr = __ATTR_RO(type); 185static struct kobj_attribute type_attr = __ATTR_RO(type);
186 186
187static struct bin_attribute data_attr = { 187static struct bin_attribute data_attr __ro_after_init = {
188 .attr = { 188 .attr = {
189 .name = "data", 189 .name = "data",
190 .mode = S_IRUGO, 190 .mode = S_IRUGO,
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 3692249a70f1..60b9949f1e65 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,7 +29,7 @@
29#include <asm/x86_init.h> 29#include <asm/x86_init.h>
30#include <asm/reboot.h> 30#include <asm/reboot.h>
31 31
32static int kvmclock = 1; 32static int kvmclock __ro_after_init = 1;
33static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; 33static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
34static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; 34static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
35static cycle_t kvm_sched_clock_offset; 35static cycle_t kvm_sched_clock_offset;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1acfd76e3e26..bbf3d5933eaa 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
332 .read_cr0 = native_read_cr0, 332 .read_cr0 = native_read_cr0,
333 .write_cr0 = native_write_cr0, 333 .write_cr0 = native_write_cr0,
334 .read_cr4 = native_read_cr4, 334 .read_cr4 = native_read_cr4,
335 .read_cr4_safe = native_read_cr4_safe,
336 .write_cr4 = native_write_cr4, 335 .write_cr4 = native_write_cr4,
337#ifdef CONFIG_X86_64 336#ifdef CONFIG_X86_64
338 .read_cr8 = native_read_cr8, 337 .read_cr8 = native_read_cr8,
@@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt);
389#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) 388#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
390#endif 389#endif
391 390
392struct pv_mmu_ops pv_mmu_ops = { 391struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
393 392
394 .read_cr2 = native_read_cr2, 393 .read_cr2 = native_read_cr2,
395 .write_cr2 = native_write_cr2, 394 .write_cr2 = native_write_cr2,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0ea2ce4..4002b475171c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
32#include <asm/tlbflush.h> 32#include <asm/tlbflush.h>
33#include <asm/mce.h> 33#include <asm/mce.h>
34#include <asm/vm86.h> 34#include <asm/vm86.h>
35#include <asm/switch_to.h>
35 36
36/* 37/*
37 * per-CPU TSS segments. Threads are completely 'soft' on Linux, 38 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -513,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
513} 514}
514 515
515/* 516/*
517 * Return saved PC of a blocked thread.
 518 * What is this good for? It will always be the scheduler or ret_from_fork.
519 */
520unsigned long thread_saved_pc(struct task_struct *tsk)
521{
522 struct inactive_task_frame *frame =
523 (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
524 return READ_ONCE_NOCHECK(frame->ret_addr);
525}
526
527/*
516 * Called from fs/proc with a reference on @p to find the function 528 * Called from fs/proc with a reference on @p to find the function
517 * which called into schedule(). This needs to be done carefully 529 * which called into schedule(). This needs to be done carefully
518 * because the task might wake up and we might look at a stack 530 * because the task might wake up and we might look at a stack
@@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
520 */ 532 */
521unsigned long get_wchan(struct task_struct *p) 533unsigned long get_wchan(struct task_struct *p)
522{ 534{
523 unsigned long start, bottom, top, sp, fp, ip; 535 unsigned long start, bottom, top, sp, fp, ip, ret = 0;
524 int count = 0; 536 int count = 0;
525 537
526 if (!p || p == current || p->state == TASK_RUNNING) 538 if (!p || p == current || p->state == TASK_RUNNING)
527 return 0; 539 return 0;
528 540
541 if (!try_get_task_stack(p))
542 return 0;
543
529 start = (unsigned long)task_stack_page(p); 544 start = (unsigned long)task_stack_page(p);
530 if (!start) 545 if (!start)
531 return 0; 546 goto out;
532 547
533 /* 548 /*
534 * Layout of the stack page: 549 * Layout of the stack page:
@@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p)
537 * PADDING 552 * PADDING
538 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING 553 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
539 * stack 554 * stack
540 * ----------- bottom = start + sizeof(thread_info) 555 * ----------- bottom = start
541 * thread_info
542 * ----------- start
543 * 556 *
544 * The tasks stack pointer points at the location where the 557 * The tasks stack pointer points at the location where the
545 * framepointer is stored. The data on the stack is: 558 * framepointer is stored. The data on the stack is:
@@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p)
550 */ 563 */
551 top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; 564 top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
552 top -= 2 * sizeof(unsigned long); 565 top -= 2 * sizeof(unsigned long);
553 bottom = start + sizeof(struct thread_info); 566 bottom = start;
554 567
555 sp = READ_ONCE(p->thread.sp); 568 sp = READ_ONCE(p->thread.sp);
556 if (sp < bottom || sp > top) 569 if (sp < bottom || sp > top)
557 return 0; 570 goto out;
558 571
559 fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); 572 fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
560 do { 573 do {
561 if (fp < bottom || fp > top) 574 if (fp < bottom || fp > top)
562 return 0; 575 goto out;
563 ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); 576 ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
564 if (!in_sched_functions(ip)) 577 if (!in_sched_functions(ip)) {
565 return ip; 578 ret = ip;
579 goto out;
580 }
566 fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); 581 fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
567 } while (count++ < 16 && p->state != TASK_RUNNING); 582 } while (count++ < 16 && p->state != TASK_RUNNING);
568 return 0; 583
584out:
585 put_task_stack(p);
586 return ret;
569} 587}
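
show_stack_log_lvl() and get_wchan() above now bracket every access to another task's stack with try_get_task_stack()/put_task_stack(): with vmap'd stacks the stack can be freed as soon as the task exits, so a reader must pin it first and bail out if it is already gone. Below is a rough user-space model of that pin/unpin pattern using a plain reference count; the struct and function names are illustrative, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative task with a separately allocated, refcounted stack. */
struct task {
        int stack_refcount;     /* 0 means the stack has already been freed */
        unsigned long *stack;
};

static int try_get_stack(struct task *t)
{
        if (t->stack_refcount == 0)
                return 0;               /* too late: don't touch t->stack */
        t->stack_refcount++;
        return 1;
}

static void put_stack(struct task *t)
{
        if (--t->stack_refcount == 0) {
                free(t->stack);
                t->stack = NULL;
        }
}

static unsigned long peek_saved_value(struct task *t)
{
        unsigned long ret = 0;

        if (!try_get_stack(t))
                return 0;               /* same "return 0 / goto out" shape as get_wchan() */
        ret = t->stack[0];
        put_stack(t);
        return ret;
}

int main(void)
{
        struct task t = { .stack_refcount = 1, .stack = calloc(16, sizeof(unsigned long)) };

        t.stack[0] = 0xdeadbeef;
        printf("saved value: %#lx\n", peek_saved_value(&t));
        put_stack(&t);                  /* task exit drops the last reference */
        printf("after exit:  %#lx\n", peek_saved_value(&t));
        return 0;
}
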
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29c38c7..bd7be8efdc4c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,17 +55,6 @@
55#include <asm/switch_to.h> 55#include <asm/switch_to.h>
56#include <asm/vm86.h> 56#include <asm/vm86.h>
57 57
58asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
59asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
60
61/*
62 * Return saved PC of a blocked thread.
63 */
64unsigned long thread_saved_pc(struct task_struct *tsk)
65{
66 return ((unsigned long *)tsk->thread.sp)[3];
67}
68
69void __show_regs(struct pt_regs *regs, int all) 58void __show_regs(struct pt_regs *regs, int all)
70{ 59{
71 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 60 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all)
101 cr0 = read_cr0(); 90 cr0 = read_cr0();
102 cr2 = read_cr2(); 91 cr2 = read_cr2();
103 cr3 = read_cr3(); 92 cr3 = read_cr3();
104 cr4 = __read_cr4_safe(); 93 cr4 = __read_cr4();
105 printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", 94 printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
106 cr0, cr2, cr3, cr4); 95 cr0, cr2, cr3, cr4);
107 96
@@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
133 unsigned long arg, struct task_struct *p, unsigned long tls) 122 unsigned long arg, struct task_struct *p, unsigned long tls)
134{ 123{
135 struct pt_regs *childregs = task_pt_regs(p); 124 struct pt_regs *childregs = task_pt_regs(p);
125 struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
126 struct inactive_task_frame *frame = &fork_frame->frame;
136 struct task_struct *tsk; 127 struct task_struct *tsk;
137 int err; 128 int err;
138 129
139 p->thread.sp = (unsigned long) childregs; 130 frame->bp = 0;
131 frame->ret_addr = (unsigned long) ret_from_fork;
132 p->thread.sp = (unsigned long) fork_frame;
140 p->thread.sp0 = (unsigned long) (childregs+1); 133 p->thread.sp0 = (unsigned long) (childregs+1);
141 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 134 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
142 135
143 if (unlikely(p->flags & PF_KTHREAD)) { 136 if (unlikely(p->flags & PF_KTHREAD)) {
144 /* kernel thread */ 137 /* kernel thread */
145 memset(childregs, 0, sizeof(struct pt_regs)); 138 memset(childregs, 0, sizeof(struct pt_regs));
146 p->thread.ip = (unsigned long) ret_from_kernel_thread; 139 frame->bx = sp; /* function */
147 task_user_gs(p) = __KERNEL_STACK_CANARY; 140 frame->di = arg;
148 childregs->ds = __USER_DS;
149 childregs->es = __USER_DS;
150 childregs->fs = __KERNEL_PERCPU;
151 childregs->bx = sp; /* function */
152 childregs->bp = arg;
153 childregs->orig_ax = -1;
154 childregs->cs = __KERNEL_CS | get_kernel_rpl();
155 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
156 p->thread.io_bitmap_ptr = NULL; 141 p->thread.io_bitmap_ptr = NULL;
157 return 0; 142 return 0;
158 } 143 }
144 frame->bx = 0;
159 *childregs = *current_pt_regs(); 145 *childregs = *current_pt_regs();
160 childregs->ax = 0; 146 childregs->ax = 0;
161 if (sp) 147 if (sp)
162 childregs->sp = sp; 148 childregs->sp = sp;
163 149
164 p->thread.ip = (unsigned long) ret_from_fork;
165 task_user_gs(p) = get_user_gs(current_pt_regs()); 150 task_user_gs(p) = get_user_gs(current_pt_regs());
166 151
167 p->thread.io_bitmap_ptr = NULL; 152 p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8f84bf..de9acaf2d371 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -50,8 +50,6 @@
50#include <asm/switch_to.h> 50#include <asm/switch_to.h>
51#include <asm/xen/hypervisor.h> 51#include <asm/xen/hypervisor.h>
52 52
53asmlinkage extern void ret_from_fork(void);
54
55__visible DEFINE_PER_CPU(unsigned long, rsp_scratch); 53__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
56 54
57/* Prints also some state that isn't saved in the pt_regs */ 55/* Prints also some state that isn't saved in the pt_regs */
@@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
141{ 139{
142 int err; 140 int err;
143 struct pt_regs *childregs; 141 struct pt_regs *childregs;
142 struct fork_frame *fork_frame;
143 struct inactive_task_frame *frame;
144 struct task_struct *me = current; 144 struct task_struct *me = current;
145 145
146 p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; 146 p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
147 childregs = task_pt_regs(p); 147 childregs = task_pt_regs(p);
148 p->thread.sp = (unsigned long) childregs; 148 fork_frame = container_of(childregs, struct fork_frame, regs);
149 set_tsk_thread_flag(p, TIF_FORK); 149 frame = &fork_frame->frame;
150 frame->bp = 0;
151 frame->ret_addr = (unsigned long) ret_from_fork;
152 p->thread.sp = (unsigned long) fork_frame;
150 p->thread.io_bitmap_ptr = NULL; 153 p->thread.io_bitmap_ptr = NULL;
151 154
152 savesegment(gs, p->thread.gsindex); 155 savesegment(gs, p->thread.gsindex);
@@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
160 if (unlikely(p->flags & PF_KTHREAD)) { 163 if (unlikely(p->flags & PF_KTHREAD)) {
161 /* kernel thread */ 164 /* kernel thread */
162 memset(childregs, 0, sizeof(struct pt_regs)); 165 memset(childregs, 0, sizeof(struct pt_regs));
163 childregs->sp = (unsigned long)childregs; 166 frame->bx = sp; /* function */
164 childregs->ss = __KERNEL_DS; 167 frame->r12 = arg;
165 childregs->bx = sp; /* function */
166 childregs->bp = arg;
167 childregs->orig_ax = -1;
168 childregs->cs = __KERNEL_CS | get_kernel_rpl();
169 childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
170 return 0; 168 return 0;
171 } 169 }
170 frame->bx = 0;
172 *childregs = *current_pt_regs(); 171 *childregs = *current_pt_regs();
173 172
174 childregs->ax = 0; 173 childregs->ax = 0;
@@ -511,7 +510,7 @@ void set_personality_ia32(bool x32)
511 current->personality &= ~READ_IMPLIES_EXEC; 510 current->personality &= ~READ_IMPLIES_EXEC;
512 /* in_compat_syscall() uses the presence of the x32 511 /* in_compat_syscall() uses the presence of the x32
513 syscall bit flag to determine compat status */ 512 syscall bit flag to determine compat status */
514 current_thread_info()->status &= ~TS_COMPAT; 513 current->thread.status &= ~TS_COMPAT;
515 } else { 514 } else {
516 set_thread_flag(TIF_IA32); 515 set_thread_flag(TIF_IA32);
517 clear_thread_flag(TIF_X32); 516 clear_thread_flag(TIF_X32);
@@ -519,7 +518,7 @@ void set_personality_ia32(bool x32)
519 current->mm->context.ia32_compat = TIF_IA32; 518 current->mm->context.ia32_compat = TIF_IA32;
520 current->personality |= force_personality32; 519 current->personality |= force_personality32;
521 /* Prepare the first "return" to user space */ 520 /* Prepare the first "return" to user space */
522 current_thread_info()->status |= TS_COMPAT; 521 current->thread.status |= TS_COMPAT;
523 } 522 }
524} 523}
525EXPORT_SYMBOL_GPL(set_personality_ia32); 524EXPORT_SYMBOL_GPL(set_personality_ia32);
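
Both copy_thread_tls() implementations above now locate the child's inactive_task_frame via container_of(childregs, struct fork_frame, regs): childregs points at the embedded pt_regs, and the surrounding fork_frame carries the cleared frame pointer and the ret_from_fork return address that the rewritten switch_to() will pop. A minimal user-space illustration of the container_of() pattern follows; the struct layout here is a made-up model, since the real fork_frame definition lives in the new switch_to headers and is only partially visible in this diff.

#include <stdio.h>
#include <stddef.h>

/* Generic container_of, as used throughout the kernel. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Toy stand-ins: a saved-registers blob embedded in a larger frame. */
struct regs { unsigned long ip, sp; };

struct fork_frame_model {
        unsigned long bp;       /* would be cleared for the unwinder */
        unsigned long ret_addr; /* would point at ret_from_fork */
        struct regs regs;       /* the embedded member we get a pointer to */
};

int main(void)
{
        struct fork_frame_model frame = { .bp = 0, .ret_addr = 0x1234 };
        struct regs *childregs = &frame.regs;

        /* Recover the enclosing frame from the pointer to its member. */
        struct fork_frame_model *f = container_of(childregs, struct fork_frame_model, regs);

        printf("ret_addr = %#lx (same object: %d)\n", f->ret_addr, f == &frame);
        return 0;
}

Recovering the enclosing object from a pointer to one of its members is what lets copy_thread_tls() fill in frame->bp and frame->ret_addr starting from nothing more than childregs.
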
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a541ff..ce94c38cf4d6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
173 return sp; 173 return sp;
174 174
175 prev_esp = (u32 *)(context); 175 prev_esp = (u32 *)(context);
176 if (prev_esp) 176 if (*prev_esp)
177 return (unsigned long)prev_esp; 177 return (unsigned long)*prev_esp;
178 178
179 return (unsigned long)regs; 179 return (unsigned long)regs;
180} 180}
@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
934 */ 934 */
935 regs->orig_ax = value; 935 regs->orig_ax = value;
936 if (syscall_get_nr(child, regs) >= 0) 936 if (syscall_get_nr(child, regs) >= 0)
937 task_thread_info(child)->status |= TS_I386_REGS_POKED; 937 child->thread.status |= TS_I386_REGS_POKED;
938 break; 938 break;
939 939
940 case offsetof(struct user32, regs.eflags): 940 case offsetof(struct user32, regs.eflags):
@@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1250 1250
1251#ifdef CONFIG_X86_64 1251#ifdef CONFIG_X86_64
1252 1252
1253static struct user_regset x86_64_regsets[] __read_mostly = { 1253static struct user_regset x86_64_regsets[] __ro_after_init = {
1254 [REGSET_GENERAL] = { 1254 [REGSET_GENERAL] = {
1255 .core_note_type = NT_PRSTATUS, 1255 .core_note_type = NT_PRSTATUS,
1256 .n = sizeof(struct user_regs_struct) / sizeof(long), 1256 .n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = {
1291#endif /* CONFIG_X86_64 */ 1291#endif /* CONFIG_X86_64 */
1292 1292
1293#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 1293#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1294static struct user_regset x86_32_regsets[] __read_mostly = { 1294static struct user_regset x86_32_regsets[] __ro_after_init = {
1295 [REGSET_GENERAL] = { 1295 [REGSET_GENERAL] = {
1296 .core_note_type = NT_PRSTATUS, 1296 .core_note_type = NT_PRSTATUS,
1297 .n = sizeof(struct user_regs_struct32) / sizeof(u32), 1297 .n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = {
1344 */ 1344 */
1345u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; 1345u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
1346 1346
1347void update_regset_xstate_info(unsigned int size, u64 xstate_mask) 1347void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
1348{ 1348{
1349#ifdef CONFIG_X86_64 1349#ifdef CONFIG_X86_64
1350 x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); 1350 x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 63bf27d972b7..e244c19a2451 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -705,7 +705,7 @@ static void native_machine_power_off(void)
705 tboot_shutdown(TB_SHUTDOWN_HALT); 705 tboot_shutdown(TB_SHUTDOWN_HALT);
706} 706}
707 707
708struct machine_ops machine_ops = { 708struct machine_ops machine_ops __ro_after_init = {
709 .power_off = native_machine_power_off, 709 .power_off = native_machine_power_off,
710 .shutdown = native_machine_shutdown, 710 .shutdown = native_machine_shutdown,
711 .emergency_restart = native_machine_emergency_restart, 711 .emergency_restart = native_machine_emergency_restart,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2c4bc85dfe90..eeb094ea794a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data);
210 210
211 211
212#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) 212#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
213__visible unsigned long mmu_cr4_features; 213__visible unsigned long mmu_cr4_features __ro_after_init;
214#else 214#else
215__visible unsigned long mmu_cr4_features = X86_CR4_PAE; 215__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
216#endif 216#endif
217 217
218/* Boot loader ID and version as integers, for the benefit of proc_dointvec */ 218/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p)
1137 * auditing all the early-boot CR4 manipulation would be needed to 1137 * auditing all the early-boot CR4 manipulation would be needed to
1138 * rule it out. 1138 * rule it out.
1139 */ 1139 */
1140 mmu_cr4_features = __read_cr4_safe(); 1140 mmu_cr4_features = __read_cr4();
1141 1141
1142 memblock_set_current_limit(get_max_mapped()); 1142 memblock_set_current_limit(get_max_mapped());
1143 1143
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7a40e068302d..2bbd27f89802 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
33DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; 33DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
34EXPORT_PER_CPU_SYMBOL(this_cpu_off); 34EXPORT_PER_CPU_SYMBOL(this_cpu_off);
35 35
36unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { 36unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
37 [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, 37 [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
38}; 38};
39EXPORT_SYMBOL(__per_cpu_offset); 39EXPORT_SYMBOL(__per_cpu_offset);
@@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void)
246#ifdef CONFIG_X86_64 246#ifdef CONFIG_X86_64
247 per_cpu(irq_stack_ptr, cpu) = 247 per_cpu(irq_stack_ptr, cpu) =
248 per_cpu(irq_stack_union.irq_stack, cpu) + 248 per_cpu(irq_stack_union.irq_stack, cpu) +
249 IRQ_STACK_SIZE - 64; 249 IRQ_STACK_SIZE;
250#endif 250#endif
251#ifdef CONFIG_NUMA 251#ifdef CONFIG_NUMA
252 per_cpu(x86_cpu_to_node_map, cpu) = 252 per_cpu(x86_cpu_to_node_map, cpu) =
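
The setup_percpu.c change above drops the old "- 64" slack so irq_stack_ptr points at the true top of the IRQ stack. That pairs with the new in_irq_stack() in the dumpstack_64.c hunk near the top of this diff: the entry code pushes the interrupted stack pointer as the first thing after switching stacks, so the unwinder can read the link back out of the last slot with *(end - 1). A small user-space model of that convention, with an array standing in for the IRQ stack:

#include <stdio.h>

#define IRQ_STACK_WORDS 64

int main(void)
{
        unsigned long task_stack[16];
        unsigned long irq_stack[IRQ_STACK_WORDS];
        unsigned long *end = irq_stack + IRQ_STACK_WORDS;      /* "irq_stack_ptr" */

        /* Entry code: switch to the IRQ stack and push the old stack pointer. */
        unsigned long old_sp = (unsigned long)&task_stack[10];
        *(end - 1) = old_sp;

        /* Unwinder: the next stack is found in the last slot of the IRQ stack. */
        unsigned long *next_sp = (unsigned long *)*(end - 1);

        printf("next_sp points back into the task stack: %d\n",
               next_sp == &task_stack[10]);
        return 0;
}
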
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 04cb3212db2d..da20ecb5397a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
783 * than the tracee. 783 * than the tracee.
784 */ 784 */
785#ifdef CONFIG_IA32_EMULATION 785#ifdef CONFIG_IA32_EMULATION
786 if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) 786 if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
787 return __NR_ia32_restart_syscall; 787 return __NR_ia32_restart_syscall;
788#endif 788#endif
789#ifdef CONFIG_X86_X32_ABI 789#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 54e2f1a968a4..7249dcf2cbcb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -943,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
943 per_cpu(cpu_current_top_of_stack, cpu) = 943 per_cpu(cpu_current_top_of_stack, cpu) =
944 (unsigned long)task_stack_page(idle) + THREAD_SIZE; 944 (unsigned long)task_stack_page(idle) + THREAD_SIZE;
945#else 945#else
946 clear_tsk_thread_flag(idle, TIF_FORK);
947 initial_gs = per_cpu_offset(cpu); 946 initial_gs = per_cpu_offset(cpu);
948#endif 947#endif
949} 948}
@@ -970,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
970 969
971 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 970 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
972 initial_code = (unsigned long)start_secondary; 971 initial_code = (unsigned long)start_secondary;
973 stack_start = idle->thread.sp; 972 initial_stack = idle->thread.sp;
974 973
975 /* 974 /*
976 * Enable the espfix hack for this CPU 975 * Enable the espfix hack for this CPU
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4738f5e0f2ab..0653788026e2 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -8,80 +8,69 @@
8#include <linux/export.h> 8#include <linux/export.h>
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
11#include <asm/unwind.h>
11 12
12static int save_stack_stack(void *data, char *name) 13static int save_stack_address(struct stack_trace *trace, unsigned long addr,
14 bool nosched)
13{ 15{
14 return 0;
15}
16
17static int
18__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
19{
20 struct stack_trace *trace = data;
21#ifdef CONFIG_FRAME_POINTER
22 if (!reliable)
23 return 0;
24#endif
25 if (nosched && in_sched_functions(addr)) 16 if (nosched && in_sched_functions(addr))
26 return 0; 17 return 0;
18
27 if (trace->skip > 0) { 19 if (trace->skip > 0) {
28 trace->skip--; 20 trace->skip--;
29 return 0; 21 return 0;
30 } 22 }
31 if (trace->nr_entries < trace->max_entries) {
32 trace->entries[trace->nr_entries++] = addr;
33 return 0;
34 } else {
35 return -1; /* no more room, stop walking the stack */
36 }
37}
38 23
39static int save_stack_address(void *data, unsigned long addr, int reliable) 24 if (trace->nr_entries >= trace->max_entries)
40{ 25 return -1;
41 return __save_stack_address(data, addr, reliable, false); 26
27 trace->entries[trace->nr_entries++] = addr;
28 return 0;
42} 29}
43 30
44static int 31static void __save_stack_trace(struct stack_trace *trace,
45save_stack_address_nosched(void *data, unsigned long addr, int reliable) 32 struct task_struct *task, struct pt_regs *regs,
33 bool nosched)
46{ 34{
47 return __save_stack_address(data, addr, reliable, true); 35 struct unwind_state state;
48} 36 unsigned long addr;
49 37
50static const struct stacktrace_ops save_stack_ops = { 38 if (regs)
51 .stack = save_stack_stack, 39 save_stack_address(trace, regs->ip, nosched);
52 .address = save_stack_address,
53 .walk_stack = print_context_stack,
54};
55 40
56static const struct stacktrace_ops save_stack_ops_nosched = { 41 for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
57 .stack = save_stack_stack, 42 unwind_next_frame(&state)) {
58 .address = save_stack_address_nosched, 43 addr = unwind_get_return_address(&state);
59 .walk_stack = print_context_stack, 44 if (!addr || save_stack_address(trace, addr, nosched))
60}; 45 break;
46 }
47
48 if (trace->nr_entries < trace->max_entries)
49 trace->entries[trace->nr_entries++] = ULONG_MAX;
50}
61 51
62/* 52/*
63 * Save stack-backtrace addresses into a stack_trace buffer. 53 * Save stack-backtrace addresses into a stack_trace buffer.
64 */ 54 */
65void save_stack_trace(struct stack_trace *trace) 55void save_stack_trace(struct stack_trace *trace)
66{ 56{
67 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 57 __save_stack_trace(trace, current, NULL, false);
68 if (trace->nr_entries < trace->max_entries)
69 trace->entries[trace->nr_entries++] = ULONG_MAX;
70} 58}
71EXPORT_SYMBOL_GPL(save_stack_trace); 59EXPORT_SYMBOL_GPL(save_stack_trace);
72 60
73void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) 61void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
74{ 62{
75 dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); 63 __save_stack_trace(trace, current, regs, false);
76 if (trace->nr_entries < trace->max_entries)
77 trace->entries[trace->nr_entries++] = ULONG_MAX;
78} 64}
79 65
80void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 66void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
81{ 67{
82 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 68 if (!try_get_task_stack(tsk))
83 if (trace->nr_entries < trace->max_entries) 69 return;
84 trace->entries[trace->nr_entries++] = ULONG_MAX; 70
71 __save_stack_trace(trace, tsk, NULL, true);
72
73 put_task_stack(tsk);
85} 74}
86EXPORT_SYMBOL_GPL(save_stack_trace_tsk); 75EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
87 76
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b70ca12dd389..bd4e3d4d3625 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
292DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) 292DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
293DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) 293DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
294 294
295#ifdef CONFIG_VMAP_STACK
296__visible void __noreturn handle_stack_overflow(const char *message,
297 struct pt_regs *regs,
298 unsigned long fault_address)
299{
300 printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
301 (void *)fault_address, current->stack,
302 (char *)current->stack + THREAD_SIZE - 1);
303 die(message, regs, 0);
304
305 /* Be absolutely certain we don't return. */
306 panic(message);
307}
308#endif
309
295#ifdef CONFIG_X86_64 310#ifdef CONFIG_X86_64
296/* Runs on IST stack */ 311/* Runs on IST stack */
297dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) 312dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
298{ 313{
299 static const char str[] = "double fault"; 314 static const char str[] = "double fault";
300 struct task_struct *tsk = current; 315 struct task_struct *tsk = current;
316#ifdef CONFIG_VMAP_STACK
317 unsigned long cr2;
318#endif
301 319
302#ifdef CONFIG_X86_ESPFIX64 320#ifdef CONFIG_X86_ESPFIX64
303 extern unsigned char native_irq_return_iret[]; 321 extern unsigned char native_irq_return_iret[];
@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
332 tsk->thread.error_code = error_code; 350 tsk->thread.error_code = error_code;
333 tsk->thread.trap_nr = X86_TRAP_DF; 351 tsk->thread.trap_nr = X86_TRAP_DF;
334 352
353#ifdef CONFIG_VMAP_STACK
354 /*
355 * If we overflow the stack into a guard page, the CPU will fail
356 * to deliver #PF and will send #DF instead. Similarly, if we
357 * take any non-IST exception while too close to the bottom of
358 * the stack, the processor will get a page fault while
359 * delivering the exception and will generate a double fault.
360 *
361 * According to the SDM (footnote in 6.15 under "Interrupt 14 -
 362 * Page-Fault Exception (#PF)"):
363 *
364 * Processors update CR2 whenever a page fault is detected. If a
365 * second page fault occurs while an earlier page fault is being
 366 * delivered, the faulting linear address of the second fault will
367 * overwrite the contents of CR2 (replacing the previous
368 * address). These updates to CR2 occur even if the page fault
369 * results in a double fault or occurs during the delivery of a
370 * double fault.
371 *
372 * The logic below has a small possibility of incorrectly diagnosing
373 * some errors as stack overflows. For example, if the IDT or GDT
374 * gets corrupted such that #GP delivery fails due to a bad descriptor
375 * causing #GP and we hit this condition while CR2 coincidentally
376 * points to the stack guard page, we'll think we overflowed the
377 * stack. Given that we're going to panic one way or another
378 * if this happens, this isn't necessarily worth fixing.
379 *
380 * If necessary, we could improve the test by only diagnosing
381 * a stack overflow if the saved RSP points within 47 bytes of
382 * the bottom of the stack: if RSP == tsk_stack + 48 and we
383 * take an exception, the stack is already aligned and there
 384 * will be enough room for SS, RSP, RFLAGS, CS, RIP, and a
385 * possible error code, so a stack overflow would *not* double
386 * fault. With any less space left, exception delivery could
387 * fail, and, as a practical matter, we've overflowed the
388 * stack even if the actual trigger for the double fault was
389 * something else.
390 */
391 cr2 = read_cr2();
392 if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
393 handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
394#endif
395
335#ifdef CONFIG_DOUBLEFAULT 396#ifdef CONFIG_DOUBLEFAULT
336 df_debug(regs, error_code); 397 df_debug(regs, error_code);
337#endif 398#endif
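
The stack-overflow heuristic added to do_double_fault() above leans on unsigned arithmetic: task_stack_page(tsk) - 1 - cr2 is small only when CR2 sits in the page immediately below the base of the stack (the guard page); any address at or above the base wraps around to a huge value and fails the < PAGE_SIZE test. A quick stand-alone check of that arithmetic with made-up addresses (64-bit build assumed):

#include <stdio.h>

#define PAGE_SIZE 4096UL

static int looks_like_stack_overflow(unsigned long stack_base, unsigned long cr2)
{
        /* Unsigned subtraction: only faults within one page below the base pass. */
        return stack_base - 1 - cr2 < PAGE_SIZE;
}

int main(void)
{
        unsigned long stack_base = 0xffffc90000004000UL;   /* made-up vmalloc address */

        printf("fault 8 bytes below base: %d\n",
               looks_like_stack_overflow(stack_base, stack_base - 8));
        printf("fault inside the stack:   %d\n",
               looks_like_stack_overflow(stack_base, stack_base + 64));
        printf("fault far away:           %d\n",
               looks_like_stack_overflow(stack_base, 0x1000));
        return 0;
}
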
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
new file mode 100644
index 000000000000..a2456d4d286a
--- /dev/null
+++ b/arch/x86/kernel/unwind_frame.c
@@ -0,0 +1,93 @@
1#include <linux/sched.h>
2#include <asm/ptrace.h>
3#include <asm/bitops.h>
4#include <asm/stacktrace.h>
5#include <asm/unwind.h>
6
7#define FRAME_HEADER_SIZE (sizeof(long) * 2)
8
9unsigned long unwind_get_return_address(struct unwind_state *state)
10{
11 unsigned long addr;
12 unsigned long *addr_p = unwind_get_return_address_ptr(state);
13
14 if (unwind_done(state))
15 return 0;
16
17 addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
18 addr_p);
19
20 return __kernel_text_address(addr) ? addr : 0;
21}
22EXPORT_SYMBOL_GPL(unwind_get_return_address);
23
24static bool update_stack_state(struct unwind_state *state, void *addr,
25 size_t len)
26{
27 struct stack_info *info = &state->stack_info;
28
29 /*
30 * If addr isn't on the current stack, switch to the next one.
31 *
32 * We may have to traverse multiple stacks to deal with the possibility
33 * that 'info->next_sp' could point to an empty stack and 'addr' could
34 * be on a subsequent stack.
35 */
36 while (!on_stack(info, addr, len))
37 if (get_stack_info(info->next_sp, state->task, info,
38 &state->stack_mask))
39 return false;
40
41 return true;
42}
43
44bool unwind_next_frame(struct unwind_state *state)
45{
46 unsigned long *next_bp;
47
48 if (unwind_done(state))
49 return false;
50
51 next_bp = (unsigned long *)*state->bp;
52
53 /* make sure the next frame's data is accessible */
54 if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
55 return false;
56
57 /* move to the next frame */
58 state->bp = next_bp;
59 return true;
60}
61EXPORT_SYMBOL_GPL(unwind_next_frame);
62
63void __unwind_start(struct unwind_state *state, struct task_struct *task,
64 struct pt_regs *regs, unsigned long *first_frame)
65{
66 memset(state, 0, sizeof(*state));
67 state->task = task;
68
69 /* don't even attempt to start from user mode regs */
70 if (regs && user_mode(regs)) {
71 state->stack_info.type = STACK_TYPE_UNKNOWN;
72 return;
73 }
74
75 /* set up the starting stack frame */
76 state->bp = get_frame_pointer(task, regs);
77
78 /* initialize stack info and make sure the frame data is accessible */
79 get_stack_info(state->bp, state->task, &state->stack_info,
80 &state->stack_mask);
81 update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
82
83 /*
84 * The caller can provide the address of the first frame directly
85 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
86 * to start unwinding at. Skip ahead until we reach it.
87 */
88 while (!unwind_done(state) &&
89 (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
90 state->bp < first_frame))
91 unwind_next_frame(state);
92}
93EXPORT_SYMBOL_GPL(__unwind_start);
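
unwind_frame.c above is the frame-pointer backend for the new unwind API: each frame is a two-word header in which *bp is the caller's saved frame pointer and *(bp + 1) is the return address, hence FRAME_HEADER_SIZE = sizeof(long) * 2. The same layout can be walked from user space when code is built with frame pointers; the sketch below is a model of that walk, not the kernel interface, and should be compiled with -fno-omit-frame-pointer (or simply -O0).

#include <stdio.h>

/*
 * Walk saved frame pointers the same way the frame unwinder does:
 * fp[0] holds the caller's frame pointer, fp[1] the return address.
 */
static void walk_frames(void)
{
        unsigned long *fp = __builtin_frame_address(0);
        int depth = 0;

        while (fp && depth < 8) {
                unsigned long ret_addr = fp[1];     /* one word above the saved fp */
                unsigned long *next = (unsigned long *)fp[0];

                if (!ret_addr)
                        break;
                printf("frame %d: return address %#lx\n", depth, ret_addr);
                if (next <= fp)                     /* stack grows down: caller frame is higher */
                        break;
                fp = next;
                depth++;
        }
}

static void level2(void) { walk_frames(); }
static void level1(void) { level2(); }

int main(void)
{
        level1();
        return 0;
}
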
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
new file mode 100644
index 000000000000..b5a834c93065
--- /dev/null
+++ b/arch/x86/kernel/unwind_guess.c
@@ -0,0 +1,43 @@
1#include <linux/sched.h>
2#include <linux/ftrace.h>
3#include <asm/ptrace.h>
4#include <asm/bitops.h>
5#include <asm/stacktrace.h>
6#include <asm/unwind.h>
7
8bool unwind_next_frame(struct unwind_state *state)
9{
10 struct stack_info *info = &state->stack_info;
11
12 if (unwind_done(state))
13 return false;
14
15 do {
16 for (state->sp++; state->sp < info->end; state->sp++)
17 if (__kernel_text_address(*state->sp))
18 return true;
19
20 state->sp = info->next_sp;
21
22 } while (!get_stack_info(state->sp, state->task, info,
23 &state->stack_mask));
24
25 return false;
26}
27EXPORT_SYMBOL_GPL(unwind_next_frame);
28
29void __unwind_start(struct unwind_state *state, struct task_struct *task,
30 struct pt_regs *regs, unsigned long *first_frame)
31{
32 memset(state, 0, sizeof(*state));
33
34 state->task = task;
35 state->sp = first_frame;
36
37 get_stack_info(first_frame, state->task, &state->stack_info,
38 &state->stack_mask);
39
40 if (!__kernel_text_address(*first_frame))
41 unwind_next_frame(state);
42}
43EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 76c5e52436c4..0bd9f1287f39 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
91static void default_nmi_init(void) { }; 91static void default_nmi_init(void) { };
92static int default_i8042_detect(void) { return 1; }; 92static int default_i8042_detect(void) { return 1; };
93 93
94struct x86_platform_ops x86_platform = { 94struct x86_platform_ops x86_platform __ro_after_init = {
95 .calibrate_cpu = native_calibrate_cpu, 95 .calibrate_cpu = native_calibrate_cpu,
96 .calibrate_tsc = native_calibrate_tsc, 96 .calibrate_tsc = native_calibrate_tsc,
97 .get_wallclock = mach_get_cmos_time, 97 .get_wallclock = mach_get_cmos_time,
@@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = {
108EXPORT_SYMBOL_GPL(x86_platform); 108EXPORT_SYMBOL_GPL(x86_platform);
109 109
110#if defined(CONFIG_PCI_MSI) 110#if defined(CONFIG_PCI_MSI)
111struct x86_msi_ops x86_msi = { 111struct x86_msi_ops x86_msi __ro_after_init = {
112 .setup_msi_irqs = native_setup_msi_irqs, 112 .setup_msi_irqs = native_setup_msi_irqs,
113 .teardown_msi_irq = native_teardown_msi_irq, 113 .teardown_msi_irq = native_teardown_msi_irq,
114 .teardown_msi_irqs = default_teardown_msi_irqs, 114 .teardown_msi_irqs = default_teardown_msi_irqs,
@@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
137} 137}
138#endif 138#endif
139 139
140struct x86_io_apic_ops x86_io_apic_ops = { 140struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
141 .read = native_io_apic_read, 141 .read = native_io_apic_read,
142 .disable = native_disable_io_apic, 142 .disable = native_disable_io_apic,
143}; 143};
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af523d84d102..1e6b84b96ea6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4961,7 +4961,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
4961 avic_handle_ldr_update(vcpu); 4961 avic_handle_ldr_update(vcpu);
4962} 4962}
4963 4963
4964static struct kvm_x86_ops svm_x86_ops = { 4964static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
4965 .cpu_has_kvm_support = has_svm, 4965 .cpu_has_kvm_support = has_svm,
4966 .disabled_by_bios = is_disabled, 4966 .disabled_by_bios = is_disabled,
4967 .hardware_setup = svm_hardware_setup, 4967 .hardware_setup = svm_hardware_setup,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5cede40e2552..121fdf6e9ed0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11177,7 +11177,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
11177 ~FEATURE_CONTROL_LMCE; 11177 ~FEATURE_CONTROL_LMCE;
11178} 11178}
11179 11179
11180static struct kvm_x86_ops vmx_x86_ops = { 11180static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
11181 .cpu_has_kvm_support = cpu_has_kvm_support, 11181 .cpu_has_kvm_support = cpu_has_kvm_support,
11182 .disabled_by_bios = vmx_disabled_by_bios, 11182 .disabled_by_bios = vmx_disabled_by_bios,
11183 .hardware_setup = hardware_setup, 11183 .hardware_setup = hardware_setup,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index dc8023060456..0b92fce3e6c0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code,
753 return; 753 return;
754 } 754 }
755 755
756#ifdef CONFIG_VMAP_STACK
757 /*
758 * Stack overflow? During boot, we can fault near the initial
759 * stack in the direct map, but that's not an overflow -- check
760 * that we're in vmalloc space to avoid this.
761 */
762 if (is_vmalloc_addr((void *)address) &&
763 (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
764 address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
765 register void *__sp asm("rsp");
766 unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
767 /*
768 * We're likely to be running with very little stack space
769 * left. It's plausible that we'd hit this condition but
770 * double-fault even before we get this far, in which case
771 * we're fine: the double-fault handler will deal with it.
772 *
773 * We don't want to make it all the way into the oops code
774 * and then double-fault, though, because we're likely to
775 * break the console driver and lose most of the stack dump.
776 */
777 asm volatile ("movq %[stack], %%rsp\n\t"
778 "call handle_stack_overflow\n\t"
779 "1: jmp 1b"
780 : "+r" (__sp)
781 : "D" ("kernel stack overflow (page fault)"),
782 "S" (regs), "d" (address),
783 [stack] "rm" (stack));
784 unreachable();
785 }
786#endif
787
756 /* 788 /*
757 * 32-bit: 789 * 32-bit:
758 * 790 *
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index bda8d5eef04d..ddd2661c4502 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -40,17 +40,26 @@
40 * You need to add an if/def entry if you introduce a new memory region 40 * You need to add an if/def entry if you introduce a new memory region
41 * compatible with KASLR. Your entry must be in logical order with memory 41 * compatible with KASLR. Your entry must be in logical order with memory
42 * layout. For example, ESPFIX is before EFI because its virtual address is 42 * layout. For example, ESPFIX is before EFI because its virtual address is
43 * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to 43 * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
44 * ensure that this order is correct and won't be changed. 44 * ensure that this order is correct and won't be changed.
45 */ 45 */
46static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; 46static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
47static const unsigned long vaddr_end = VMEMMAP_START; 47
48#if defined(CONFIG_X86_ESPFIX64)
49static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
50#elif defined(CONFIG_EFI)
51static const unsigned long vaddr_end = EFI_VA_START;
52#else
53static const unsigned long vaddr_end = __START_KERNEL_map;
54#endif
48 55
49/* Default values */ 56/* Default values */
50unsigned long page_offset_base = __PAGE_OFFSET_BASE; 57unsigned long page_offset_base = __PAGE_OFFSET_BASE;
51EXPORT_SYMBOL(page_offset_base); 58EXPORT_SYMBOL(page_offset_base);
52unsigned long vmalloc_base = __VMALLOC_BASE; 59unsigned long vmalloc_base = __VMALLOC_BASE;
53EXPORT_SYMBOL(vmalloc_base); 60EXPORT_SYMBOL(vmalloc_base);
61unsigned long vmemmap_base = __VMEMMAP_BASE;
62EXPORT_SYMBOL(vmemmap_base);
54 63
55/* 64/*
56 * Memory regions randomized by KASLR (except modules that use a separate logic 65 * Memory regions randomized by KASLR (except modules that use a separate logic
@@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region {
63} kaslr_regions[] = { 72} kaslr_regions[] = {
64 { &page_offset_base, 64/* Maximum */ }, 73 { &page_offset_base, 64/* Maximum */ },
65 { &vmalloc_base, VMALLOC_SIZE_TB }, 74 { &vmalloc_base, VMALLOC_SIZE_TB },
75 { &vmemmap_base, 1 },
66}; 76};
67 77
68/* Get size in bytes used by the memory region */ 78/* Get size in bytes used by the memory region */
@@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void)
89 struct rnd_state rand_state; 99 struct rnd_state rand_state;
90 unsigned long remain_entropy; 100 unsigned long remain_entropy;
91 101
102 /*
 103 * All these BUILD_BUG_ON checks ensure the memory layout is
104 * consistent with the vaddr_start/vaddr_end variables.
105 */
106 BUILD_BUG_ON(vaddr_start >= vaddr_end);
107 BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
108 vaddr_end >= EFI_VA_START);
109 BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
110 config_enabled(CONFIG_EFI)) &&
111 vaddr_end >= __START_KERNEL_map);
112 BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
113
92 if (!kaslr_memory_enabled()) 114 if (!kaslr_memory_enabled())
93 return; 115 return;
94 116
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4dbe65622810..a7655f6caf7d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
77 unsigned cpu = smp_processor_id(); 77 unsigned cpu = smp_processor_id();
78 78
79 if (likely(prev != next)) { 79 if (likely(prev != next)) {
80 if (IS_ENABLED(CONFIG_VMAP_STACK)) {
81 /*
82 * If our current stack is in vmalloc space and isn't
83 * mapped in the new pgd, we'll double-fault. Forcibly
84 * map it.
85 */
86 unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
87
88 pgd_t *pgd = next->pgd + stack_pgd_index;
89
90 if (unlikely(pgd_none(*pgd)))
91 set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
92 }
93
80#ifdef CONFIG_SMP 94#ifdef CONFIG_SMP
81 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); 95 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
82 this_cpu_write(cpu_tlbstate.active_mm, next); 96 this_cpu_write(cpu_tlbstate.active_mm, next);
83#endif 97#endif
98
84 cpumask_set_cpu(cpu, mm_cpumask(next)); 99 cpumask_set_cpu(cpu, mm_cpumask(next));
85 100
86 /* 101 /*
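
The switch_mm_irqs_off() hunk above pre-populates, in the incoming mm's page tables, the PGD entry covering the current vmalloc'ed stack, so the first stack access after the CR3 switch cannot fault on an unsynced vmalloc mapping. The entry it patches is selected by pgd_index(current_stack_pointer()); a stand-alone version of that index arithmetic is shown below, using the usual x86-64 4-level paging constants as an assumption (they are not part of this diff).

#include <stdio.h>

/* Typical x86-64 4-level paging constants (assumed here for illustration). */
#define PGDIR_SHIFT  39
#define PTRS_PER_PGD 512UL

static unsigned long pgd_index(unsigned long address)
{
        return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
        /* Made-up vmalloc-range stack address (64-bit build assumed). */
        unsigned long stack_ptr = 0xffffc90000123458UL;

        /* Each PGD entry covers 512 GiB, so one synced entry covers the whole stack. */
        printf("pgd_index(stack)     = %lu\n", pgd_index(stack_ptr));
        printf("pgd_index(stack+8k)  = %lu\n", pgd_index(stack_ptr + 8192));
        return 0;
}
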
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index cb31a4440e58..a2488b6e27d6 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,27 +16,7 @@
16 16
17#include <asm/ptrace.h> 17#include <asm/ptrace.h>
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19#include <asm/unwind.h>
20static int backtrace_stack(void *data, char *name)
21{
22 /* Yes, we want all stacks */
23 return 0;
24}
25
26static int backtrace_address(void *data, unsigned long addr, int reliable)
27{
28 unsigned int *depth = data;
29
30 if ((*depth)--)
31 oprofile_add_trace(addr);
32 return 0;
33}
34
35static struct stacktrace_ops backtrace_ops = {
36 .stack = backtrace_stack,
37 .address = backtrace_address,
38 .walk_stack = print_context_stack,
39};
40 20
41#ifdef CONFIG_COMPAT 21#ifdef CONFIG_COMPAT
42static struct stack_frame_ia32 * 22static struct stack_frame_ia32 *
@@ -113,10 +93,29 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
113 struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); 93 struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
114 94
115 if (!user_mode(regs)) { 95 if (!user_mode(regs)) {
116 unsigned long stack = kernel_stack_pointer(regs); 96 struct unwind_state state;
117 if (depth) 97 unsigned long addr;
118 dump_trace(NULL, regs, (unsigned long *)stack, 0, 98
119 &backtrace_ops, &depth); 99 if (!depth)
100 return;
101
102 oprofile_add_trace(regs->ip);
103
104 if (!--depth)
105 return;
106
107 for (unwind_start(&state, current, regs, NULL);
108 !unwind_done(&state); unwind_next_frame(&state)) {
109 addr = unwind_get_return_address(&state);
110 if (!addr)
111 break;
112
113 oprofile_add_trace(addr);
114
115 if (!--depth)
116 break;
117 }
118
120 return; 119 return;
121 } 120 }
122 121
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 9770e55e768f..1d97cea3b3a4 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -120,9 +120,12 @@ static unsigned long __init bios32_service(unsigned long service)
120static struct { 120static struct {
121 unsigned long address; 121 unsigned long address;
122 unsigned short segment; 122 unsigned short segment;
123} pci_indirect = { 0, __KERNEL_CS }; 123} pci_indirect __ro_after_init = {
124 .address = 0,
125 .segment = __KERNEL_CS,
126};
124 127
125static int pci_bios_present; 128static int pci_bios_present __ro_after_init;
126 129
127static int __init check_pcibios(void) 130static int __init check_pcibios(void)
128{ 131{
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index b12c26e2e309..53cace2ec0e2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -130,7 +130,7 @@ static void __save_processor_state(struct saved_context *ctxt)
130 ctxt->cr0 = read_cr0(); 130 ctxt->cr0 = read_cr0();
131 ctxt->cr2 = read_cr2(); 131 ctxt->cr2 = read_cr2();
132 ctxt->cr3 = read_cr3(); 132 ctxt->cr3 = read_cr3();
133 ctxt->cr4 = __read_cr4_safe(); 133 ctxt->cr4 = __read_cr4();
134#ifdef CONFIG_X86_64 134#ifdef CONFIG_X86_64
135 ctxt->cr8 = read_cr8(); 135 ctxt->cr8 = read_cr8();
136#endif 136#endif
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index a7ef7b131e25..5766ead6fdb9 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -194,7 +194,7 @@ int peek_user(struct task_struct *child, long addr, long data)
194 194
195static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) 195static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
196{ 196{
197 int err, n, cpu = ((struct thread_info *) child->stack)->cpu; 197 int err, n, cpu = task_cpu(child);
198 struct user_i387_struct fpregs; 198 struct user_i387_struct fpregs;
199 199
200 err = save_i387_registers(userspace_pid[cpu], 200 err = save_i387_registers(userspace_pid[cpu],
@@ -211,7 +211,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
211 211
212static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) 212static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
213{ 213{
214 int n, cpu = ((struct thread_info *) child->stack)->cpu; 214 int n, cpu = task_cpu(child);
215 struct user_i387_struct fpregs; 215 struct user_i387_struct fpregs;
216 216
217 n = copy_from_user(&fpregs, buf, sizeof(fpregs)); 217 n = copy_from_user(&fpregs, buf, sizeof(fpregs));
@@ -224,7 +224,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c
224 224
225static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) 225static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
226{ 226{
227 int err, n, cpu = ((struct thread_info *) child->stack)->cpu; 227 int err, n, cpu = task_cpu(child);
228 struct user_fxsr_struct fpregs; 228 struct user_fxsr_struct fpregs;
229 229
230 err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); 230 err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
@@ -240,7 +240,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *
240 240
241static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) 241static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
242{ 242{
243 int n, cpu = ((struct thread_info *) child->stack)->cpu; 243 int n, cpu = task_cpu(child);
244 struct user_fxsr_struct fpregs; 244 struct user_fxsr_struct fpregs;
245 245
246 n = copy_from_user(&fpregs, buf, sizeof(fpregs)); 246 n = copy_from_user(&fpregs, buf, sizeof(fpregs));
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bc9aaba01a22..f1d2182e071f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1237,7 +1237,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
1237 .write_cr0 = xen_write_cr0, 1237 .write_cr0 = xen_write_cr0,
1238 1238
1239 .read_cr4 = native_read_cr4, 1239 .read_cr4 = native_read_cr4,
1240 .read_cr4_safe = native_read_cr4_safe,
1241 .write_cr4 = xen_write_cr4, 1240 .write_cr4 = xen_write_cr4,
1242 1241
1243#ifdef CONFIG_X86_64 1242#ifdef CONFIG_X86_64
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 96de97a46079..4025291ea0ae 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -940,15 +940,13 @@ static void build_inv_irt(struct iommu_cmd *cmd, u16 devid)
940 * Writes the command to the IOMMUs command buffer and informs the 940 * Writes the command to the IOMMUs command buffer and informs the
941 * hardware about the new command. 941 * hardware about the new command.
942 */ 942 */
943static int iommu_queue_command_sync(struct amd_iommu *iommu, 943static int __iommu_queue_command_sync(struct amd_iommu *iommu,
944 struct iommu_cmd *cmd, 944 struct iommu_cmd *cmd,
945 bool sync) 945 bool sync)
946{ 946{
947 u32 left, tail, head, next_tail; 947 u32 left, tail, head, next_tail;
948 unsigned long flags;
949 948
950again: 949again:
951 spin_lock_irqsave(&iommu->lock, flags);
952 950
953 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 951 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
954 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 952 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
@@ -957,15 +955,14 @@ again:
957 955
958 if (left <= 2) { 956 if (left <= 2) {
959 struct iommu_cmd sync_cmd; 957 struct iommu_cmd sync_cmd;
960 volatile u64 sem = 0;
961 int ret; 958 int ret;
962 959
963 build_completion_wait(&sync_cmd, (u64)&sem); 960 iommu->cmd_sem = 0;
964 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
965 961
966 spin_unlock_irqrestore(&iommu->lock, flags); 962 build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
963 copy_cmd_to_buffer(iommu, &sync_cmd, tail);
967 964
968 if ((ret = wait_on_sem(&sem)) != 0) 965 if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
969 return ret; 966 return ret;
970 967
971 goto again; 968 goto again;
@@ -976,9 +973,21 @@ again:
976 /* We need to sync now to make sure all commands are processed */ 973 /* We need to sync now to make sure all commands are processed */
977 iommu->need_sync = sync; 974 iommu->need_sync = sync;
978 975
976 return 0;
977}
978
979static int iommu_queue_command_sync(struct amd_iommu *iommu,
980 struct iommu_cmd *cmd,
981 bool sync)
982{
983 unsigned long flags;
984 int ret;
985
986 spin_lock_irqsave(&iommu->lock, flags);
987 ret = __iommu_queue_command_sync(iommu, cmd, sync);
979 spin_unlock_irqrestore(&iommu->lock, flags); 988 spin_unlock_irqrestore(&iommu->lock, flags);
980 989
981 return 0; 990 return ret;
982} 991}
983 992
984static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) 993static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
@@ -993,19 +1002,29 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
993static int iommu_completion_wait(struct amd_iommu *iommu) 1002static int iommu_completion_wait(struct amd_iommu *iommu)
994{ 1003{
995 struct iommu_cmd cmd; 1004 struct iommu_cmd cmd;
996 volatile u64 sem = 0; 1005 unsigned long flags;
997 int ret; 1006 int ret;
998 1007
999 if (!iommu->need_sync) 1008 if (!iommu->need_sync)
1000 return 0; 1009 return 0;
1001 1010
1002 build_completion_wait(&cmd, (u64)&sem);
1003 1011
1004 ret = iommu_queue_command_sync(iommu, &cmd, false); 1012 build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
1013
1014 spin_lock_irqsave(&iommu->lock, flags);
1015
1016 iommu->cmd_sem = 0;
1017
1018 ret = __iommu_queue_command_sync(iommu, &cmd, false);
1005 if (ret) 1019 if (ret)
1006 return ret; 1020 goto out_unlock;
1021
1022 ret = wait_on_sem(&iommu->cmd_sem);
1007 1023
1008 return wait_on_sem(&sem); 1024out_unlock:
1025 spin_unlock_irqrestore(&iommu->lock, flags);
1026
1027 return ret;
1009} 1028}
1010 1029
1011static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) 1030static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
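
The amd_iommu.c change does two related things: the completion-wait semaphore moves off the kernel stack (a vmalloc'ed stack address cannot simply be handed to the device) into the per-IOMMU cmd_sem field, and iommu_queue_command_sync() is split into an unlocked __iommu_queue_command_sync() helper plus a locking wrapper, so iommu_completion_wait() can hold iommu->lock from resetting cmd_sem until the wait finishes. The lock-split idiom itself is generic; a sketch with made-up names:

#include <linux/spinlock.h>
#include <linux/types.h>

/* All names below are hypothetical.  __queue_locked() requires dev->lock
 * to be held; queue_cmd() is the public wrapper that takes the lock. */
struct my_cmd { u32 data[4]; };
struct my_dev { spinlock_t lock; /* ... ring buffer state ... */ };

static int __queue_locked(struct my_dev *dev, struct my_cmd *cmd)
{
        lockdep_assert_held(&dev->lock);
        /* write *cmd into the command ring, possibly retrying */
        return 0;
}

static int queue_cmd(struct my_dev *dev, struct my_cmd *cmd)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&dev->lock, flags);
        ret = __queue_locked(dev, cmd);
        spin_unlock_irqrestore(&dev->lock, flags);

        return ret;
}
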
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index caf5e3822715..9652848e3155 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -524,6 +524,8 @@ struct amd_iommu {
524 struct irq_domain *ir_domain; 524 struct irq_domain *ir_domain;
525 struct irq_domain *msi_domain; 525 struct irq_domain *msi_domain;
526#endif 526#endif
527
528 volatile u64 __aligned(8) cmd_sem;
527}; 529};
528 530
529#define ACPIHID_UID_LEN 256 531#define ACPIHID_UID_LEN 256
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ac0df4dde823..3b792ab3c0dc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -483,7 +483,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
483 save_stack_trace_tsk(task, &trace); 483 save_stack_trace_tsk(task, &trace);
484 484
485 for (i = 0; i < trace.nr_entries; i++) { 485 for (i = 0; i < trace.nr_entries; i++) {
486 seq_printf(m, "[<%pK>] %pS\n", 486 seq_printf(m, "[<%pK>] %pB\n",
487 (void *)entries[i], (void *)entries[i]); 487 (void *)entries[i], (void *)entries[i]);
488 } 488 }
489 unlock_trace(task); 489 unlock_trace(task);
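
The /proc/<pid>/stack change switches the format from %pS to %pB. Both print symbol+offset, but %pB assumes its argument is a return address and performs the symbol lookup at addr - 1, so a return address that lands exactly at the start of the following function is still attributed to the caller. A small hypothetical helper showing both forms side by side:

#include <linux/printk.h>

/* Hypothetical: print one address both ways for comparison. */
static void show_ret_addr(unsigned long ret)
{
        pr_info("as plain address:  %pS\n", (void *)ret);
        pr_info("as return address: %pB\n", (void *)ret);
}
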
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7d565afe35d2..6f93ac46e7f0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -795,7 +795,12 @@ struct ftrace_ret_stack {
795 unsigned long func; 795 unsigned long func;
796 unsigned long long calltime; 796 unsigned long long calltime;
797 unsigned long long subtime; 797 unsigned long long subtime;
798#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
798 unsigned long fp; 799 unsigned long fp;
800#endif
801#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
802 unsigned long *retp;
803#endif
799}; 804};
800 805
801/* 806/*
@@ -807,7 +812,10 @@ extern void return_to_handler(void);
807 812
808extern int 813extern int
809ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, 814ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
810 unsigned long frame_pointer); 815 unsigned long frame_pointer, unsigned long *retp);
816
817unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
818 unsigned long ret, unsigned long *retp);
811 819
812/* 820/*
813 * Sometimes we don't want to trace a function with the function 821 * Sometimes we don't want to trace a function with the function
@@ -870,6 +878,13 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
870 return -1; 878 return -1;
871} 879}
872 880
881static inline unsigned long
882ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret,
883 unsigned long *retp)
884{
885 return ret;
886}
887
873static inline void pause_graph_tracing(void) { } 888static inline void pause_graph_tracing(void) { }
874static inline void unpause_graph_tracing(void) { } 889static inline void unpause_graph_tracing(void) { }
875#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 890#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f8834f820ec2..325f649d77ff 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,6 +15,8 @@
15#include <net/net_namespace.h> 15#include <net/net_namespace.h>
16#include <linux/sched/rt.h> 16#include <linux/sched/rt.h>
17 17
18#include <asm/thread_info.h>
19
18#ifdef CONFIG_SMP 20#ifdef CONFIG_SMP
19# define INIT_PUSHABLE_TASKS(tsk) \ 21# define INIT_PUSHABLE_TASKS(tsk) \
20 .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), 22 .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
@@ -183,12 +185,21 @@ extern struct task_group root_task_group;
183# define INIT_KASAN(tsk) 185# define INIT_KASAN(tsk)
184#endif 186#endif
185 187
188#ifdef CONFIG_THREAD_INFO_IN_TASK
189# define INIT_TASK_TI(tsk) \
190 .thread_info = INIT_THREAD_INFO(tsk), \
191 .stack_refcount = ATOMIC_INIT(1),
192#else
193# define INIT_TASK_TI(tsk)
194#endif
195
186/* 196/*
187 * INIT_TASK is used to set up the first task table, touch at 197 * INIT_TASK is used to set up the first task table, touch at
188 * your own risk!. Base=0, limit=0x1fffff (=2MB) 198 * your own risk!. Base=0, limit=0x1fffff (=2MB)
189 */ 199 */
190#define INIT_TASK(tsk) \ 200#define INIT_TASK(tsk) \
191{ \ 201{ \
202 INIT_TASK_TI(tsk) \
192 .state = 0, \ 203 .state = 0, \
193 .stack = init_stack, \ 204 .stack = init_stack, \
194 .usage = ATOMIC_INIT(2), \ 205 .usage = ATOMIC_INIT(2), \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f76d75fc9eaf..7543a476178b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1471,6 +1471,13 @@ struct tlbflush_unmap_batch {
1471}; 1471};
1472 1472
1473struct task_struct { 1473struct task_struct {
1474#ifdef CONFIG_THREAD_INFO_IN_TASK
1475 /*
1476 * For reasons of header soup (see current_thread_info()), this
1477 * must be the first element of task_struct.
1478 */
1479 struct thread_info thread_info;
1480#endif
1474 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1481 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
1475 void *stack; 1482 void *stack;
1476 atomic_t usage; 1483 atomic_t usage;
@@ -1480,6 +1487,9 @@ struct task_struct {
1480#ifdef CONFIG_SMP 1487#ifdef CONFIG_SMP
1481 struct llist_node wake_entry; 1488 struct llist_node wake_entry;
1482 int on_cpu; 1489 int on_cpu;
1490#ifdef CONFIG_THREAD_INFO_IN_TASK
1491 unsigned int cpu; /* current CPU */
1492#endif
1483 unsigned int wakee_flips; 1493 unsigned int wakee_flips;
1484 unsigned long wakee_flip_decay_ts; 1494 unsigned long wakee_flip_decay_ts;
1485 struct task_struct *last_wakee; 1495 struct task_struct *last_wakee;
@@ -1936,6 +1946,13 @@ struct task_struct {
1936#ifdef CONFIG_MMU 1946#ifdef CONFIG_MMU
1937 struct task_struct *oom_reaper_list; 1947 struct task_struct *oom_reaper_list;
1938#endif 1948#endif
1949#ifdef CONFIG_VMAP_STACK
1950 struct vm_struct *stack_vm_area;
1951#endif
1952#ifdef CONFIG_THREAD_INFO_IN_TASK
1953 /* A live task holds one reference. */
1954 atomic_t stack_refcount;
1955#endif
1939/* CPU-specific state of this task */ 1956/* CPU-specific state of this task */
1940 struct thread_struct thread; 1957 struct thread_struct thread;
1941/* 1958/*
@@ -1952,6 +1969,18 @@ extern int arch_task_struct_size __read_mostly;
1952# define arch_task_struct_size (sizeof(struct task_struct)) 1969# define arch_task_struct_size (sizeof(struct task_struct))
1953#endif 1970#endif
1954 1971
1972#ifdef CONFIG_VMAP_STACK
1973static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
1974{
1975 return t->stack_vm_area;
1976}
1977#else
1978static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
1979{
1980 return NULL;
1981}
1982#endif
1983
1955/* Future-safe accessor for struct task_struct's cpus_allowed. */ 1984/* Future-safe accessor for struct task_struct's cpus_allowed. */
1956#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) 1985#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
1957 1986
@@ -2586,7 +2615,9 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p);
2586void yield(void); 2615void yield(void);
2587 2616
2588union thread_union { 2617union thread_union {
2618#ifndef CONFIG_THREAD_INFO_IN_TASK
2589 struct thread_info thread_info; 2619 struct thread_info thread_info;
2620#endif
2590 unsigned long stack[THREAD_SIZE/sizeof(long)]; 2621 unsigned long stack[THREAD_SIZE/sizeof(long)];
2591}; 2622};
2592 2623
@@ -3074,10 +3105,34 @@ static inline void threadgroup_change_end(struct task_struct *tsk)
3074 cgroup_threadgroup_change_end(tsk); 3105 cgroup_threadgroup_change_end(tsk);
3075} 3106}
3076 3107
3077#ifndef __HAVE_THREAD_FUNCTIONS 3108#ifdef CONFIG_THREAD_INFO_IN_TASK
3109
3110static inline struct thread_info *task_thread_info(struct task_struct *task)
3111{
3112 return &task->thread_info;
3113}
3114
3115/*
3116 * When accessing the stack of a non-current task that might exit, use
3117 * try_get_task_stack() instead. task_stack_page will return a pointer
3118 * that could get freed out from under you.
3119 */
3120static inline void *task_stack_page(const struct task_struct *task)
3121{
3122 return task->stack;
3123}
3124
3125#define setup_thread_stack(new,old) do { } while(0)
3126
3127static inline unsigned long *end_of_stack(const struct task_struct *task)
3128{
3129 return task->stack;
3130}
3131
3132#elif !defined(__HAVE_THREAD_FUNCTIONS)
3078 3133
3079#define task_thread_info(task) ((struct thread_info *)(task)->stack) 3134#define task_thread_info(task) ((struct thread_info *)(task)->stack)
3080#define task_stack_page(task) ((task)->stack) 3135#define task_stack_page(task) ((void *)(task)->stack)
3081 3136
3082static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) 3137static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
3083{ 3138{
@@ -3104,6 +3159,24 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
3104} 3159}
3105 3160
3106#endif 3161#endif
3162
3163#ifdef CONFIG_THREAD_INFO_IN_TASK
3164static inline void *try_get_task_stack(struct task_struct *tsk)
3165{
3166 return atomic_inc_not_zero(&tsk->stack_refcount) ?
3167 task_stack_page(tsk) : NULL;
3168}
3169
3170extern void put_task_stack(struct task_struct *tsk);
3171#else
3172static inline void *try_get_task_stack(struct task_struct *tsk)
3173{
3174 return task_stack_page(tsk);
3175}
3176
3177static inline void put_task_stack(struct task_struct *tsk) {}
3178#endif
3179
3107#define task_stack_end_corrupted(task) \ 3180#define task_stack_end_corrupted(task) \
3108 (*(end_of_stack(task)) != STACK_END_MAGIC) 3181 (*(end_of_stack(task)) != STACK_END_MAGIC)
3109 3182
@@ -3390,7 +3463,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
3390 3463
3391static inline unsigned int task_cpu(const struct task_struct *p) 3464static inline unsigned int task_cpu(const struct task_struct *p)
3392{ 3465{
3466#ifdef CONFIG_THREAD_INFO_IN_TASK
3467 return p->cpu;
3468#else
3393 return task_thread_info(p)->cpu; 3469 return task_thread_info(p)->cpu;
3470#endif
3394} 3471}
3395 3472
3396static inline int task_node(const struct task_struct *p) 3473static inline int task_node(const struct task_struct *p)
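
Beyond the task_cpu() accessor, the key additions in sched.h are try_get_task_stack() and put_task_stack(): with CONFIG_THREAD_INFO_IN_TASK the stack can be freed before the task_struct itself, so code that inspects a remote task's stack must pin it first (lib/syscall.c, get_wchan() and the dumpstack code in this series all follow that pattern). A minimal hypothetical consumer:

#include <linux/sched.h>

/* Hypothetical: check another task's stack-end canary without racing
 * against the stack being freed.  try_get_task_stack() returns NULL
 * once the stack is gone when CONFIG_THREAD_INFO_IN_TASK=y; otherwise
 * it degrades to task_stack_page() and put_task_stack() is a no-op. */
static bool stack_canary_intact(struct task_struct *task)
{
        void *stack = try_get_task_stack(task);
        bool ok;

        if (!stack)
                return true;    /* no stack left to inspect */

        ok = !task_stack_end_corrupted(task);
        put_task_stack(task);

        return ok;
}
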
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 2b5b10eed74f..45f004e9cc59 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -13,6 +13,21 @@
13struct timespec; 13struct timespec;
14struct compat_timespec; 14struct compat_timespec;
15 15
16#ifdef CONFIG_THREAD_INFO_IN_TASK
17struct thread_info {
18 unsigned long flags; /* low level flags */
19};
20
21#define INIT_THREAD_INFO(tsk) \
22{ \
23 .flags = 0, \
24}
25#endif
26
27#ifdef CONFIG_THREAD_INFO_IN_TASK
28#define current_thread_info() ((struct thread_info *)current)
29#endif
30
16/* 31/*
17 * System call restart block. 32 * System call restart block.
18 */ 33 */
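
With CONFIG_THREAD_INFO_IN_TASK the generic thread_info shrinks to a single flags word and current_thread_info() becomes a cast of current; the cast is only valid because thread_info is the first member of task_struct, as the sched.h comment above spells out. A sketch of what that layout guarantee buys (the helper is hypothetical):

#include <linux/sched.h>
#include <linux/thread_info.h>

/* Hypothetical: the cast works solely because thread_info sits at
 * offset zero of task_struct, so both pointers have the same value. */
static inline bool need_resched_hint(struct task_struct *tsk)
{
        struct thread_info *ti = (struct thread_info *)tsk;

        return test_bit(TIF_NEED_RESCHED, &ti->flags);
}
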
diff --git a/init/Kconfig b/init/Kconfig
index cac3f096050d..3b9a47fe843b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -26,6 +26,16 @@ config IRQ_WORK
26config BUILDTIME_EXTABLE_SORT 26config BUILDTIME_EXTABLE_SORT
27 bool 27 bool
28 28
29config THREAD_INFO_IN_TASK
30 bool
31 help
32 Select this to move thread_info off the stack into task_struct. To
33 make this work, an arch will need to remove all thread_info fields
34 except flags and fix any runtime bugs.
35
36 One subtle change that will be needed is to use try_get_task_stack()
37 and put_task_stack() in save_thread_stack_tsk() and get_wchan().
38
29menu "General setup" 39menu "General setup"
30 40
31config BROKEN 41config BROKEN
diff --git a/init/init_task.c b/init/init_task.c
index ba0a7f362d9e..11f83be1fa79 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -22,5 +22,8 @@ EXPORT_SYMBOL(init_task);
22 * Initial thread structure. Alignment of this is handled by a special 22 * Initial thread structure. Alignment of this is handled by a special
23 * linker map entry. 23 * linker map entry.
24 */ 24 */
25union thread_union init_thread_union __init_task_data = 25union thread_union init_thread_union __init_task_data = {
26 { INIT_THREAD_INFO(init_task) }; 26#ifndef CONFIG_THREAD_INFO_IN_TASK
27 INIT_THREAD_INFO(init_task)
28#endif
29};
diff --git a/kernel/fork.c b/kernel/fork.c
index beb31725f7e2..c060c7e7c247 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -158,19 +158,83 @@ void __weak arch_release_thread_stack(unsigned long *stack)
158 * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a 158 * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
159 * kmemcache based allocator. 159 * kmemcache based allocator.
160 */ 160 */
161# if THREAD_SIZE >= PAGE_SIZE 161# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
162static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, 162
163 int node) 163#ifdef CONFIG_VMAP_STACK
164/*
165 * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
166 * flush. Try to minimize the number of calls by caching stacks.
167 */
168#define NR_CACHED_STACKS 2
169static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
170#endif
171
172static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
164{ 173{
174#ifdef CONFIG_VMAP_STACK
175 void *stack;
176 int i;
177
178 local_irq_disable();
179 for (i = 0; i < NR_CACHED_STACKS; i++) {
180 struct vm_struct *s = this_cpu_read(cached_stacks[i]);
181
182 if (!s)
183 continue;
184 this_cpu_write(cached_stacks[i], NULL);
185
186 tsk->stack_vm_area = s;
187 local_irq_enable();
188 return s->addr;
189 }
190 local_irq_enable();
191
192 stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
193 VMALLOC_START, VMALLOC_END,
194 THREADINFO_GFP | __GFP_HIGHMEM,
195 PAGE_KERNEL,
196 0, node, __builtin_return_address(0));
197
198 /*
199 * We can't call find_vm_area() in interrupt context, and
200 * free_thread_stack() can be called in interrupt context,
201 * so cache the vm_struct.
202 */
203 if (stack)
204 tsk->stack_vm_area = find_vm_area(stack);
205 return stack;
206#else
165 struct page *page = alloc_pages_node(node, THREADINFO_GFP, 207 struct page *page = alloc_pages_node(node, THREADINFO_GFP,
166 THREAD_SIZE_ORDER); 208 THREAD_SIZE_ORDER);
167 209
168 return page ? page_address(page) : NULL; 210 return page ? page_address(page) : NULL;
211#endif
169} 212}
170 213
171static inline void free_thread_stack(unsigned long *stack) 214static inline void free_thread_stack(struct task_struct *tsk)
172{ 215{
173 __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER); 216#ifdef CONFIG_VMAP_STACK
217 if (task_stack_vm_area(tsk)) {
218 unsigned long flags;
219 int i;
220
221 local_irq_save(flags);
222 for (i = 0; i < NR_CACHED_STACKS; i++) {
223 if (this_cpu_read(cached_stacks[i]))
224 continue;
225
226 this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
227 local_irq_restore(flags);
228 return;
229 }
230 local_irq_restore(flags);
231
232 vfree(tsk->stack);
233 return;
234 }
235#endif
236
237 __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
174} 238}
175# else 239# else
176static struct kmem_cache *thread_stack_cache; 240static struct kmem_cache *thread_stack_cache;
@@ -181,9 +245,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
181 return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); 245 return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
182} 246}
183 247
184static void free_thread_stack(unsigned long *stack) 248static void free_thread_stack(struct task_struct *tsk)
185{ 249{
186 kmem_cache_free(thread_stack_cache, stack); 250 kmem_cache_free(thread_stack_cache, tsk->stack);
187} 251}
188 252
189void thread_stack_cache_init(void) 253void thread_stack_cache_init(void)
@@ -213,24 +277,76 @@ struct kmem_cache *vm_area_cachep;
213/* SLAB cache for mm_struct structures (tsk->mm) */ 277/* SLAB cache for mm_struct structures (tsk->mm) */
214static struct kmem_cache *mm_cachep; 278static struct kmem_cache *mm_cachep;
215 279
216static void account_kernel_stack(unsigned long *stack, int account) 280static void account_kernel_stack(struct task_struct *tsk, int account)
217{ 281{
218 /* All stack pages are in the same zone and belong to the same memcg. */ 282 void *stack = task_stack_page(tsk);
219 struct page *first_page = virt_to_page(stack); 283 struct vm_struct *vm = task_stack_vm_area(tsk);
284
285 BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
286
287 if (vm) {
288 int i;
220 289
221 mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, 290 BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
222 THREAD_SIZE / 1024 * account);
223 291
224 memcg_kmem_update_page_stat( 292 for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
225 first_page, MEMCG_KERNEL_STACK_KB, 293 mod_zone_page_state(page_zone(vm->pages[i]),
226 account * (THREAD_SIZE / 1024)); 294 NR_KERNEL_STACK_KB,
295 PAGE_SIZE / 1024 * account);
296 }
297
298 /* All stack pages belong to the same memcg. */
299 memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
300 account * (THREAD_SIZE / 1024));
301 } else {
302 /*
303 * All stack pages are in the same zone and belong to the
304 * same memcg.
305 */
306 struct page *first_page = virt_to_page(stack);
307
308 mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
309 THREAD_SIZE / 1024 * account);
310
311 memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
312 account * (THREAD_SIZE / 1024));
313 }
227} 314}
228 315
229void free_task(struct task_struct *tsk) 316static void release_task_stack(struct task_struct *tsk)
230{ 317{
231 account_kernel_stack(tsk->stack, -1); 318 account_kernel_stack(tsk, -1);
232 arch_release_thread_stack(tsk->stack); 319 arch_release_thread_stack(tsk->stack);
233 free_thread_stack(tsk->stack); 320 free_thread_stack(tsk);
321 tsk->stack = NULL;
322#ifdef CONFIG_VMAP_STACK
323 tsk->stack_vm_area = NULL;
324#endif
325}
326
327#ifdef CONFIG_THREAD_INFO_IN_TASK
328void put_task_stack(struct task_struct *tsk)
329{
330 if (atomic_dec_and_test(&tsk->stack_refcount))
331 release_task_stack(tsk);
332}
333#endif
334
335void free_task(struct task_struct *tsk)
336{
337#ifndef CONFIG_THREAD_INFO_IN_TASK
338 /*
339 * The task is finally done with both the stack and thread_info,
340 * so free both.
341 */
342 release_task_stack(tsk);
343#else
344 /*
345 * If the task had a separate stack allocation, it should be gone
346 * by now.
347 */
348 WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
349#endif
234 rt_mutex_debug_task_free(tsk); 350 rt_mutex_debug_task_free(tsk);
235 ftrace_graph_exit_task(tsk); 351 ftrace_graph_exit_task(tsk);
236 put_seccomp_filter(tsk); 352 put_seccomp_filter(tsk);
@@ -342,6 +458,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
342{ 458{
343 struct task_struct *tsk; 459 struct task_struct *tsk;
344 unsigned long *stack; 460 unsigned long *stack;
461 struct vm_struct *stack_vm_area;
345 int err; 462 int err;
346 463
347 if (node == NUMA_NO_NODE) 464 if (node == NUMA_NO_NODE)
@@ -354,11 +471,26 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
354 if (!stack) 471 if (!stack)
355 goto free_tsk; 472 goto free_tsk;
356 473
474 stack_vm_area = task_stack_vm_area(tsk);
475
357 err = arch_dup_task_struct(tsk, orig); 476 err = arch_dup_task_struct(tsk, orig);
477
478 /*
479 * arch_dup_task_struct() clobbers the stack-related fields. Make
480 * sure they're properly initialized before using any stack-related
481 * functions again.
482 */
483 tsk->stack = stack;
484#ifdef CONFIG_VMAP_STACK
485 tsk->stack_vm_area = stack_vm_area;
486#endif
487#ifdef CONFIG_THREAD_INFO_IN_TASK
488 atomic_set(&tsk->stack_refcount, 1);
489#endif
490
358 if (err) 491 if (err)
359 goto free_stack; 492 goto free_stack;
360 493
361 tsk->stack = stack;
362#ifdef CONFIG_SECCOMP 494#ifdef CONFIG_SECCOMP
363 /* 495 /*
364 * We must handle setting up seccomp filters once we're under 496 * We must handle setting up seccomp filters once we're under
@@ -390,14 +522,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
390 tsk->task_frag.page = NULL; 522 tsk->task_frag.page = NULL;
391 tsk->wake_q.next = NULL; 523 tsk->wake_q.next = NULL;
392 524
393 account_kernel_stack(stack, 1); 525 account_kernel_stack(tsk, 1);
394 526
395 kcov_task_init(tsk); 527 kcov_task_init(tsk);
396 528
397 return tsk; 529 return tsk;
398 530
399free_stack: 531free_stack:
400 free_thread_stack(stack); 532 free_thread_stack(tsk);
401free_tsk: 533free_tsk:
402 free_task_struct(tsk); 534 free_task_struct(tsk);
403 return NULL; 535 return NULL;
@@ -1715,6 +1847,7 @@ bad_fork_cleanup_count:
1715 atomic_dec(&p->cred->user->processes); 1847 atomic_dec(&p->cred->user->processes);
1716 exit_creds(p); 1848 exit_creds(p);
1717bad_fork_free: 1849bad_fork_free:
1850 put_task_stack(p);
1718 free_task(p); 1851 free_task(p);
1719fork_out: 1852fork_out:
1720 return ERR_PTR(retval); 1853 return ERR_PTR(retval);
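
The alloc_thread_stack_node()/free_thread_stack() hunks above add a two-entry per-CPU cache of vmalloc'ed stacks so short-lived tasks do not pay for vmalloc()/vfree() (and the TLB flushes the latter eventually forces) on every fork/exit. The caching idiom itself is generic; a stripped-down sketch with hypothetical names, guarding the per-CPU slots by disabling interrupts exactly as the code above does:

#include <linux/percpu.h>
#include <linux/irqflags.h>

#define NR_CACHED_OBJS 2
/* Hypothetical per-CPU pointer cache, mirroring cached_stacks[] above. */
static DEFINE_PER_CPU(void *, obj_cache[NR_CACHED_OBJS]);

/* Take an object out of this CPU's cache, or NULL if it is empty. */
static void *cache_get(void)
{
        void *obj = NULL;
        int i;

        local_irq_disable();
        for (i = 0; i < NR_CACHED_OBJS; i++) {
                obj = this_cpu_read(obj_cache[i]);
                if (obj) {
                        this_cpu_write(obj_cache[i], NULL);
                        break;
                }
        }
        local_irq_enable();

        return obj;
}

/* Park an object in a free slot; false means the caller must free it. */
static bool cache_put(void *obj)
{
        unsigned long flags;
        int i;

        local_irq_save(flags);
        for (i = 0; i < NR_CACHED_OBJS; i++) {
                if (this_cpu_read(obj_cache[i]))
                        continue;
                this_cpu_write(obj_cache[i], obj);
                local_irq_restore(flags);
                return true;
        }
        local_irq_restore(flags);

        return false;
}
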
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9ff173dca1ae..4ab4c3766a80 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -64,7 +64,7 @@ static inline struct kthread *to_kthread(struct task_struct *k)
64static struct kthread *to_live_kthread(struct task_struct *k) 64static struct kthread *to_live_kthread(struct task_struct *k)
65{ 65{
66 struct completion *vfork = ACCESS_ONCE(k->vfork_done); 66 struct completion *vfork = ACCESS_ONCE(k->vfork_done);
67 if (likely(vfork)) 67 if (likely(vfork) && try_get_task_stack(k))
68 return __to_kthread(vfork); 68 return __to_kthread(vfork);
69 return NULL; 69 return NULL;
70} 70}
@@ -425,8 +425,10 @@ void kthread_unpark(struct task_struct *k)
425{ 425{
426 struct kthread *kthread = to_live_kthread(k); 426 struct kthread *kthread = to_live_kthread(k);
427 427
428 if (kthread) 428 if (kthread) {
429 __kthread_unpark(k, kthread); 429 __kthread_unpark(k, kthread);
430 put_task_stack(k);
431 }
430} 432}
431EXPORT_SYMBOL_GPL(kthread_unpark); 433EXPORT_SYMBOL_GPL(kthread_unpark);
432 434
@@ -455,6 +457,7 @@ int kthread_park(struct task_struct *k)
455 wait_for_completion(&kthread->parked); 457 wait_for_completion(&kthread->parked);
456 } 458 }
457 } 459 }
460 put_task_stack(k);
458 ret = 0; 461 ret = 0;
459 } 462 }
460 return ret; 463 return ret;
@@ -490,6 +493,7 @@ int kthread_stop(struct task_struct *k)
490 __kthread_unpark(k, kthread); 493 __kthread_unpark(k, kthread);
491 wake_up_process(k); 494 wake_up_process(k);
492 wait_for_completion(&kthread->exited); 495 wait_for_completion(&kthread->exited);
496 put_task_stack(k);
493 } 497 }
494 ret = k->exit_code; 498 ret = k->exit_code;
495 put_task_struct(k); 499 put_task_struct(k);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fac6492f0b98..94732d1ab00a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2781,6 +2781,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
2781 * task and put them back on the free list. 2781 * task and put them back on the free list.
2782 */ 2782 */
2783 kprobe_flush_task(prev); 2783 kprobe_flush_task(prev);
2784
2785 /* Task is done with its stack. */
2786 put_task_stack(prev);
2787
2784 put_task_struct(prev); 2788 put_task_struct(prev);
2785 } 2789 }
2786 2790
@@ -3403,7 +3407,6 @@ static void __sched notrace __schedule(bool preempt)
3403 3407
3404 balance_callback(rq); 3408 balance_callback(rq);
3405} 3409}
3406STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
3407 3410
3408void __noreturn do_task_dead(void) 3411void __noreturn do_task_dead(void)
3409{ 3412{
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 58df5590d028..055f935d4421 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1021,7 +1021,11 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1021 * per-task data have been completed by this moment. 1021 * per-task data have been completed by this moment.
1022 */ 1022 */
1023 smp_wmb(); 1023 smp_wmb();
1024#ifdef CONFIG_THREAD_INFO_IN_TASK
1025 p->cpu = cpu;
1026#else
1024 task_thread_info(p)->cpu = cpu; 1027 task_thread_info(p)->cpu = cpu;
1028#endif
1025 p->wake_cpu = cpu; 1029 p->wake_cpu = cpu;
1026#endif 1030#endif
1027} 1031}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f4b86e8ca1e7..ba3326785ca4 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -24,11 +24,6 @@ config HAVE_FUNCTION_GRAPH_TRACER
24 help 24 help
25 See Documentation/trace/ftrace-design.txt 25 See Documentation/trace/ftrace-design.txt
26 26
27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool
29 help
30 See Documentation/trace/ftrace-design.txt
31
32config HAVE_DYNAMIC_FTRACE 27config HAVE_DYNAMIC_FTRACE
33 bool 28 bool
34 help 29 help
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 7363ccf79512..0cbe38a844fa 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -119,7 +119,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
119/* Add a function return address to the trace stack on thread info.*/ 119/* Add a function return address to the trace stack on thread info.*/
120int 120int
121ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, 121ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
122 unsigned long frame_pointer) 122 unsigned long frame_pointer, unsigned long *retp)
123{ 123{
124 unsigned long long calltime; 124 unsigned long long calltime;
125 int index; 125 int index;
@@ -171,7 +171,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
171 current->ret_stack[index].func = func; 171 current->ret_stack[index].func = func;
172 current->ret_stack[index].calltime = calltime; 172 current->ret_stack[index].calltime = calltime;
173 current->ret_stack[index].subtime = 0; 173 current->ret_stack[index].subtime = 0;
174#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
174 current->ret_stack[index].fp = frame_pointer; 175 current->ret_stack[index].fp = frame_pointer;
176#endif
177#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
178 current->ret_stack[index].retp = retp;
179#endif
175 *depth = current->curr_ret_stack; 180 *depth = current->curr_ret_stack;
176 181
177 return 0; 182 return 0;
@@ -204,7 +209,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
204 return; 209 return;
205 } 210 }
206 211
207#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) 212#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
208 /* 213 /*
209 * The arch may choose to record the frame pointer used 214 * The arch may choose to record the frame pointer used
210 * and check it here to make sure that it is what we expect it 215 * and check it here to make sure that it is what we expect it
@@ -279,6 +284,64 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
279 return ret; 284 return ret;
280} 285}
281 286
287/**
288 * ftrace_graph_ret_addr - convert a potentially modified stack return address
289 * to its original value
290 *
291 * This function can be called by stack unwinding code to convert a found stack
292 * return address ('ret') to its original value, in case the function graph
293 * tracer has modified it to be 'return_to_handler'. If the address hasn't
294 * been modified, the unchanged value of 'ret' is returned.
295 *
296 * 'idx' is a state variable which should be initialized by the caller to zero
297 * before the first call.
298 *
299 * 'retp' is a pointer to the return address on the stack. It's ignored if
300 * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
301 */
302#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
303unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
304 unsigned long ret, unsigned long *retp)
305{
306 int index = task->curr_ret_stack;
307 int i;
308
309 if (ret != (unsigned long)return_to_handler)
310 return ret;
311
312 if (index < -1)
313 index += FTRACE_NOTRACE_DEPTH;
314
315 if (index < 0)
316 return ret;
317
318 for (i = 0; i <= index; i++)
319 if (task->ret_stack[i].retp == retp)
320 return task->ret_stack[i].ret;
321
322 return ret;
323}
324#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
325unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
326 unsigned long ret, unsigned long *retp)
327{
328 int task_idx;
329
330 if (ret != (unsigned long)return_to_handler)
331 return ret;
332
333 task_idx = task->curr_ret_stack;
334
335 if (!task->ret_stack || task_idx < *idx)
336 return ret;
337
338 task_idx -= *idx;
339 (*idx)++;
340
341 return task->ret_stack[task_idx].ret;
342}
343#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
344
282int __trace_graph_entry(struct trace_array *tr, 345int __trace_graph_entry(struct trace_array *tr,
283 struct ftrace_graph_ent *trace, 346 struct ftrace_graph_ent *trace,
284 unsigned long flags, 347 unsigned long flags,
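
ftrace_graph_ret_addr() lets a stack unwinder undo the function-graph tracer's substitution of return_to_handler for the real return address; the kernel-doc above describes the idx state variable and the retp pointer. A hypothetical unwinder fragment showing where the call sits:

#include <linux/ftrace.h>
#include <linux/kernel.h>       /* __kernel_text_address() */
#include <linux/printk.h>
#include <linux/sched.h>

/* Hypothetical scanner: report everything in a stack range that looks
 * like kernel text, translating tracer-mangled return addresses back. */
static void report_stack(struct task_struct *task, unsigned long *sp,
                         unsigned long *end)
{
        int graph_idx = 0;      /* must start at zero, per the kernel-doc */
        unsigned long *p;

        for (p = sp; p < end; p++) {
                unsigned long addr = READ_ONCE(*p);

                if (!__kernel_text_address(addr))
                        continue;

                /* Undo any return_to_handler substitution. */
                addr = ftrace_graph_ret_addr(task, &graph_idx, addr, p);

                printk("  [<%p>] %pB\n", (void *)addr, (void *)addr);
        }
}
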
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fcfa1939ac41..06f02f6aecd2 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -22,6 +22,7 @@
22#include <linux/stacktrace.h> 22#include <linux/stacktrace.h>
23#include <linux/dma-debug.h> 23#include <linux/dma-debug.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/vmalloc.h>
25#include <linux/debugfs.h> 26#include <linux/debugfs.h>
26#include <linux/uaccess.h> 27#include <linux/uaccess.h>
27#include <linux/export.h> 28#include <linux/export.h>
@@ -1164,11 +1165,32 @@ static void check_unmap(struct dma_debug_entry *ref)
1164 put_hash_bucket(bucket, &flags); 1165 put_hash_bucket(bucket, &flags);
1165} 1166}
1166 1167
1167static void check_for_stack(struct device *dev, void *addr) 1168static void check_for_stack(struct device *dev,
1169 struct page *page, size_t offset)
1168{ 1170{
1169 if (object_is_on_stack(addr)) 1171 void *addr;
1170 err_printk(dev, NULL, "DMA-API: device driver maps memory from " 1172 struct vm_struct *stack_vm_area = task_stack_vm_area(current);
1171 "stack [addr=%p]\n", addr); 1173
1174 if (!stack_vm_area) {
1175 /* Stack is direct-mapped. */
1176 if (PageHighMem(page))
1177 return;
1178 addr = page_address(page) + offset;
1179 if (object_is_on_stack(addr))
1180 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
1181 } else {
1182 /* Stack is vmalloced. */
1183 int i;
1184
1185 for (i = 0; i < stack_vm_area->nr_pages; i++) {
1186 if (page != stack_vm_area->pages[i])
1187 continue;
1188
1189 addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
1190 err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
1191 break;
1192 }
1193 }
1172} 1194}
1173 1195
1174static inline bool overlap(void *addr, unsigned long len, void *start, void *end) 1196static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
@@ -1291,10 +1313,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
1291 if (map_single) 1313 if (map_single)
1292 entry->type = dma_debug_single; 1314 entry->type = dma_debug_single;
1293 1315
1316 check_for_stack(dev, page, offset);
1317
1294 if (!PageHighMem(page)) { 1318 if (!PageHighMem(page)) {
1295 void *addr = page_address(page) + offset; 1319 void *addr = page_address(page) + offset;
1296 1320
1297 check_for_stack(dev, addr);
1298 check_for_illegal_area(dev, addr, size); 1321 check_for_illegal_area(dev, addr, size);
1299 } 1322 }
1300 1323
@@ -1386,8 +1409,9 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
1386 entry->sg_call_ents = nents; 1409 entry->sg_call_ents = nents;
1387 entry->sg_mapped_ents = mapped_ents; 1410 entry->sg_mapped_ents = mapped_ents;
1388 1411
1412 check_for_stack(dev, sg_page(s), s->offset);
1413
1389 if (!PageHighMem(sg_page(s))) { 1414 if (!PageHighMem(sg_page(s))) {
1390 check_for_stack(dev, sg_virt(s));
1391 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); 1415 check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
1392 } 1416 }
1393 1417
diff --git a/lib/syscall.c b/lib/syscall.c
index e30e03932480..63239e097b13 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -7,9 +7,19 @@ static int collect_syscall(struct task_struct *target, long *callno,
7 unsigned long args[6], unsigned int maxargs, 7 unsigned long args[6], unsigned int maxargs,
8 unsigned long *sp, unsigned long *pc) 8 unsigned long *sp, unsigned long *pc)
9{ 9{
10 struct pt_regs *regs = task_pt_regs(target); 10 struct pt_regs *regs;
11 if (unlikely(!regs)) 11
12 if (!try_get_task_stack(target)) {
13 /* Task has no stack, so the task isn't in a syscall. */
14 *callno = -1;
15 return 0;
16 }
17
18 regs = task_pt_regs(target);
19 if (unlikely(!regs)) {
20 put_task_stack(target);
12 return -EAGAIN; 21 return -EAGAIN;
22 }
13 23
14 *sp = user_stack_pointer(regs); 24 *sp = user_stack_pointer(regs);
15 *pc = instruction_pointer(regs); 25 *pc = instruction_pointer(regs);
@@ -18,6 +28,7 @@ static int collect_syscall(struct task_struct *target, long *callno,
18 if (*callno != -1L && maxargs > 0) 28 if (*callno != -1L && maxargs > 0)
19 syscall_get_arguments(target, regs, 0, maxargs, args); 29 syscall_get_arguments(target, regs, 0, maxargs, args);
20 30
31 put_task_stack(target);
21 return 0; 32 return 0;
22} 33}
23 34
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 421456784bc6..b037ce9cf116 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -147,7 +147,7 @@ static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *))
147 if (args.nr != getpid() || 147 if (args.nr != getpid() ||
148 args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 || 148 args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
149 args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { 149 args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
150 printf("[FAIL]\tgetpid() failed to preseve regs\n"); 150 printf("[FAIL]\tgetpid() failed to preserve regs\n");
151 nerrs++; 151 nerrs++;
152 } else { 152 } else {
153 printf("[OK]\tgetpid() preserves regs\n"); 153 printf("[OK]\tgetpid() preserves regs\n");
@@ -162,7 +162,7 @@ static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *))
162 if (args.nr != 0 || 162 if (args.nr != 0 ||
163 args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 || 163 args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
164 args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { 164 args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
165 printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preseve regs\n"); 165 printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preserve regs\n");
166 nerrs++; 166 nerrs++;
167 } else { 167 } else {
168 printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n"); 168 printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 8a577e7070c6..246145b84a12 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -106,7 +106,7 @@ asm (".pushsection .text\n\t"
106 ".type int3, @function\n\t" 106 ".type int3, @function\n\t"
107 ".align 4096\n\t" 107 ".align 4096\n\t"
108 "int3:\n\t" 108 "int3:\n\t"
109 "mov %ss,%eax\n\t" 109 "mov %ss,%ecx\n\t"
110 "int3\n\t" 110 "int3\n\t"
111 ".size int3, . - int3\n\t" 111 ".size int3, . - int3\n\t"
112 ".align 4096, 0xcc\n\t" 112 ".align 4096, 0xcc\n\t"
@@ -306,7 +306,7 @@ static volatile sig_atomic_t sig_corrupt_final_ss;
306#ifdef __x86_64__ 306#ifdef __x86_64__
307# define REG_IP REG_RIP 307# define REG_IP REG_RIP
308# define REG_SP REG_RSP 308# define REG_SP REG_RSP
309# define REG_AX REG_RAX 309# define REG_CX REG_RCX
310 310
311struct selectors { 311struct selectors {
312 unsigned short cs, gs, fs, ss; 312 unsigned short cs, gs, fs, ss;
@@ -326,7 +326,7 @@ static unsigned short *csptr(ucontext_t *ctx)
326#else 326#else
327# define REG_IP REG_EIP 327# define REG_IP REG_EIP
328# define REG_SP REG_ESP 328# define REG_SP REG_ESP
329# define REG_AX REG_EAX 329# define REG_CX REG_ECX
330 330
331static greg_t *ssptr(ucontext_t *ctx) 331static greg_t *ssptr(ucontext_t *ctx)
332{ 332{
@@ -457,10 +457,10 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
457 ctx->uc_mcontext.gregs[REG_IP] = 457 ctx->uc_mcontext.gregs[REG_IP] =
458 sig_cs == code16_sel ? 0 : (unsigned long)&int3; 458 sig_cs == code16_sel ? 0 : (unsigned long)&int3;
459 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; 459 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
460 ctx->uc_mcontext.gregs[REG_AX] = 0; 460 ctx->uc_mcontext.gregs[REG_CX] = 0;
461 461
462 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 462 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
463 requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ 463 requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
464 464
465 return; 465 return;
466} 466}
@@ -482,7 +482,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
482 unsigned short ss; 482 unsigned short ss;
483 asm ("mov %%ss,%0" : "=r" (ss)); 483 asm ("mov %%ss,%0" : "=r" (ss));
484 484
485 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; 485 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
486 if (asm_ss != sig_ss && sig == SIGTRAP) { 486 if (asm_ss != sig_ss && sig == SIGTRAP) {
487 /* Sanity check failure. */ 487 /* Sanity check failure. */
488 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", 488 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
@@ -654,8 +654,8 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
654#endif 654#endif
655 655
656 /* Sanity check on the kernel */ 656 /* Sanity check on the kernel */
657 if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { 657 if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
658 printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", 658 printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
659 (unsigned long long)requested_regs[i], 659 (unsigned long long)requested_regs[i],
660 (unsigned long long)resulting_regs[i]); 660 (unsigned long long)resulting_regs[i]);
661 nerrs++; 661 nerrs++;